Merge pull request #448 from tree-sitter/highlight-with-queries

Reimplement syntax highlighting with tree queries instead of property sheets
This commit is contained in:
Max Brunsfeld 2019-10-17 13:49:11 -07:00 committed by GitHub
commit 3d6f2822e9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
25 changed files with 1799 additions and 3860 deletions

77
Cargo.lock generated
View file

@ -93,11 +93,6 @@ dependencies = [
"constant_time_eq 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "bytecount"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "byteorder"
version = "1.3.2"
@ -266,18 +261,6 @@ name = "lazy_static"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "lexical-core"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
"rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
"ryu 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
"stackvector 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
"static_assertions 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "libc"
version = "0.2.61"
@ -328,16 +311,6 @@ name = "nodrop"
version = "0.1.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "nom"
version = "5.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"lexical-core 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)",
"memchr 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "num-integer"
version = "0.1.39"
@ -346,15 +319,6 @@ dependencies = [
"num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "num-rational"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"num-integer 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)",
"num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "num-traits"
version = "0.2.6"
@ -589,19 +553,6 @@ dependencies = [
"winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rsass"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"bytecount 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"nom 5.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
"num-rational 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)",
"rand 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rust-argon2"
version = "0.5.1"
@ -630,11 +581,6 @@ name = "ryu"
version = "0.2.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "ryu"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "scopeguard"
version = "0.3.3"
@ -697,20 +643,6 @@ name = "spin"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "stackvector"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
"unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "static_assertions"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "strsim"
version = "0.7.0"
@ -825,7 +757,6 @@ dependencies = [
"rand 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)",
"rsass 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_derive 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_json 1.0.33 (registry+https://github.com/rust-lang/crates.io-index)",
@ -964,7 +895,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum base64 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0b25d992356d2eb0ed82172f5248873db5560c4721f564b13cb5193bda5e668e"
"checksum bitflags 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "228047a76f468627ca71776ecdebd732a3423081fcf5125585bcd7c49886ce12"
"checksum blake2b_simd 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)" = "461f4b879a8eb70c1debf7d0788a9a5ff15f1ea9d25925fea264ef4258bed6b2"
"checksum bytecount 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "be0fdd54b507df8f22012890aadd099979befdba27713c767993f8380112ca7c"
"checksum byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a7c3dd8985a7111efc5c80b44e23ecdd8c007de8ade3b96595387e812b957cf5"
"checksum c2-chacha 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7d64d04786e0f528460fc884753cf8dddcc466be308f6026f8e355c41a0e4101"
"checksum cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)" = "f159dfd43363c4d08055a07703eb7a3406b0dac4d0584d96965a3262db3c9d16"
@ -987,7 +917,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum indexmap 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7e81a7c05f79578dbc15793d8b619db9ba32b4577003ef3af1a91c416798c58d"
"checksum itoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "1306f3464951f30e30d12373d31c79fbd52d236e5e896fd92f96ec7babbbe60b"
"checksum lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a374c89b9db55895453a74c1e38861d9deec0b01b405a82516e9d5de4820dea1"
"checksum lexical-core 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "b8b0f90c979adde96d19eb10eb6431ba0c441e2f9e9bdff868b2f6f5114ff519"
"checksum libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)" = "c665266eb592905e8503ba3403020f4b8794d26263f412ca33171600eca9a6fa"
"checksum libloading 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "9c3ad660d7cb8c5822cd83d10897b0f1f1526792737a179e73896152f85b88c2"
"checksum lock_api 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "62ebf1391f6acad60e5c8b43706dde4582df75c06698ab44511d15016bc2442c"
@ -995,9 +924,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08"
"checksum memchr 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0a3eb002f0535929f1199681417029ebea04aadc0c7a4224b46be99c7f5d6a16"
"checksum nodrop 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)" = "2f9667ddcc6cc8a43afc9b7917599d7216aa09c463919ea32c59ed6cac8bc945"
"checksum nom 5.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e9761d859320e381010a4f7f8ed425f2c924de33ad121ace447367c713ad561b"
"checksum num-integer 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)" = "e83d528d2677f0518c570baf2b7abdcf0cd2d248860b68507bdcb3e91d4c0cea"
"checksum num-rational 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4e96f040177bb3da242b5b1ecf3f54b5d5af3efbbfb18608977a5d2767b22f10"
"checksum num-traits 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "0b3a5d7cc97d6d30d8b9bc8fa19bf45349ffe46241e8816f50f62f6d6aaabee1"
"checksum once_cell 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "532c29a261168a45ce28948f9537ddd7a5dd272cc513b3017b1e82a88f962c37"
"checksum parking_lot 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ab41b4aed082705d1056416ae4468b6ea99d52599ecf3169b00088d43113e337"
@ -1025,12 +952,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "37e7cbbd370869ce2e8dff25c7018702d10b21a20ef7135316f8daecd6c25b7f"
"checksum regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "4e47a2ed29da7a9e1960e1639e7a982e6edc6d49be308a3b02daf511504a16d1"
"checksum remove_dir_all 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3488ba1b9a2084d38645c4c08276a1752dcbf2c7130d74f1569681ad5d2799c5"
"checksum rsass 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "4520dc8a2786c0319f3947e3d79e735b27f0c63c555b854aaa802e49e3f45098"
"checksum rust-argon2 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4ca4eaef519b494d1f2848fc602d18816fed808a981aedf4f1f00ceb7c9d32cf"
"checksum rustc-demangle 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "bcfe5b13211b4d78e5c2cadfebd7769197d95c639c35a50057eb4c05de811395"
"checksum rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a"
"checksum ryu 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "eb9e9b8cde282a9fe6a42dd4681319bfb63f121b8a8ee9439c6f4107e58a46f7"
"checksum ryu 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c92464b447c0ee8c4fb3824ecc8383b81717b9f1e74ba2e72540aef7b9f82997"
"checksum scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "94258f53601af11e6a49f722422f6e3425c52b06245a5cf9bc09908b174f5e27"
"checksum semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403"
"checksum semver-parser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
@ -1040,8 +965,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum smallbitvec 2.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1764fe2b30ee783bfe3b9b37b2649d8d590b3148bb12e0079715d4d5c673562e"
"checksum smallvec 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)" = "88aea073965ab29f6edb5493faf96ad662fb18aa9eeb186a3b7057951605ed15"
"checksum spin 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "44363f6f51401c34e7be73db0db371c04705d35efbe9f7d6082e03a921a32c55"
"checksum stackvector 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "1c4725650978235083241fab0fdc8e694c3de37821524e7534a1a9061d1068af"
"checksum static_assertions 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "7f3eb36b47e512f8f1c9e3d10c2c1965bc992bd9cdb024fa581e2194501c83d3"
"checksum strsim 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bb4f380125926a99e52bc279241539c018323fab05ad6368b56f93d9369ff550"
"checksum syn 0.15.22 (registry+https://github.com/rust-lang/crates.io-index)" = "ae8b29eb5210bc5cf63ed6149cbf9adfc82ac0be023d8735c176ee74a2db4da7"
"checksum synstructure 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)" = "73687139bf99285483c96ac0add482c3776528beac1d97d444f6e91f203a2015"

View file

@ -32,7 +32,6 @@ serde = "1.0"
serde_derive = "1.0"
regex-syntax = "0.6.4"
regex = "1"
rsass = "^0.11.0"
tiny_http = "0.6"
webbrowser = "0.5.1"

View file

@ -1,6 +1,6 @@
use std::fmt::Write;
use std::io;
use tree_sitter_highlight::PropertySheetError;
use tree_sitter::QueryError;
/// Error type for this module: a tuple struct wrapping a list of strings.
///
/// The `From` conversions in this file build values through `Error::new`,
/// passing a single string rendered from the source error.
#[derive(Debug)]
pub struct Error(pub Vec<String>);
@ -50,6 +50,18 @@ impl Error {
}
}
impl<'a> From<QueryError> for Error {
fn from(error: QueryError) -> Self {
Error::new(format!("{:?}", error))
}
}
impl<'a> From<tree_sitter_highlight::Error> for Error {
fn from(error: tree_sitter_highlight::Error) -> Self {
Error::new(format!("{:?}", error))
}
}
impl From<serde_json::Error> for Error {
fn from(error: serde_json::Error) -> Self {
Error::new(error.to_string())
@ -62,12 +74,6 @@ impl From<io::Error> for Error {
}
}
/// Converts an `rsass::Error` into this module's `Error`, using the
/// source error's `to_string()` text as the message.
impl From<rsass::Error> for Error {
    fn from(error: rsass::Error) -> Self {
        let message = error.to_string();
        Self::new(message)
    }
}
impl From<regex_syntax::ast::Error> for Error {
fn from(error: regex_syntax::ast::Error) -> Self {
Error::new(error.to_string())
@ -79,13 +85,3 @@ impl From<String> for Error {
Error::new(error)
}
}
/// Converts a `PropertySheetError` into this module's `Error`.
///
/// `InvalidFormat` and `InvalidJSON` payloads are forwarded to the `From`
/// impl for their own type; an `InvalidRegex` payload is rendered to a
/// string and passed to `Error::regex`.
impl From<PropertySheetError> for Error {
    fn from(error: PropertySheetError) -> Self {
        match error {
            PropertySheetError::InvalidRegex(cause) => {
                let rendered = cause.to_string();
                Self::regex(&rendered)
            }
            PropertySheetError::InvalidFormat(cause) => Self::from(cause),
            PropertySheetError::InvalidJSON(cause) => Self::from(cause),
        }
    }
}

View file

@ -6,13 +6,12 @@ mod node_types;
mod npm_files;
pub mod parse_grammar;
mod prepare_grammar;
pub mod properties;
mod render;
mod rules;
mod tables;
use self::build_tables::build_tables;
use self::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar, VariableType};
use self::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar};
use self::parse_grammar::parse_grammar;
use self::prepare_grammar::prepare_grammar;
use self::render::render_c_code;
@ -20,9 +19,8 @@ use self::rules::AliasMap;
use crate::error::{Error, Result};
use lazy_static::lazy_static;
use regex::{Regex, RegexBuilder};
use std::collections::HashSet;
use std::fs::{self, File};
use std::io::{BufWriter, Write};
use std::fs;
use std::io::Write;
use std::path::{Path, PathBuf};
use std::process::{Command, Stdio};
@ -51,13 +49,11 @@ struct GeneratedParser {
pub fn generate_parser_in_directory(
repo_path: &PathBuf,
grammar_path: Option<&str>,
properties_only: bool,
next_abi: bool,
report_symbol_name: Option<&str>,
) -> Result<()> {
let src_path = repo_path.join("src");
let header_path = src_path.join("tree_sitter");
let properties_dir_path = repo_path.join("properties");
// Ensure that the output directories exist.
fs::create_dir_all(&src_path)?;
@ -82,71 +78,48 @@ pub fn generate_parser_in_directory(
prepare_grammar(&input_grammar)?;
let language_name = input_grammar.name;
// If run with no arguments, read all of the property sheets and compile them to JSON.
if grammar_path.is_none() {
let token_names = get_token_names(&syntax_grammar, &lexical_grammar);
if let Ok(entries) = fs::read_dir(properties_dir_path) {
for entry in entries {
let css_path = entry?.path();
let css = fs::read_to_string(&css_path)?;
let sheet = properties::generate_property_sheet(&css_path, &css, &token_names)?;
let property_sheet_json_path = src_path
.join(css_path.file_name().unwrap())
.with_extension("json");
let property_sheet_json_file =
File::create(&property_sheet_json_path).map_err(Error::wrap(|| {
format!("Failed to create {:?}", property_sheet_json_path)
}))?;
let mut writer = BufWriter::new(property_sheet_json_file);
serde_json::to_writer_pretty(&mut writer, &sheet)?;
}
}
}
// Generate the parser and related files.
if !properties_only {
let GeneratedParser {
c_code,
node_types_json,
} = generate_parser_for_grammar_with_opts(
&language_name,
syntax_grammar,
lexical_grammar,
inlines,
simple_aliases,
next_abi,
report_symbol_name,
)?;
let GeneratedParser {
c_code,
node_types_json,
} = generate_parser_for_grammar_with_opts(
&language_name,
syntax_grammar,
lexical_grammar,
inlines,
simple_aliases,
next_abi,
report_symbol_name,
)?;
write_file(&src_path.join("parser.c"), c_code)?;
write_file(&src_path.join("node-types.json"), node_types_json)?;
write_file(&src_path.join("parser.c"), c_code)?;
write_file(&src_path.join("node-types.json"), node_types_json)?;
if next_abi {
write_file(&header_path.join("parser.h"), tree_sitter::PARSER_HEADER)?;
} else {
let mut header = tree_sitter::PARSER_HEADER.to_string();
if next_abi {
write_file(&header_path.join("parser.h"), tree_sitter::PARSER_HEADER)?;
} else {
let mut header = tree_sitter::PARSER_HEADER.to_string();
for part in &NEW_HEADER_PARTS {
let pos = header
.find(part)
.expect("Missing expected part of parser.h header");
header.replace_range(pos..(pos + part.len()), "");
}
write_file(&header_path.join("parser.h"), header)?;
for part in &NEW_HEADER_PARTS {
let pos = header
.find(part)
.expect("Missing expected part of parser.h header");
header.replace_range(pos..(pos + part.len()), "");
}
ensure_file(&repo_path.join("index.js"), || {
npm_files::index_js(&language_name)
})?;
ensure_file(&src_path.join("binding.cc"), || {
npm_files::binding_cc(&language_name)
})?;
ensure_file(&repo_path.join("binding.gyp"), || {
npm_files::binding_gyp(&language_name)
})?;
write_file(&header_path.join("parser.h"), header)?;
}
ensure_file(&repo_path.join("index.js"), || {
npm_files::index_js(&language_name)
})?;
ensure_file(&src_path.join("binding.cc"), || {
npm_files::binding_cc(&language_name)
})?;
ensure_file(&repo_path.join("binding.gyp"), || {
npm_files::binding_gyp(&language_name)
})?;
Ok(())
}
@ -208,35 +181,6 @@ fn generate_parser_for_grammar_with_opts(
})
}
/// Gathers the set of token names defined by a grammar.
///
/// Three sources contribute to the result:
/// - lexical variables whose kind is `VariableType::Named`,
/// - external tokens whose kind is `VariableType::Named`,
/// - named aliases attached to production steps whose symbol is not a
///   non-terminal.
fn get_token_names(
    syntax_grammar: &SyntaxGrammar,
    lexical_grammar: &LexicalGrammar,
) -> HashSet<String> {
    let lexical_names = lexical_grammar
        .variables
        .iter()
        .filter(|variable| variable.kind == VariableType::Named)
        .map(|variable| variable.name.clone());

    let external_names = syntax_grammar
        .external_tokens
        .iter()
        .filter(|token| token.kind == VariableType::Named)
        .map(|token| token.name.clone());

    // Walk every step of every production and keep the named aliases that
    // sit on terminal symbols.
    let alias_names = syntax_grammar
        .variables
        .iter()
        .flat_map(|variable| variable.productions.iter())
        .flat_map(|production| production.steps.iter())
        .filter_map(|step| match &step.alias {
            Some(alias) if !step.symbol.is_non_terminal() && alias.is_named => {
                Some(alias.value.clone())
            }
            _ => None,
        });

    lexical_names
        .chain(external_names)
        .chain(alias_names)
        .collect()
}
fn load_grammar_file(grammar_path: &Path) -> Result<String> {
match grammar_path.extension().and_then(|e| e.to_str()) {
Some("js") => Ok(load_js_grammar_file(grammar_path)?),

File diff suppressed because it is too large Load diff

View file

@ -1,6 +1,6 @@
use crate::error::Result;
use crate::loader::Loader;
use ansi_term::{Color, Style};
use ansi_term::Color;
use lazy_static::lazy_static;
use serde::ser::SerializeMap;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
@ -9,18 +9,52 @@ use std::collections::HashMap;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use std::time::Instant;
use std::{fmt, fs, io, path, thread};
use tree_sitter::{Language, PropertySheet};
use tree_sitter_highlight::{highlight, highlight_html, Highlight, HighlightEvent, Properties};
use std::{fs, io, path, str, thread, usize};
use tree_sitter_highlight::{
HighlightConfiguration, HighlightContext, HighlightEvent, Highlighter, HtmlRenderer,
};
/// Opening HTML fragment: the doctype, a `<head>` holding the page title and
/// inline CSS rules for `body`, `.line-number` and `.line`, followed by the
/// opening `<body>` tag. The literal begins with a newline.
pub const HTML_HEADER: &'static str = "
<!doctype HTML>
<head>
<title>Tree-sitter Highlighting</title>
<style>
body {
font-family: monospace
}
.line-number {
user-select: none;
text-align: right;
color: rgba(27,31,35,.3);
padding: 0 10px;
}
.line {
white-space: pre;
}
</style>
</head>
<body>
";
/// Closing HTML fragment: only the `</body>` end tag, surrounded by newlines.
pub const HTML_FOOTER: &'static str = "
</body>
";
// List of strings parsed from the vendored `xterm-colors.json` file, which is
// embedded at compile time via `include_str!`. The `unwrap` panics if that
// file is not valid JSON for a `Vec<String>`.
// NOTE(review): the name suggests the vector is indexed by xterm color id —
// confirm against the lookup site.
lazy_static! {
static ref CSS_STYLES_BY_COLOR_ID: Vec<String> =
serde_json::from_str(include_str!("../vendor/xterm-colors.json")).unwrap();
}
/// Styling for one theme entry, kept in two forms: an ANSI terminal style and
/// an optional CSS string.
#[derive(Debug, Default)]
pub struct Style {
/// Terminal style, built up by `parse_style` from the theme's JSON value.
pub ansi: ansi_term::Style,
/// CSS text produced by `style_to_css` from `ansi`; `parse_style` sets it to
/// `None` when the JSON value is neither an object nor a parseable color.
pub css: Option<String>,
}
/// A highlighting theme: styles to apply to highlighted source text.
#[derive(Debug)]
pub struct Theme {
// Optional ANSI style per highlight; `ansi_style` indexes it with
// `highlight as usize`.
ansi_styles: Vec<Option<Style>>,
// Optional CSS string per highlight; `css_style` indexes it with
// `highlight as usize`.
css_styles: Vec<Option<String>>,
/// Highlighter constructed from the theme's entry names during
/// deserialization (`Highlighter::new(names)`).
pub highlighter: Highlighter,
// One style per entry name, pushed in the same order as the names;
// indexed by `highlight.0` when rendering.
styles: Vec<Style>,
}
impl Theme {
@ -29,14 +63,8 @@ impl Theme {
Ok(serde_json::from_str(&json).unwrap_or_default())
}
fn ansi_style(&self, highlight: Highlight) -> Option<&Style> {
self.ansi_styles[highlight as usize].as_ref()
}
fn css_style(&self, highlight: Highlight) -> Option<&str> {
self.css_styles[highlight as usize]
.as_ref()
.map(|s| s.as_str())
pub fn default_style(&self) -> Style {
Style::default()
}
}
@ -45,20 +73,21 @@ impl<'de> Deserialize<'de> for Theme {
where
D: Deserializer<'de>,
{
let highlight_count = Highlight::Unknown as usize + 1;
let mut ansi_styles = vec![None; highlight_count];
let mut css_styles = vec![None; highlight_count];
if let Ok(colors) = HashMap::<Highlight, Value>::deserialize(deserializer) {
for (highlight, style_value) in colors {
let mut names = Vec::new();
let mut styles = Vec::new();
if let Ok(colors) = HashMap::<String, Value>::deserialize(deserializer) {
names.reserve(colors.len());
styles.reserve(colors.len());
for (name, style_value) in colors {
let mut style = Style::default();
parse_style(&mut style, style_value);
ansi_styles[highlight as usize] = Some(style);
css_styles[highlight as usize] = Some(style_to_css(style));
names.push(name);
styles.push(style);
}
}
Ok(Self {
ansi_styles,
css_styles,
highlighter: Highlighter::new(names),
styles,
})
}
}
@ -68,48 +97,40 @@ impl Serialize for Theme {
where
S: Serializer,
{
let entry_count = self.ansi_styles.iter().filter(|i| i.is_some()).count();
let mut map = serializer.serialize_map(Some(entry_count))?;
for (i, style) in self.ansi_styles.iter().enumerate() {
let highlight = Highlight::from_usize(i).unwrap();
if highlight == Highlight::Unknown {
break;
}
if let Some(style) = style {
let color = style.foreground.map(|color| match color {
Color::Black => json!("black"),
Color::Blue => json!("blue"),
Color::Cyan => json!("cyan"),
Color::Green => json!("green"),
Color::Purple => json!("purple"),
Color::Red => json!("red"),
Color::White => json!("white"),
Color::Yellow => json!("yellow"),
Color::RGB(r, g, b) => json!(format!("#{:x?}{:x?}{:x?}", r, g, b)),
Color::Fixed(n) => json!(n),
});
if style.is_bold || style.is_italic || style.is_underline {
let mut entry = HashMap::new();
if let Some(color) = color {
entry.insert("color", color);
}
if style.is_bold {
entry.insert("bold", Value::Bool(true));
}
if style.is_italic {
entry.insert("italic", Value::Bool(true));
}
if style.is_underline {
entry.insert("underline", Value::Bool(true));
}
map.serialize_entry(&highlight, &entry)?;
} else if let Some(color) = color {
map.serialize_entry(&highlight, &color)?;
} else {
map.serialize_entry(&highlight, &Value::Null)?;
let mut map = serializer.serialize_map(Some(self.styles.len()))?;
for (name, style) in self.highlighter.names().iter().zip(&self.styles) {
let style = &style.ansi;
let color = style.foreground.map(|color| match color {
Color::Black => json!("black"),
Color::Blue => json!("blue"),
Color::Cyan => json!("cyan"),
Color::Green => json!("green"),
Color::Purple => json!("purple"),
Color::Red => json!("red"),
Color::White => json!("white"),
Color::Yellow => json!("yellow"),
Color::RGB(r, g, b) => json!(format!("#{:x?}{:x?}{:x?}", r, g, b)),
Color::Fixed(n) => json!(n),
});
if style.is_bold || style.is_italic || style.is_underline {
let mut style_json = HashMap::new();
if let Some(color) = color {
style_json.insert("color", color);
}
if style.is_bold {
style_json.insert("bold", Value::Bool(true));
}
if style.is_italic {
style_json.insert("italic", Value::Bool(true));
}
if style.is_underline {
style_json.insert("underline", Value::Bool(true));
}
map.serialize_entry(&name, &style_json)?;
} else if let Some(color) = color {
map.serialize_entry(&name, &color)?;
} else {
map.serialize_entry(&highlight, &Value::Null)?;
map.serialize_entry(&name, &Value::Null)?;
}
}
map.end()
@ -149,42 +170,27 @@ impl Default for Theme {
}
}
/// Formats the theme as `{Highlight: Style, ...}`, listing only the
/// highlights that have an ANSI style assigned.
impl fmt::Debug for Theme {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{{")?;
        // Empty before the first printed entry, ", " afterwards.
        let mut separator = "";
        for (index, slot) in self.ansi_styles.iter().enumerate() {
            let style = match slot {
                Some(style) => style,
                None => continue,
            };
            let highlight = Highlight::from_usize(index).unwrap();
            write!(f, "{}", separator)?;
            write!(f, "{:?}: {:?}", highlight, style)?;
            separator = ", ";
        }
        write!(f, "}}")
    }
}
// Fills `style` in place from one JSON theme value.
//
// - If `json` is an object, each entry is inspected: the keys "bold",
//   "italic" and "underline" switch the corresponding attribute on, and
//   "color" sets the foreground when `parse_color` accepts its value.
//   Any other key is ignored, as is a "color" that fails to parse.
// - Otherwise the value itself is tried as a color via `parse_color`.
// - In both of those cases `style.css` is set from `style_to_css(style.ansi)`;
//   if the value is neither an object nor a parseable color, `style.css`
//   is set to `None`.
fn parse_style(style: &mut Style, json: Value) {
if let Value::Object(entries) = json {
for (property_name, value) in entries {
match property_name.as_str() {
"bold" => *style = style.bold(),
"italic" => *style = style.italic(),
"underline" => *style = style.underline(),
"bold" => style.ansi = style.ansi.bold(),
"italic" => style.ansi = style.ansi.italic(),
"underline" => style.ansi = style.ansi.underline(),
"color" => {
if let Some(color) = parse_color(value) {
*style = style.fg(color);
style.ansi = style.ansi.fg(color);
}
}
_ => {}
}
}
style.css = Some(style_to_css(style.ansi));
} else if let Some(color) = parse_color(json) {
*style = style.fg(color);
style.ansi = style.ansi.fg(color);
style.css = Some(style_to_css(style.ansi));
} else {
style.css = None;
}
}
@ -223,7 +229,7 @@ fn parse_color(json: Value) -> Option<Color> {
}
}
fn style_to_css(style: Style) -> String {
fn style_to_css(style: ansi_term::Style) -> String {
use std::fmt::Write;
let mut result = "style='".to_string();
if style.is_bold {
@ -271,108 +277,83 @@ pub fn ansi(
loader: &Loader,
theme: &Theme,
source: &[u8],
language: Language,
property_sheet: &PropertySheet<Properties>,
config: &HighlightConfiguration,
print_time: bool,
) -> Result<()> {
use std::io::Write;
let stdout = io::stdout();
let mut stdout = stdout.lock();
let cancellation_flag = cancel_on_stdin();
let time = Instant::now();
let mut highlight_stack = Vec::new();
for event in highlight(
let cancellation_flag = cancel_on_stdin();
let mut context = HighlightContext::new();
let events = theme.highlighter.highlight(
&mut context,
config,
source,
language,
property_sheet,
Some(cancellation_flag.as_ref()),
|s| language_for_injection_string(loader, s),
)
.map_err(|e| e.to_string())?
{
let event = event.map_err(|e| e.to_string())?;
match event {
HighlightEvent::Source { start, end } => {
if let Some(style) = highlight_stack.last().and_then(|s| theme.ansi_style(*s)) {
style.paint(&source[start..end]).write_to(&mut stdout)?;
} else {
stdout.write_all(&source[start..end])?;
}
}
HighlightEvent::HighlightStart(h) => {
highlight_stack.push(h);
Some(&cancellation_flag),
|string| language_for_injection_string(loader, theme, string),
)?;
let mut style_stack = vec![theme.default_style().ansi];
for event in events {
match event? {
HighlightEvent::HighlightStart(highlight) => {
style_stack.push(theme.styles[highlight.0].ansi);
}
HighlightEvent::HighlightEnd => {
highlight_stack.pop();
style_stack.pop();
}
HighlightEvent::Source { start, end } => {
style_stack
.last()
.unwrap()
.paint(&source[start..end])
.write_to(&mut stdout)?;
}
}
}
if print_time {
let duration = time.elapsed();
let duration_ms = duration.as_secs() * 1000 + duration.subsec_nanos() as u64 / 1000000;
eprintln!("{} ms", duration_ms);
eprintln!("Time: {}ms", time.elapsed().as_millis());
}
Ok(())
}
/// Opening HTML fragment: the doctype, a `<head>` holding the page title and
/// inline CSS rules for `body`, `.line-number` and `.line`, followed by the
/// opening `<body>` tag. The literal begins with a newline.
pub const HTML_HEADER: &'static str = "
<!doctype HTML>
<head>
<title>Tree-sitter Highlighting</title>
<style>
body {
font-family: monospace
}
.line-number {
user-select: none;
text-align: right;
color: rgba(27,31,35,.3);
padding: 0 10px;
}
.line {
white-space: pre;
}
</style>
</head>
<body>
";
/// Closing HTML fragment: only the `</body>` end tag, surrounded by newlines.
pub const HTML_FOOTER: &'static str = "
</body>
";
pub fn html(
loader: &Loader,
theme: &Theme,
source: &[u8],
language: Language,
property_sheet: &PropertySheet<Properties>,
config: &HighlightConfiguration,
print_time: bool,
) -> Result<()> {
use std::io::Write;
let stdout = io::stdout();
let mut stdout = stdout.lock();
write!(&mut stdout, "<table>\n")?;
let time = Instant::now();
let cancellation_flag = cancel_on_stdin();
let lines = highlight_html(
let mut context = HighlightContext::new();
let events = theme.highlighter.highlight(
&mut context,
config,
source,
language,
property_sheet,
Some(cancellation_flag.as_ref()),
|s| language_for_injection_string(loader, s),
|highlight| {
if let Some(css_style) = theme.css_style(highlight) {
css_style
} else {
""
}
},
)
.map_err(|e| e.to_string())?;
for (i, line) in lines.into_iter().enumerate() {
Some(&cancellation_flag),
|string| language_for_injection_string(loader, theme, string),
)?;
let mut renderer = HtmlRenderer::new();
renderer.render(events, source, &move |highlight| {
if let Some(css_style) = &theme.styles[highlight.0].css {
css_style.as_bytes()
} else {
"".as_bytes()
}
})?;
write!(&mut stdout, "<table>\n")?;
for (i, line) in renderer.lines().enumerate() {
write!(
&mut stdout,
"<tr><td class=line-number>{}</td><td class=line>{}</td></tr>\n",
@ -380,14 +361,21 @@ pub fn html(
line
)?;
}
write!(&mut stdout, "</table>\n")?;
if print_time {
eprintln!("Time: {}ms", time.elapsed().as_millis());
}
Ok(())
}
fn language_for_injection_string<'a>(
loader: &'a Loader,
theme: &Theme,
string: &str,
) -> Option<(Language, &'a PropertySheet<Properties>)> {
) -> Option<&'a HighlightConfiguration> {
match loader.language_configuration_for_injection_string(string) {
Err(e) => {
eprintln!(
@ -399,7 +387,7 @@ fn language_for_injection_string<'a>(
}
Ok(None) => None,
Ok(Some((language, configuration))) => {
match configuration.highlight_property_sheet(language) {
match configuration.highlight_config(&theme.highlighter, language) {
Err(e) => {
eprintln!(
"Failed to load property sheet for injection string '{}': {}",
@ -409,7 +397,7 @@ fn language_for_injection_string<'a>(
None
}
Ok(None) => None,
Ok(Some(sheet)) => Some((language, sheet)),
Ok(Some(config)) => Some(config),
}
}
}

View file

@ -9,8 +9,8 @@ use std::path::{Path, PathBuf};
use std::process::Command;
use std::time::SystemTime;
use std::{fs, mem};
use tree_sitter::{Language, PropertySheet};
use tree_sitter_highlight::{load_property_sheet, Properties};
use tree_sitter::Language;
use tree_sitter_highlight::{HighlightConfiguration, Highlighter};
#[cfg(unix)]
const DYLIB_EXTENSION: &'static str = "so";
@ -27,9 +27,12 @@ pub struct LanguageConfiguration {
pub _first_line_regex: Option<Regex>,
pub injection_regex: Option<Regex>,
pub file_types: Vec<String>,
pub highlight_property_sheet_path: Option<PathBuf>,
pub root_path: PathBuf,
pub highlights_filenames: Option<Vec<String>>,
pub injections_filenames: Option<Vec<String>>,
pub locals_filenames: Option<Vec<String>>,
language_id: usize,
highlight_property_sheet: OnceCell<Option<PropertySheet<Properties>>>,
highlight_config: OnceCell<Option<HighlightConfiguration>>,
}
pub struct Loader {
@ -134,7 +137,6 @@ impl Loader {
if configuration_ids.len() == 1 {
configuration = &self.language_configurations[configuration_ids[0]];
}
// If multiple language configurations match, then determine which
// one to use by applying the configurations' content regexes.
else {
@ -151,7 +153,6 @@ impl Loader {
if let Some(mat) = content_regex.find(&file_contents) {
score = (mat.end() - mat.start()) as isize;
}
// If the content regex does not match, then *penalize* this
// language configuration, so that language configurations
// without content regexes are preferred over those with
@ -342,6 +343,30 @@ impl Loader {
&'a mut self,
parser_path: &Path,
) -> Result<&[LanguageConfiguration]> {
/// JSON shape accepted for the `highlights`, `injections` and `locals`
/// settings: absent, a single string, or a list of strings.
#[derive(Deserialize)]
#[serde(untagged)]
enum PathsJSON {
    /// No value; also what a missing key becomes via `#[serde(default)]`.
    Empty,
    /// A single string value.
    Single(String),
    /// A list of string values.
    Multiple(Vec<String>),
}

/// The default is `Empty`.
impl Default for PathsJSON {
    fn default() -> PathsJSON {
        PathsJSON::Empty
    }
}

impl PathsJSON {
    /// Flattens the value into an optional list: `None` for `Empty`, a
    /// one-element vector for `Single`, and the list itself for `Multiple`.
    fn into_vec(self) -> Option<Vec<String>> {
        let paths = match self {
            PathsJSON::Empty => return None,
            PathsJSON::Single(path) => vec![path],
            PathsJSON::Multiple(paths) => paths,
        };
        Some(paths)
    }
}
#[derive(Deserialize)]
struct LanguageConfigurationJSON {
#[serde(default)]
@ -355,7 +380,12 @@ impl Loader {
first_line_regex: Option<String>,
#[serde(rename = "injection-regex")]
injection_regex: Option<String>,
highlights: Option<String>,
#[serde(default)]
highlights: PathsJSON,
#[serde(default)]
injections: PathsJSON,
#[serde(default)]
locals: PathsJSON,
}
#[derive(Deserialize)]
@ -394,6 +424,7 @@ impl Loader {
});
let configuration = LanguageConfiguration {
root_path: parser_path.to_path_buf(),
scope: config_json.scope,
language_id,
file_types: config_json.file_types.unwrap_or(Vec::new()),
@ -406,10 +437,10 @@ impl Loader {
injection_regex: config_json
.injection_regex
.and_then(|r| RegexBuilder::new(&r).multi_line(true).build().ok()),
highlight_property_sheet_path: config_json
.highlights
.map(|h| parser_path.join(h)),
highlight_property_sheet: OnceCell::new(),
highlight_config: OnceCell::new(),
injections_filenames: config_json.injections.into_vec(),
locals_filenames: config_json.locals.into_vec(),
highlights_filenames: config_json.highlights.into_vec(),
};
for file_type in &configuration.file_types {
@ -427,16 +458,10 @@ impl Loader {
if self.language_configurations.len() == initial_language_configuration_count
&& parser_path.join("src").join("grammar.json").exists()
{
self.language_configurations.push(LanguageConfiguration {
language_id: self.languages_by_id.len(),
scope: None,
content_regex: None,
injection_regex: None,
file_types: Vec::new(),
_first_line_regex: None,
highlight_property_sheet_path: None,
highlight_property_sheet: OnceCell::new(),
});
let mut configuration = LanguageConfiguration::default();
configuration.root_path = parser_path.to_owned();
configuration.language_id = self.languages_by_id.len();
self.language_configurations.push(configuration);
self.languages_by_id
.push((parser_path.to_owned(), OnceCell::new()));
}
@ -446,29 +471,55 @@ impl Loader {
}
impl LanguageConfiguration {
pub fn highlight_property_sheet(
pub fn highlight_config(
&self,
highlighter: &Highlighter,
language: Language,
) -> Result<Option<&PropertySheet<Properties>>> {
self.highlight_property_sheet
) -> Result<Option<&HighlightConfiguration>> {
self.highlight_config
.get_or_try_init(|| {
if let Some(path) = &self.highlight_property_sheet_path {
let sheet_json = fs::read_to_string(path).map_err(Error::wrap(|| {
format!(
"Failed to read property sheet {:?}",
path.file_name().unwrap()
)
}))?;
let sheet =
load_property_sheet(language, &sheet_json).map_err(Error::wrap(|| {
format!(
"Failed to parse property sheet {:?}",
path.file_name().unwrap()
)
}))?;
Ok(Some(sheet))
} else {
let queries_path = self.root_path.join("queries");
// Loads the source text of one kind of query.
//
// * `paths` - explicitly configured query files, joined onto `root_path`.
//   When present, the files are read in order and their contents are
//   concatenated into a single string.
// * `default_path` - file name looked up inside `queries_path` when no
//   explicit paths were configured.
//
// Yields an empty string when no paths are configured and the default file
// does not exist; a file that cannot be read produces a wrapped error.
let read_queries = |paths: &Option<Vec<String>>, default_path: &str| {
if let Some(paths) = paths.as_ref() {
let mut query = String::new();
for path in paths {
let path = self.root_path.join(path);
// NOTE(review): contents are appended with no separator, so a file
// lacking a trailing newline runs straight into the next one - confirm
// that this is acceptable for the query syntax.
query += &fs::read_to_string(&path).map_err(Error::wrap(|| {
format!("Failed to read query file {:?}", path)
}))?;
}
Ok(query)
} else {
let path = queries_path.join(default_path);
// A missing default file is not an error: it just contributes no query.
if path.exists() {
fs::read_to_string(&path).map_err(Error::wrap(|| {
format!("Failed to read query file {:?}", path)
}))
} else {
Ok(String::new())
}
}
};
let highlights_query = read_queries(&self.highlights_filenames, "highlights.scm")?;
let injections_query = read_queries(&self.injections_filenames, "injections.scm")?;
let locals_query = read_queries(&self.locals_filenames, "locals.scm")?;
if highlights_query.is_empty() {
Ok(None)
} else {
Ok(Some(
highlighter
.load_configuration(
language,
&highlights_query,
&injections_query,
&locals_query,
)
.map_err(Error::wrap(|| {
format!("Failed to load queries in {:?}", self.root_path)
}))?,
))
}
})
.map(Option::as_ref)

View file

@ -40,7 +40,6 @@ fn run() -> error::Result<()> {
.arg(Arg::with_name("grammar-path").index(1))
.arg(Arg::with_name("log").long("log"))
.arg(Arg::with_name("next-abi").long("next-abi"))
.arg(Arg::with_name("properties-only").long("properties"))
.arg(
Arg::with_name("report-states-for-rule")
.long("report-states-for-rule")
@ -110,7 +109,8 @@ fn run() -> error::Result<()> {
)
.arg(Arg::with_name("scope").long("scope").takes_value(true))
.arg(Arg::with_name("html").long("html").short("h"))
.arg(Arg::with_name("time").long("time").short("t")),
.arg(Arg::with_name("time").long("time").short("t"))
.arg(Arg::with_name("q").short("q")),
)
.subcommand(
SubCommand::with_name("build-wasm")
@ -141,7 +141,6 @@ fn run() -> error::Result<()> {
config.save(&home_dir)?;
} else if let Some(matches) = matches.subcommand_matches("generate") {
let grammar_path = matches.value_of("grammar-path");
let properties_only = matches.is_present("properties-only");
let report_symbol_name = matches.value_of("report-states-for-rule").or_else(|| {
if matches.is_present("report-states") {
Some("")
@ -156,7 +155,6 @@ fn run() -> error::Result<()> {
generate::generate_parser_in_directory(
&current_dir,
grammar_path,
properties_only,
next_abi,
report_symbol_name,
)?;
@ -260,15 +258,18 @@ fn run() -> error::Result<()> {
},
};
if let Some(sheet) = language_config.highlight_property_sheet(language)? {
let source = fs::read(path)?;
let source = fs::read(path)?;
if let Some(highlight_config) =
language_config.highlight_config(&config.theme.highlighter, language)?
{
if html_mode {
highlight::html(&loader, &config.theme, &source, language, sheet)?;
highlight::html(&loader, &config.theme, &source, highlight_config, time)?;
} else {
highlight::ansi(&loader, &config.theme, &source, language, sheet, time)?;
highlight::ansi(&loader, &config.theme, &source, highlight_config, time)?;
}
} else {
return Error::err(format!("No syntax highlighting property sheet specified"));
return Error::err(format!("No syntax highlighting query found"));
}
}
} else if let Some(matches) = matches.subcommand_matches("build-wasm") {
@ -280,10 +281,17 @@ fn run() -> error::Result<()> {
loader.find_all_languages(&config.parser_directories)?;
for (configuration, language_path) in loader.get_all_language_configurations() {
println!(
"scope: {}\nparser: {:?}\nproperties: {:?}\nfile_types: {:?}\ncontent_regex: {:?}\ninjection_regex: {:?}\n",
concat!(
"scope: {}\n",
"parser: {:?}\n",
"highlights: {:?}\n",
"file_types: {:?}\n",
"content_regex: {:?}\n",
"injection_regex: {:?}\n",
),
configuration.scope.as_ref().unwrap_or(&String::new()),
language_path,
configuration.highlight_property_sheet_path,
configuration.highlights_filenames,
configuration.file_types,
configuration.content_regex,
configuration.injection_regex,

View file

@ -2,8 +2,8 @@ use crate::loader::Loader;
use lazy_static::lazy_static;
use std::fs;
use std::path::{Path, PathBuf};
use tree_sitter::{Language, PropertySheet};
use tree_sitter_highlight::{load_property_sheet, Properties};
use tree_sitter::Language;
use tree_sitter_highlight::{HighlightConfiguration, Highlighter};
include!("./dirs.rs");
@ -21,18 +21,28 @@ pub fn get_language(name: &str) -> Language {
.unwrap()
}
pub fn get_property_sheet_json(language_name: &str, sheet_name: &str) -> String {
let path = GRAMMARS_DIR
.join(language_name)
.join("src")
.join(sheet_name);
fs::read_to_string(path).unwrap()
pub fn get_language_queries_path(language_name: &str) -> PathBuf {
GRAMMARS_DIR.join(language_name).join("queries")
}
pub fn get_property_sheet(language_name: &str, sheet_name: &str) -> PropertySheet<Properties> {
let json = get_property_sheet_json(language_name, sheet_name);
pub fn get_highlight_config(
highlighter: &Highlighter,
language_name: &str,
injection_query_filename: &str,
) -> HighlightConfiguration {
let language = get_language(language_name);
load_property_sheet(language, &json).unwrap()
let queries_path = get_language_queries_path(language_name);
let highlights_query = fs::read_to_string(queries_path.join("highlights.scm")).unwrap();
let injections_query = fs::read_to_string(queries_path.join(injection_query_filename)).unwrap();
let locals_query = fs::read_to_string(queries_path.join("locals.scm")).unwrap_or(String::new());
highlighter
.load_configuration(
language,
&highlights_query,
&injections_query,
&locals_query,
)
.unwrap()
}
pub fn get_test_language(name: &str, parser_code: &str, path: Option<&Path>) -> Language {

View file

@ -1,32 +1,88 @@
use super::helpers::fixtures::{get_language, get_property_sheet, get_property_sheet_json};
use super::helpers::fixtures::{get_highlight_config, get_language, get_language_queries_path};
use lazy_static::lazy_static;
use std::ffi::CString;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::{ptr, slice, str};
use tree_sitter::{Language, PropertySheet};
use std::{fs, ptr, slice, str};
use tree_sitter_highlight::{
c, highlight, highlight_html, Error, Highlight, HighlightEvent, Properties,
c, Error, HighlightConfiguration, HighlightContext, HighlightEvent, Highlighter, HtmlRenderer,
};
lazy_static! {
static ref JS_SHEET: PropertySheet<Properties> =
get_property_sheet("javascript", "highlights.json");
static ref HTML_SHEET: PropertySheet<Properties> =
get_property_sheet("html", "highlights.json");
static ref EJS_SHEET: PropertySheet<Properties> =
get_property_sheet("embedded-template", "highlights-ejs.json");
static ref RUST_SHEET: PropertySheet<Properties> =
get_property_sheet("rust", "highlights.json");
static ref SCOPE_CLASS_STRINGS: Vec<String> = {
let mut result = Vec::new();
let mut i = 0;
while let Some(highlight) = Highlight::from_usize(i) {
result.push(format!("class={:?}", highlight));
i += 1;
}
result
};
static ref JS_HIGHLIGHT: HighlightConfiguration =
get_highlight_config(&HIGHLIGHTER, "javascript", "injections.scm");
static ref HTML_HIGHLIGHT: HighlightConfiguration =
get_highlight_config(&HIGHLIGHTER, "html", "injections.scm");
static ref EJS_HIGHLIGHT: HighlightConfiguration =
get_highlight_config(&HIGHLIGHTER, "embedded-template", "injections-ejs.scm");
static ref RUST_HIGHLIGHT: HighlightConfiguration =
get_highlight_config(&HIGHLIGHTER, "rust", "injections.scm");
// Highlighter shared by every test in this file, constructed from the list
// of highlight names below. The `*_HIGHLIGHT` configurations are loaded
// through it, and the test helpers map highlight indices back to these
// strings via `HIGHLIGHTER.names()`.
static ref HIGHLIGHTER: Highlighter = Highlighter::new(
[
"attribute",
"constant",
"constructor",
"function.builtin",
"function",
"embedded",
"keyword",
"operator",
"property.builtin",
"property",
"punctuation",
"punctuation.bracket",
"punctuation.delimiter",
"punctuation.special",
"string",
"tag",
"type.builtin",
"type",
"variable.builtin",
"variable.parameter",
"variable",
]
.iter()
.cloned()
.map(String::from)
.collect()
);
// One `class=<name>` attribute string per entry of `HIGHLIGHTER.names()`,
// in the same order, so that `to_html` can index it with a highlight's
// numeric index.
static ref HTML_ATTRS: Vec<String> = HIGHLIGHTER
.names()
.iter()
.map(|s| format!("class={}", s))
.collect();
}
#[test]
fn test_highlighting_javascript() {
    // A one-line snippet: the expected result is a single line made of
    // (text, active highlight names) pairs that together cover the source.
    let source = "const a = function(b) { return b + c; }";
    let actual = to_token_vector(source, &JS_HIGHLIGHT).unwrap();

    let expected = vec![vec![
        ("const", vec!["keyword"]),
        (" ", vec![]),
        ("a", vec!["function"]),
        (" ", vec![]),
        ("=", vec!["operator"]),
        (" ", vec![]),
        ("function", vec!["keyword"]),
        ("(", vec!["punctuation.bracket"]),
        ("b", vec!["variable.parameter"]),
        (")", vec!["punctuation.bracket"]),
        (" ", vec![]),
        ("{", vec!["punctuation.bracket"]),
        (" ", vec![]),
        ("return", vec!["keyword"]),
        (" ", vec![]),
        ("b", vec!["variable.parameter"]),
        (" ", vec![]),
        ("+", vec!["operator"]),
        (" ", vec![]),
        ("c", vec!["variable"]),
        (";", vec!["punctuation.delimiter"]),
        (" ", vec![]),
        ("}", vec!["punctuation.bracket"]),
    ]];

    assert_eq!(actual, expected);
}
#[test]
@ -34,57 +90,65 @@ fn test_highlighting_injected_html_in_javascript() {
let source = vec!["const s = html `<div>${a < b}</div>`;"].join("\n");
assert_eq!(
&to_token_vector(&source, get_language("javascript"), &JS_SHEET).unwrap(),
&to_token_vector(&source, &JS_HIGHLIGHT).unwrap(),
&[vec![
("const", vec![Highlight::Keyword]),
("const", vec!["keyword"]),
(" ", vec![]),
("s", vec![Highlight::Variable]),
("s", vec!["variable"]),
(" ", vec![]),
("=", vec![Highlight::Operator]),
("=", vec!["operator"]),
(" ", vec![]),
("html", vec![Highlight::Function]),
("html", vec!["function"]),
(" ", vec![]),
("`<", vec![Highlight::String]),
("div", vec![Highlight::String, Highlight::Tag]),
(">", vec![Highlight::String]),
(
"${",
vec![
Highlight::String,
Highlight::Embedded,
Highlight::PunctuationSpecial
]
),
(
"a",
vec![Highlight::String, Highlight::Embedded, Highlight::Variable]
),
(" ", vec![Highlight::String, Highlight::Embedded]),
(
"<",
vec![Highlight::String, Highlight::Embedded, Highlight::Operator]
),
(" ", vec![Highlight::String, Highlight::Embedded]),
(
"b",
vec![Highlight::String, Highlight::Embedded, Highlight::Variable]
),
(
"}",
vec![
Highlight::String,
Highlight::Embedded,
Highlight::PunctuationSpecial
]
),
("</", vec![Highlight::String]),
("div", vec![Highlight::String, Highlight::Tag]),
(">`", vec![Highlight::String]),
(";", vec![Highlight::PunctuationDelimiter]),
("`", vec!["string"]),
("<", vec!["string", "punctuation.bracket"]),
("div", vec!["string", "tag"]),
(">", vec!["string", "punctuation.bracket"]),
("${", vec!["string", "embedded", "punctuation.special"]),
("a", vec!["string", "embedded", "variable"]),
(" ", vec!["string", "embedded"]),
("<", vec!["string", "embedded", "operator"]),
(" ", vec!["string", "embedded"]),
("b", vec!["string", "embedded", "variable"]),
("}", vec!["string", "embedded", "punctuation.special"]),
("</", vec!["string", "punctuation.bracket"]),
("div", vec!["string", "tag"]),
(">", vec!["string", "punctuation.bracket"]),
("`", vec!["string"]),
(";", vec!["punctuation.delimiter"]),
]]
);
}
#[test]
fn test_highlighting_injected_javascript_in_html_mini() {
    // Smallest HTML document with a script body: the tokens between the
    // `<script>` tags are expected to carry JavaScript highlight names.
    let source = "<script>const x = new Thing();</script>";
    let actual = to_token_vector(source, &HTML_HIGHLIGHT).unwrap();

    let expected = vec![vec![
        ("<", vec!["punctuation.bracket"]),
        ("script", vec!["tag"]),
        (">", vec!["punctuation.bracket"]),
        ("const", vec!["keyword"]),
        (" ", vec![]),
        ("x", vec!["variable"]),
        (" ", vec![]),
        ("=", vec!["operator"]),
        (" ", vec![]),
        ("new", vec!["keyword"]),
        (" ", vec![]),
        ("Thing", vec!["constructor"]),
        ("(", vec!["punctuation.bracket"]),
        (")", vec!["punctuation.bracket"]),
        (";", vec!["punctuation.delimiter"]),
        ("</", vec!["punctuation.bracket"]),
        ("script", vec!["tag"]),
        (">", vec!["punctuation.bracket"]),
    ]];

    assert_eq!(actual, expected);
}
#[test]
fn test_highlighting_injected_javascript_in_html() {
let source = vec![
@ -97,38 +161,44 @@ fn test_highlighting_injected_javascript_in_html() {
.join("\n");
assert_eq!(
&to_token_vector(&source, get_language("html"), &HTML_SHEET).unwrap(),
&to_token_vector(&source, &HTML_HIGHLIGHT).unwrap(),
&[
vec![("<", vec![]), ("body", vec![Highlight::Tag]), (">", vec![]),],
vec![
(" <", vec![]),
("script", vec![Highlight::Tag]),
(">", vec![]),
("<", vec!["punctuation.bracket"]),
("body", vec!["tag"]),
(">", vec!["punctuation.bracket"]),
],
vec![
(" ", vec![]),
("<", vec!["punctuation.bracket"]),
("script", vec!["tag"]),
(">", vec!["punctuation.bracket"]),
],
vec![
(" ", vec![]),
("const", vec![Highlight::Keyword]),
("const", vec!["keyword"]),
(" ", vec![]),
("x", vec![Highlight::Variable]),
("x", vec!["variable"]),
(" ", vec![]),
("=", vec![Highlight::Operator]),
("=", vec!["operator"]),
(" ", vec![]),
("new", vec![Highlight::Keyword]),
("new", vec!["keyword"]),
(" ", vec![]),
("Thing", vec![Highlight::Constructor]),
("(", vec![Highlight::PunctuationBracket]),
(")", vec![Highlight::PunctuationBracket]),
(";", vec![Highlight::PunctuationDelimiter]),
("Thing", vec!["constructor"]),
("(", vec!["punctuation.bracket"]),
(")", vec!["punctuation.bracket"]),
(";", vec!["punctuation.delimiter"]),
],
vec![
(" </", vec![]),
("script", vec![Highlight::Tag]),
(">", vec![]),
(" ", vec![]),
("</", vec!["punctuation.bracket"]),
("script", vec!["tag"]),
(">", vec!["punctuation.bracket"]),
],
vec![
("</", vec![]),
("body", vec![Highlight::Tag]),
(">", vec![]),
("</", vec!["punctuation.bracket"]),
("body", vec!["tag"]),
(">", vec!["punctuation.bracket"]),
],
]
);
@ -147,13 +217,13 @@ fn test_highlighting_multiline_nodes_to_html() {
.join("\n");
assert_eq!(
&to_html(&source, get_language("javascript"), &JS_SHEET,).unwrap(),
&to_html(&source, &JS_HIGHLIGHT).unwrap(),
&[
"<span class=Keyword>const</span> <span class=Constant>SOMETHING</span> <span class=Operator>=</span> <span class=String>`</span>\n".to_string(),
"<span class=String> one <span class=Embedded><span class=PunctuationSpecial>${</span></span></span>\n".to_string(),
"<span class=String><span class=Embedded> <span class=Function>two</span><span class=PunctuationBracket>(</span><span class=PunctuationBracket>)</span></span></span>\n".to_string(),
"<span class=String><span class=Embedded> <span class=PunctuationSpecial>}</span></span> three</span>\n".to_string(),
"<span class=String>`</span>\n".to_string(),
"<span class=keyword>const</span> <span class=constant>SOMETHING</span> <span class=operator>=</span> <span class=string>`</span>\n".to_string(),
"<span class=string> one <span class=embedded><span class=punctuation.special>${</span></span></span>\n".to_string(),
"<span class=string><span class=embedded> <span class=function>two</span><span class=punctuation.bracket>(</span><span class=punctuation.bracket>)</span></span></span>\n".to_string(),
"<span class=string><span class=embedded> <span class=punctuation.special>}</span></span> three</span>\n".to_string(),
"<span class=string>`</span>\n".to_string(),
]
);
}
@ -169,51 +239,51 @@ fn test_highlighting_with_local_variable_tracking() {
.join("\n");
assert_eq!(
&to_token_vector(&source, get_language("javascript"), &JS_SHEET).unwrap(),
&to_token_vector(&source, &JS_HIGHLIGHT).unwrap(),
&[
vec![
("module", vec![Highlight::VariableBuiltin]),
(".", vec![Highlight::PunctuationDelimiter]),
("exports", vec![Highlight::Property]),
("module", vec!["variable.builtin"]),
(".", vec!["punctuation.delimiter"]),
("exports", vec!["function"]),
(" ", vec![]),
("=", vec![Highlight::Operator]),
("=", vec!["operator"]),
(" ", vec![]),
("function", vec![Highlight::Keyword]),
("function", vec!["keyword"]),
(" ", vec![]),
("a", vec![Highlight::Function]),
("(", vec![Highlight::PunctuationBracket]),
("b", vec![Highlight::VariableParameter]),
(")", vec![Highlight::PunctuationBracket]),
("a", vec!["function"]),
("(", vec!["punctuation.bracket"]),
("b", vec!["variable.parameter"]),
(")", vec!["punctuation.bracket"]),
(" ", vec![]),
("{", vec![Highlight::PunctuationBracket])
("{", vec!["punctuation.bracket"])
],
vec![
(" ", vec![]),
("const", vec![Highlight::Keyword]),
("const", vec!["keyword"]),
(" ", vec![]),
("module", vec![Highlight::Variable]),
("module", vec!["variable"]),
(" ", vec![]),
("=", vec![Highlight::Operator]),
("=", vec!["operator"]),
(" ", vec![]),
("c", vec![Highlight::Variable]),
(";", vec![Highlight::PunctuationDelimiter])
("c", vec!["variable"]),
(";", vec!["punctuation.delimiter"])
],
vec![
(" ", vec![]),
("console", vec![Highlight::VariableBuiltin]),
(".", vec![Highlight::PunctuationDelimiter]),
("log", vec![Highlight::Function]),
("(", vec![Highlight::PunctuationBracket]),
("console", vec!["variable.builtin"]),
(".", vec!["punctuation.delimiter"]),
("log", vec!["function"]),
("(", vec!["punctuation.bracket"]),
// Not a builtin, because `module` was defined as a variable above.
("module", vec![Highlight::Variable]),
(",", vec![Highlight::PunctuationDelimiter]),
("module", vec!["variable"]),
(",", vec!["punctuation.delimiter"]),
(" ", vec![]),
// A parameter, because `b` was defined as a parameter above.
("b", vec![Highlight::VariableParameter]),
(")", vec![Highlight::PunctuationBracket]),
(";", vec![Highlight::PunctuationDelimiter]),
("b", vec!["variable.parameter"]),
(")", vec!["punctuation.bracket"]),
(";", vec!["punctuation.delimiter"]),
],
vec![("}", vec![Highlight::PunctuationBracket])]
vec![("}", vec!["punctuation.bracket"])]
],
);
}
@ -234,17 +304,17 @@ fn test_highlighting_empty_lines() {
.join("\n");
assert_eq!(
&to_html(&source, get_language("javascript"), &JS_SHEET,).unwrap(),
&to_html(&source, &JS_HIGHLIGHT,).unwrap(),
&[
"<span class=Keyword>class</span> <span class=Constructor>A</span> <span class=PunctuationBracket>{</span>\n".to_string(),
"<span class=keyword>class</span> <span class=constructor>A</span> <span class=punctuation.bracket>{</span>\n".to_string(),
"\n".to_string(),
" <span class=Function>b</span><span class=PunctuationBracket>(</span><span class=VariableParameter>c</span><span class=PunctuationBracket>)</span> <span class=PunctuationBracket>{</span>\n".to_string(),
" <span class=function>b</span><span class=punctuation.bracket>(</span><span class=variable.parameter>c</span><span class=punctuation.bracket>)</span> <span class=punctuation.bracket>{</span>\n".to_string(),
"\n".to_string(),
" <span class=Function>d</span><span class=PunctuationBracket>(</span><span class=Variable>e</span><span class=PunctuationBracket>)</span>\n".to_string(),
" <span class=function>d</span><span class=punctuation.bracket>(</span><span class=variable>e</span><span class=punctuation.bracket>)</span>\n".to_string(),
"\n".to_string(),
" <span class=PunctuationBracket>}</span>\n".to_string(),
" <span class=punctuation.bracket>}</span>\n".to_string(),
"\n".to_string(),
"<span class=PunctuationBracket>}</span>\n".to_string(),
"<span class=punctuation.bracket>}</span>\n".to_string(),
]
);
}
@ -254,21 +324,21 @@ fn test_highlighting_ejs() {
let source = vec!["<div><% foo() %></div>"].join("\n");
assert_eq!(
&to_token_vector(&source, get_language("embedded-template"), &EJS_SHEET).unwrap(),
&to_token_vector(&source, &EJS_HIGHLIGHT).unwrap(),
&[[
("<", vec![]),
("div", vec![Highlight::Tag]),
(">", vec![]),
("<%", vec![Highlight::Keyword]),
("<", vec!["punctuation.bracket"]),
("div", vec!["tag"]),
(">", vec!["punctuation.bracket"]),
("<%", vec!["keyword"]),
(" ", vec![]),
("foo", vec![Highlight::Function]),
("(", vec![Highlight::PunctuationBracket]),
(")", vec![Highlight::PunctuationBracket]),
("foo", vec!["function"]),
("(", vec!["punctuation.bracket"]),
(")", vec!["punctuation.bracket"]),
(" ", vec![]),
("%>", vec![Highlight::Keyword]),
("</", vec![]),
("div", vec![Highlight::Tag]),
(">", vec![])
("%>", vec!["keyword"]),
("</", vec!["punctuation.bracket"]),
("div", vec!["tag"]),
(">", vec!["punctuation.bracket"])
]],
);
}
@ -278,33 +348,36 @@ fn test_highlighting_with_content_children_included() {
let source = vec!["assert!(", " a.b.c() < D::e::<F>()", ");"].join("\n");
assert_eq!(
&to_token_vector(&source, get_language("rust"), &RUST_SHEET).unwrap(),
&to_token_vector(&source, &RUST_HIGHLIGHT).unwrap(),
&[
vec![
("assert", vec![Highlight::Function]),
("!", vec![Highlight::Function]),
("(", vec![Highlight::PunctuationBracket]),
("assert", vec!["function"]),
("!", vec!["function"]),
("(", vec!["punctuation.bracket"]),
],
vec![
(" a", vec![]),
(".", vec![Highlight::PunctuationDelimiter]),
("b", vec![Highlight::Property]),
(".", vec![Highlight::PunctuationDelimiter]),
("c", vec![Highlight::Function]),
("(", vec![Highlight::PunctuationBracket]),
(")", vec![Highlight::PunctuationBracket]),
(".", vec!["punctuation.delimiter"]),
("b", vec!["property"]),
(".", vec!["punctuation.delimiter"]),
("c", vec!["function"]),
("(", vec!["punctuation.bracket"]),
(")", vec!["punctuation.bracket"]),
(" < ", vec![]),
("D", vec![Highlight::Type]),
("::", vec![Highlight::PunctuationDelimiter]),
("e", vec![Highlight::Function]),
("::", vec![Highlight::PunctuationDelimiter]),
("<", vec![Highlight::PunctuationBracket]),
("F", vec![Highlight::Type]),
(">", vec![Highlight::PunctuationBracket]),
("(", vec![Highlight::PunctuationBracket]),
(")", vec![Highlight::PunctuationBracket]),
("D", vec!["type"]),
("::", vec!["punctuation.delimiter"]),
("e", vec!["function"]),
("::", vec!["punctuation.delimiter"]),
("<", vec!["punctuation.bracket"]),
("F", vec!["type"]),
(">", vec!["punctuation.bracket"]),
("(", vec!["punctuation.bracket"]),
(")", vec!["punctuation.bracket"]),
],
vec![(")", vec![Highlight::PunctuationBracket]), (";", vec![]),]
vec![
(")", vec!["punctuation.bracket"]),
(";", vec!["punctuation.delimiter"]),
]
],
);
}
@ -327,18 +400,20 @@ fn test_highlighting_cancellation() {
// Constructing the highlighter, which eagerly parses the outer document,
// should not fail.
let highlighter = highlight(
source.as_bytes(),
get_language("html"),
&HTML_SHEET,
Some(&cancellation_flag),
injection_callback,
)
.unwrap();
let mut context = HighlightContext::new();
let events = HIGHLIGHTER
.highlight(
&mut context,
&HTML_HIGHLIGHT,
source.as_bytes(),
Some(&cancellation_flag),
injection_callback,
)
.unwrap();
// Iterating the scopes should not panic. It should return an error
// once the cancellation is detected.
for event in highlighter {
for event in events {
if let Err(e) = event {
assert_eq!(e, Error::Cancelled);
return;
@ -349,49 +424,72 @@ fn test_highlighting_cancellation() {
#[test]
fn test_highlighting_via_c_api() {
let js_lang = get_language("javascript");
let html_lang = get_language("html");
let js_sheet = get_property_sheet_json("javascript", "highlights.json");
let js_sheet = c_string(&js_sheet);
let html_sheet = get_property_sheet_json("html", "highlights.json");
let html_sheet = c_string(&html_sheet);
let highlights = vec![
"class=tag\0",
"class=function\0",
"class=string\0",
"class=keyword\0",
];
let highlight_names = highlights
.iter()
.map(|h| h["class=".len()..].as_ptr() as *const i8)
.collect::<Vec<_>>();
let highlight_attrs = highlights
.iter()
.map(|h| h.as_bytes().as_ptr() as *const i8)
.collect::<Vec<_>>();
let highlighter = c::ts_highlighter_new(
&highlight_names[0] as *const *const i8,
&highlight_attrs[0] as *const *const i8,
highlights.len() as u32,
);
let class_tag = c_string("class=tag");
let class_function = c_string("class=function");
let class_string = c_string("class=string");
let class_keyword = c_string("class=keyword");
let js_scope_name = c_string("source.js");
let html_scope_name = c_string("text.html.basic");
let injection_regex = c_string("^(javascript|js)$");
let source_code = c_string("<script>\nconst a = b('c');\nc.d();\n</script>");
let attribute_strings = &mut [ptr::null(); Highlight::Unknown as usize + 1];
attribute_strings[Highlight::Tag as usize] = class_tag.as_ptr();
attribute_strings[Highlight::String as usize] = class_string.as_ptr();
attribute_strings[Highlight::Keyword as usize] = class_keyword.as_ptr();
attribute_strings[Highlight::Function as usize] = class_function.as_ptr();
let js_scope = c_string("source.js");
let js_injection_regex = c_string("^javascript");
let language = get_language("javascript");
let queries = get_language_queries_path("javascript");
let highlights_query = fs::read_to_string(queries.join("highlights.scm")).unwrap();
let injections_query = fs::read_to_string(queries.join("injections.scm")).unwrap();
let locals_query = fs::read_to_string(queries.join("locals.scm")).unwrap();
c::ts_highlighter_add_language(
highlighter,
js_scope.as_ptr(),
js_injection_regex.as_ptr(),
language,
highlights_query.as_ptr() as *const i8,
injections_query.as_ptr() as *const i8,
locals_query.as_ptr() as *const i8,
highlights_query.len() as u32,
injections_query.len() as u32,
locals_query.len() as u32,
);
let html_scope = c_string("text.html.basic");
let html_injection_regex = c_string("^html");
let language = get_language("html");
let queries = get_language_queries_path("html");
let highlights_query = fs::read_to_string(queries.join("highlights.scm")).unwrap();
let injections_query = fs::read_to_string(queries.join("injections.scm")).unwrap();
c::ts_highlighter_add_language(
highlighter,
html_scope.as_ptr(),
html_injection_regex.as_ptr(),
language,
highlights_query.as_ptr() as *const i8,
injections_query.as_ptr() as *const i8,
ptr::null(),
highlights_query.len() as u32,
injections_query.len() as u32,
0,
);
let highlighter = c::ts_highlighter_new(attribute_strings.as_ptr());
let buffer = c::ts_highlight_buffer_new();
c::ts_highlighter_add_language(
highlighter,
html_scope_name.as_ptr(),
html_lang,
html_sheet.as_ptr(),
ptr::null_mut(),
);
c::ts_highlighter_add_language(
highlighter,
js_scope_name.as_ptr(),
js_lang,
js_sheet.as_ptr(),
injection_regex.as_ptr(),
);
c::ts_highlighter_highlight(
highlighter,
html_scope_name.as_ptr(),
html_scope.as_ptr(),
source_code.as_ptr(),
source_code.as_bytes().len() as u32,
buffer,
@ -421,8 +519,8 @@ fn test_highlighting_via_c_api() {
lines,
vec![
"&lt;<span class=tag>script</span>&gt;\n",
"<span class=keyword>const</span> <span>a</span> <span>=</span> <span class=function>b</span><span>(</span><span class=string>&#39;c&#39;</span><span>)</span><span>;</span>\n",
"<span>c</span><span>.</span><span class=function>d</span><span>(</span><span>)</span><span>;</span>\n",
"<span class=keyword>const</span> a = <span class=function>b</span>(<span class=string>&#39;c&#39;</span>);\n",
"c.<span class=function>d</span>();\n",
"&lt;/<span class=tag>script</span>&gt;\n",
]
);
@ -452,50 +550,55 @@ fn c_string(s: &str) -> CString {
CString::new(s.as_bytes().to_vec()).unwrap()
}
fn test_language_for_injection_string<'a>(
string: &str,
) -> Option<(Language, &'a PropertySheet<Properties>)> {
fn test_language_for_injection_string<'a>(string: &str) -> Option<&'a HighlightConfiguration> {
match string {
"javascript" => Some((get_language("javascript"), &JS_SHEET)),
"html" => Some((get_language("html"), &HTML_SHEET)),
"rust" => Some((get_language("rust"), &RUST_SHEET)),
"javascript" => Some(&JS_HIGHLIGHT),
"html" => Some(&HTML_HIGHLIGHT),
"rust" => Some(&RUST_HIGHLIGHT),
_ => None,
}
}
fn to_html<'a>(
src: &'a str,
language: Language,
property_sheet: &'a PropertySheet<Properties>,
language_config: &'a HighlightConfiguration,
) -> Result<Vec<String>, Error> {
highlight_html(
src.as_bytes(),
language,
property_sheet,
let src = src.as_bytes();
let mut renderer = HtmlRenderer::new();
let mut context = HighlightContext::new();
let events = HIGHLIGHTER.highlight(
&mut context,
language_config,
src,
None,
&test_language_for_injection_string,
&|highlight| SCOPE_CLASS_STRINGS[highlight as usize].as_str(),
)
)?;
renderer
.render(events, src, &|highlight| HTML_ATTRS[highlight.0].as_bytes())
.unwrap();
Ok(renderer.lines().map(|s| s.to_string()).collect())
}
fn to_token_vector<'a>(
src: &'a str,
language: Language,
property_sheet: &'a PropertySheet<Properties>,
) -> Result<Vec<Vec<(&'a str, Vec<Highlight>)>>, Error> {
language_config: &'a HighlightConfiguration,
) -> Result<Vec<Vec<(&'a str, Vec<&'static str>)>>, Error> {
let src = src.as_bytes();
let mut context = HighlightContext::new();
let mut lines = Vec::new();
let mut highlights = Vec::new();
let mut line = Vec::new();
for event in highlight(
let events = HIGHLIGHTER.highlight(
&mut context,
language_config,
src,
language,
property_sheet,
None,
&test_language_for_injection_string,
)? {
)?;
for event in events {
match event? {
HighlightEvent::HighlightStart(s) => highlights.push(s),
HighlightEvent::HighlightStart(s) => highlights.push(HIGHLIGHTER.names()[s.0].as_str()),
HighlightEvent::HighlightEnd => {
highlights.pop();
}

View file

@ -3,6 +3,5 @@ mod helpers;
mod highlight_test;
mod node_test;
mod parser_test;
mod properties_test;
mod query_test;
mod tree_test;

View file

@ -7,7 +7,7 @@ use std::{thread, time};
use tree_sitter::{InputEdit, LogType, Parser, Point, Range};
#[test]
fn test_basic_parsing() {
fn test_parsing_simple_string() {
let mut parser = Parser::new();
parser.set_language(get_language("rust")).unwrap();
@ -26,7 +26,11 @@ fn test_basic_parsing() {
assert_eq!(
root_node.to_sexp(),
"(source_file (struct_item (type_identifier) (field_declaration_list)) (function_item (identifier) (parameters) (block)))"
concat!(
"(source_file ",
"(struct_item name: (type_identifier) body: (field_declaration_list)) ",
"(function_item name: (identifier) parameters: (parameters) body: (block)))"
)
);
let struct_node = root_node.child(0).unwrap();
@ -118,7 +122,17 @@ fn test_parsing_with_custom_utf8_input() {
.unwrap();
let root = tree.root_node();
assert_eq!(root.to_sexp(), "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (integer_literal))))");
assert_eq!(
root.to_sexp(),
concat!(
"(source_file ",
"(function_item ",
"(visibility_modifier) ",
"name: (identifier) ",
"parameters: (parameters) ",
"body: (block (integer_literal))))"
)
);
assert_eq!(root.kind(), "source_file");
assert_eq!(root.has_error(), false);
assert_eq!(root.child(0).unwrap().kind(), "function_item");
@ -154,7 +168,10 @@ fn test_parsing_with_custom_utf16_input() {
.unwrap();
let root = tree.root_node();
assert_eq!(root.to_sexp(), "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (integer_literal))))");
assert_eq!(
root.to_sexp(),
"(source_file (function_item (visibility_modifier) name: (identifier) parameters: (parameters) body: (block (integer_literal))))"
);
assert_eq!(root.kind(), "source_file");
assert_eq!(root.has_error(), false);
assert_eq!(root.child(0).unwrap().kind(), "function_item");
@ -175,7 +192,10 @@ fn test_parsing_with_callback_returning_owned_strings() {
.unwrap();
let root = tree.root_node();
assert_eq!(root.to_sexp(), "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (integer_literal))))");
assert_eq!(
root.to_sexp(),
"(source_file (function_item (visibility_modifier) name: (identifier) parameters: (parameters) body: (block (integer_literal))))"
);
}
#[test]
@ -192,7 +212,7 @@ fn test_parsing_text_with_byte_order_mark() {
.unwrap();
assert_eq!(
tree.root_node().to_sexp(),
"(source_file (function_item (identifier) (parameters) (block)))"
"(source_file (function_item name: (identifier) parameters: (parameters) body: (block)))"
);
assert_eq!(tree.root_node().start_byte(), 2);
@ -200,7 +220,7 @@ fn test_parsing_text_with_byte_order_mark() {
let mut tree = parser.parse("\u{FEFF}fn a() {}", None).unwrap();
assert_eq!(
tree.root_node().to_sexp(),
"(source_file (function_item (identifier) (parameters) (block)))"
"(source_file (function_item name: (identifier) parameters: (parameters) body: (block)))"
);
assert_eq!(tree.root_node().start_byte(), 3);
@ -216,7 +236,7 @@ fn test_parsing_text_with_byte_order_mark() {
let mut tree = parser.parse(" \u{FEFF}fn a() {}", Some(&tree)).unwrap();
assert_eq!(
tree.root_node().to_sexp(),
"(source_file (ERROR (UNEXPECTED 65279)) (function_item (identifier) (parameters) (block)))"
"(source_file (ERROR (UNEXPECTED 65279)) (function_item name: (identifier) parameters: (parameters) body: (block)))"
);
assert_eq!(tree.root_node().start_byte(), 1);
@ -232,7 +252,7 @@ fn test_parsing_text_with_byte_order_mark() {
let tree = parser.parse("\u{FEFF}fn a() {}", Some(&tree)).unwrap();
assert_eq!(
tree.root_node().to_sexp(),
"(source_file (function_item (identifier) (parameters) (block)))"
"(source_file (function_item name: (identifier) parameters: (parameters) body: (block)))"
);
assert_eq!(tree.root_node().start_byte(), 3);
}

View file

@ -1,265 +0,0 @@
use super::helpers::fixtures::get_language;
use crate::generate::properties;
use serde_derive::Deserialize;
use serde_json;
use std::collections::HashSet;
use tree_sitter::{Parser, PropertySheet};
// Property set type used by the tests in this file. It is deserialized from the
// generated property sheet JSON, and each test compares the value returned by
// `cursor.node_properties()` against an expected `Properties`.
#[derive(Debug, Default, Deserialize, PartialEq, Eq)]
struct Properties {
// Value of the `a` property, or `None` when no matching rule sets it.
a: Option<String>,
// Value of the `b` property, or `None` when no matching rule sets it.
b: Option<String>,
}
// Checks that `:nth-child(N)` selectors are honoured while walking a tree with a
// property sheet: three rules of decreasing specificity all set `a`, and the
// test asserts which value each identifier in `a = b || c;` ends up with.
#[test]
fn test_walk_with_properties_with_nth_child() {
let language = get_language("javascript");
let property_sheet = PropertySheet::<Properties>::new(
language,
&generate_property_sheet_string(
"/some/path.css",
"
binary_expression > identifier:nth-child(2) {
a: x;
}
binary_expression > identifier {
a: y;
}
identifier {
a: z;
}
",
),
)
.unwrap();
let source_code = "a = b || c;";
let mut parser = Parser::new();
parser.set_language(language).unwrap();
let tree = parser.parse(source_code, None).unwrap();
let mut cursor = tree.walk_with_properties(&property_sheet, source_code.as_bytes());
assert_eq!(cursor.node().kind(), "program");
assert!(cursor.goto_first_child());
assert_eq!(cursor.node().kind(), "expression_statement");
assert!(cursor.goto_first_child());
assert_eq!(cursor.node().kind(), "assignment_expression");
// The assignment target is not inside a `binary_expression`, so it is expected
// to receive the value from the bare `identifier` rule.
assert!(cursor.goto_first_child());
assert_eq!(cursor.node().kind(), "identifier");
assert_eq!(
*cursor.node_properties(),
Properties {
a: Some("z".to_string()),
b: None
}
);
assert!(cursor.goto_next_sibling());
assert_eq!(cursor.node().kind(), "=");
assert!(cursor.goto_next_sibling());
assert_eq!(cursor.node().kind(), "binary_expression");
// First child of the `binary_expression`: expected to receive the value from
// the `binary_expression > identifier` rule.
assert!(cursor.goto_first_child());
assert_eq!(cursor.node().kind(), "identifier");
assert_eq!(
*cursor.node_properties(),
Properties {
a: Some("y".to_string()),
b: None
}
);
assert!(cursor.goto_next_sibling());
assert_eq!(cursor.node().kind(), "||");
// Third child of the `binary_expression` (after the operator): expected to
// receive the value from the `:nth-child(2)` rule.
assert!(cursor.goto_next_sibling());
assert_eq!(cursor.node().kind(), "identifier");
assert_eq!(
*cursor.node_properties(),
Properties {
a: Some("x".to_string()),
b: None
}
);
}
// Checks `[text='...']` selectors: the sheet sets `a` on every identifier and
// overrides it in two nested rules that match on the node's text. The three
// identifiers in `const ABC = Def(ghi);` are expected to get `z`, `y` and `x`.
#[test]
fn test_walk_with_properties_with_regexes() {
let language = get_language("javascript");
let property_sheet = PropertySheet::<Properties>::new(
language,
&generate_property_sheet_string(
"/some/path.css",
"
identifier {
&[text='^[A-Z]'] {
a: y;
}
&[text='^[A-Z_]+$'] {
a: z;
}
a: x;
}
",
),
)
.unwrap();
let source_code = "const ABC = Def(ghi);";
let mut parser = Parser::new();
parser.set_language(language).unwrap();
let tree = parser.parse(source_code, None).unwrap();
let mut cursor = tree.walk_with_properties(&property_sheet, source_code.as_bytes());
assert_eq!(cursor.node().kind(), "program");
assert!(cursor.goto_first_child());
assert_eq!(cursor.node().kind(), "lexical_declaration");
assert!(cursor.goto_first_child());
assert_eq!(cursor.node().kind(), "const");
assert!(cursor.goto_next_sibling());
assert_eq!(cursor.node().kind(), "variable_declarator");
// The later selector with a text regex overrides the earlier one.
// (`ABC` matches both `^[A-Z]` and `^[A-Z_]+$`.)
assert!(cursor.goto_first_child());
assert_eq!(cursor.node().kind(), "identifier");
assert_eq!(
*cursor.node_properties(),
Properties {
a: Some("z".to_string()),
b: None
}
);
assert!(cursor.goto_next_sibling());
assert_eq!(cursor.node().kind(), "=");
assert!(cursor.goto_next_sibling());
assert_eq!(cursor.node().kind(), "call_expression");
// The selectors with text regexes override the selector without one.
// (`Def` matches only `^[A-Z]`.)
assert!(cursor.goto_first_child());
assert_eq!(cursor.node().kind(), "identifier");
assert_eq!(
*cursor.node_properties(),
Properties {
a: Some("y".to_string()),
b: None
}
);
assert!(cursor.goto_next_sibling());
assert_eq!(cursor.node().kind(), "arguments");
assert!(cursor.goto_first_child());
assert_eq!(cursor.node().kind(), "(");
// This node doesn't match either of the regexes.
assert!(cursor.goto_next_sibling());
assert_eq!(cursor.node().kind(), "identifier");
assert_eq!(
*cursor.node_properties(),
Properties {
a: Some("x".to_string()),
b: None
}
);
}
// Checks selectors that match on field names (`.parameter`, `.parameters`,
// `.name`). The test walks `function a(b) { return c => c + b; }` and asserts
// the properties attached to the function name, the formal parameter and the
// arrow function's parameter, plus a few nodes that should have none.
#[test]
fn test_walk_with_properties_based_on_fields() {
let language = get_language("javascript");
let property_sheet = PropertySheet::<Properties>::new(
language,
&generate_property_sheet_string(
"/some/path.css",
"
arrow_function > .parameter {
a: x;
}
function_declaration {
& > .parameters > identifier {
a: y;
}
& > .name {
b: z;
}
}
identifier {
a: w;
}
",
),
)
.unwrap();
let source_code = "function a(b) { return c => c + b; }";
let mut parser = Parser::new();
parser.set_language(language).unwrap();
let tree = parser.parse(source_code, None).unwrap();
let mut cursor = tree.walk_with_properties(&property_sheet, source_code.as_bytes());
assert!(cursor.goto_first_child());
assert_eq!(cursor.node().kind(), "function_declaration");
// The `function` keyword matches no rule, so it has the default properties.
assert!(cursor.goto_first_child());
assert_eq!(cursor.node().kind(), "function");
assert_eq!(*cursor.node_properties(), Properties::default());
// The function's name: `a` comes from the bare `identifier` rule and `b` from
// the `function_declaration > .name` rule.
assert!(cursor.goto_next_sibling());
assert_eq!(cursor.node().kind(), "identifier");
assert_eq!(
*cursor.node_properties(),
Properties {
a: Some("w".to_string()),
b: Some("z".to_string())
}
);
assert!(cursor.goto_next_sibling());
assert_eq!(cursor.node().kind(), "formal_parameters");
assert_eq!(*cursor.node_properties(), Properties::default());
assert!(cursor.goto_first_child());
assert_eq!(cursor.node().kind(), "(");
assert_eq!(*cursor.node_properties(), Properties::default());
// The identifier inside the parameter list: expected to get `a: y` from the
// `& > .parameters > identifier` rule.
assert!(cursor.goto_next_sibling());
assert_eq!(cursor.node().kind(), "identifier");
assert_eq!(
*cursor.node_properties(),
Properties {
a: Some("y".to_string()),
b: None,
}
);
// Walk back up and over to the function body, then down to the arrow function.
assert!(cursor.goto_parent());
assert!(cursor.goto_next_sibling());
assert_eq!(cursor.node().kind(), "statement_block");
assert!(cursor.goto_first_child());
assert!(cursor.goto_next_sibling());
assert_eq!(cursor.node().kind(), "return_statement");
assert!(cursor.goto_first_child());
assert!(cursor.goto_next_sibling());
assert_eq!(cursor.node().kind(), "arrow_function");
// The arrow function's parameter: expected to get `a: x` from the
// `arrow_function > .parameter` rule.
assert!(cursor.goto_first_child());
assert_eq!(cursor.node().kind(), "identifier");
assert_eq!(
*cursor.node_properties(),
Properties {
a: Some("x".to_string()),
b: None,
}
);
}
// Test helper: compiles the CSS text `css` (reported as coming from `path`) into
// a property sheet and returns that sheet serialized as a JSON string. Panics
// if either generation or serialization fails.
fn generate_property_sheet_string(path: &str, css: &str) -> String {
    let empty_set = HashSet::new();
    let sheet = properties::generate_property_sheet(path, css, &empty_set).unwrap();
    serde_json::to_string(&sheet).unwrap()
}

View file

@ -16,37 +16,79 @@ fn test_query_errors_on_invalid_syntax() {
// Mismatched parens
assert_eq!(
Query::new(language, "(if_statement"),
Err(QueryError::Syntax(13))
Err(QueryError::Syntax(
[
"(if_statement", //
" ^",
]
.join("\n")
))
);
assert_eq!(
Query::new(language, "(if_statement))"),
Err(QueryError::Syntax(14))
Query::new(language, "; comment 1\n; comment 2\n (if_statement))"),
Err(QueryError::Syntax(
[
" (if_statement))", //
" ^",
]
.join("\n")
))
);
// Return an error at the *beginning* of a bare identifier not followed by a colon.
// If there's a colon but no pattern, return an error at the end of the colon.
assert_eq!(
Query::new(language, "(if_statement identifier)"),
Err(QueryError::Syntax(14))
Err(QueryError::Syntax(
[
"(if_statement identifier)", //
" ^",
]
.join("\n")
))
);
assert_eq!(
Query::new(language, "(if_statement condition:)"),
Err(QueryError::Syntax(24))
Err(QueryError::Syntax(
[
"(if_statement condition:)", //
" ^",
]
.join("\n")
))
);
// Return an error at the beginning of an unterminated string.
assert_eq!(
Query::new(language, r#"(identifier) "h "#),
Err(QueryError::Syntax(13))
Err(QueryError::Syntax(
[
r#"(identifier) "h "#, //
r#" ^"#,
]
.join("\n")
))
);
assert_eq!(
Query::new(language, r#"((identifier) ()"#),
Err(QueryError::Syntax(16))
Err(QueryError::Syntax(
[
"((identifier) ()", //
" ^",
]
.join("\n")
))
);
assert_eq!(
Query::new(language, r#"((identifier) @x (eq? @x a"#),
Err(QueryError::Syntax(26))
Err(QueryError::Syntax(
[
r#"((identifier) @x (eq? @x a"#,
r#" ^"#,
]
.join("\n")
))
);
});
}
@ -323,6 +365,36 @@ fn test_query_matches_with_many() {
});
}
// Checks that a query pattern can name the special `ERROR` node type and
// capture both the error node and an identifier inside it.
#[test]
fn test_query_matches_capturing_error_nodes() {
allocations::record(|| {
let language = get_language("javascript");
let query = Query::new(
language,
"
(ERROR (identifier) @the-error-identifier) @the-error
",
)
.unwrap();
// Deliberately malformed parameter list; the expected match below shows the
// `:e:` portion being captured as the error node.
let source = "function a(b,, c, d :e:) {}";
let mut parser = Parser::new();
parser.set_language(language).unwrap();
let tree = parser.parse(source, None).unwrap();
let mut cursor = QueryCursor::new();
let matches = cursor.matches(&query, tree.root_node(), to_callback(source));
// Exactly one match of pattern 0 is expected, with both captures present.
assert_eq!(
collect_matches(matches, &query, source),
&[(
0,
vec![("the-error", ":e:"), ("the-error-identifier", "e"),]
),]
);
});
}
#[test]
fn test_query_matches_in_language_with_simple_aliases() {
allocations::record(|| {
@ -1029,6 +1101,16 @@ fn test_query_capture_names() {
});
}
// An empty query source must still produce a valid `Query`, one that reports
// no capture names and zero patterns.
#[test]
fn test_query_with_no_patterns() {
    allocations::record(|| {
        let empty_query = Query::new(get_language("javascript"), "").unwrap();
        assert!(empty_query.capture_names().is_empty());
        assert_eq!(empty_query.pattern_count(), 0);
    });
}
#[test]
fn test_query_comments() {
allocations::record(|| {

View file

@ -1,4 +1,4 @@
Tree-sitter Highlighting
Tree-sitter Highlight
=========================
[![Build Status](https://travis-ci.org/tree-sitter/tree-sitter.svg?branch=master)](https://travis-ci.org/tree-sitter/tree-sitter)
@ -14,42 +14,91 @@ extern "C" tree_sitter_html();
extern "C" tree_sitter_javascript();
```
Load some *property sheets*:
Create a highlighter. You only need one of these:
```rust
use tree_sitter_highlight::load_property_sheet;
use tree_sitter_highlight::Highlighter;
let javascript_property_sheet = load_property_sheet(
fs::read_to_string("./tree-sitter-javascript/src/highlights.json").unwrap()
).unwrap();
let highlighter = Highlighter::new(
[
"attribute",
"constant",
"function.builtin",
"function",
"keyword",
"operator",
"property",
"punctuation",
"punctuation.bracket",
"punctuation.delimiter",
"string",
"string.special",
"tag",
"type",
"type.builtin",
"variable",
"variable.builtin",
"variable.parameter",
]
.iter()
.cloned()
.map(String::from)
.collect()
);
```
let html_property_sheet = load_property_sheet(
fs::read_to_string("./tree-sitter-html/src/highlights.json").unwrap()
).unwrap();
Create a highlight context. You need one of these for each thread that you're using for syntax highlighting:
```rust
use tree_sitter_highlight::HighlightContext;
let context = HighlightContext::new();
```
Load some highlighting queries from the `queries` directory of some language repositories:
```rust
let html_language = unsafe { tree_sitter_html() };
let javascript_language = unsafe { tree_sitter_javascript() };
let html_config = highlighter.load_configuration(
html_language,
&fs::read_to_string("./tree-sitter-html/queries/highlights.scm").unwrap(),
&fs::read_to_string("./tree-sitter-html/queries/injections.scm").unwrap(),
"",
);
let javascript_config = highlighter.load_configuration(
javascript_language,
&fs::read_to_string("./tree-sitter-javascript/queries/highlights.scm").unwrap(),
&fs::read_to_string("./tree-sitter-javascript/queries/injections.scm").unwrap(),
&fs::read_to_string("./tree-sitter-javascript/queries/locals.scm").unwrap(),
);
```
Highlight some code:
```rust
use tree_sitter_highlight::{highlight, HighlightEvent};
use tree_sitter_highlight::HighlightEvent;
let highlights = highlight(
let highlights = highlighter.highlight(
&mut context,
javascript_config,
b"const x = new Y();",
unsafe { tree_sitter_javascript() },
&javascript_property_sheet,
None,
&|_| None
).unwrap();
for event in highlights {
match event {
match event? {
HighlightEvent::Source(s) => {
eprintln!("source: {:?}", s);
},
HighlightEvent::ScopeStart(s) {
eprintln!("scope started: {:?}", s);
HighlightEvent::HighlightStart(s) => {
eprintln!("highlight style started: {:?}", s);
},
HighlightEvent::ScopeEnd(s) {
eprintln!("scope ended: {:?}", s);
HighlightEvent::HighlightEnd(s) => {
eprintln!("highlight style ended: {:?}", s);
},
}
}

View file

@ -14,47 +14,15 @@ typedef enum {
TSHighlightInvalidLanguage,
} TSHighlightError;
// The list of scopes which can be styled for syntax highlighting.
// When constructing a `TSHighlighter`, you need to construct an
// `attribute_strings` array whose elements correspond to these values.
enum TSHighlightValue {
TSHighlightValueAttribute,
TSHighlightValueComment,
TSHighlightValueConstant,
TSHighlightValueConstantBuiltin,
TSHighlightValueConstructor,
TSHighlightValueConstructorBuiltin,
TSHighlightValueEmbedded,
TSHighlightValueEscape,
TSHighlightValueFunction,
TSHighlightValueFunctionBuiltin,
TSHighlightValueKeyword,
TSHighlightValueNumber,
TSHighlightValueOperator,
TSHighlightValueProperty,
TSHighlightValuePropertyBuiltin,
TSHighlightValuePunctuation,
TSHighlightValuePunctuationBracket,
TSHighlightValuePunctuationDelimiter,
TSHighlightValuePunctuationSpecial,
TSHighlightValueString,
TSHighlightValueStringSpecial,
TSHighlightValueTag,
TSHighlightValueType,
TSHighlightValueTypeBuiltin,
TSHighlightValueVariable,
TSHighlightValueVariableBuiltin,
TSHighlightValueVariableParameter,
TSHighlightValueUnknown,
};
typedef struct TSHighlighter TSHighlighter;
typedef struct TSHighlightBuffer TSHighlightBuffer;
// Construct a `TSHighlighter` by providing a list of strings containing
// the HTML attributes that should be applied for each highlight value.
TSHighlighter *ts_highlighter_new(
const char **attribute_strings
const char **highlight_names,
const char **attribute_strings,
uint32_t highlight_count
);
// Delete a syntax highlighter.
@ -70,9 +38,14 @@ void ts_highlighter_delete(TSHighlighter *);
int ts_highlighter_add_language(
TSHighlighter *self,
const char *scope_name,
const char *injection_regex,
const TSLanguage *language,
const char *property_sheet_json,
const char *injection_regex
const char *highlight_query,
const char *injection_query,
const char *locals_query,
uint32_t highlight_query_len,
uint32_t injection_query_len,
uint32_t locals_query_len
);
// Compute syntax highlighting for a given document. You must first

View file

@ -1,25 +1,23 @@
use super::{load_property_sheet, Error, Highlight, Highlighter, HtmlRenderer, Properties};
use super::{Error, HighlightConfiguration, HighlightContext, Highlighter, HtmlRenderer};
use regex::Regex;
use std::collections::HashMap;
use std::ffi::CStr;
use std::os::raw::c_char;
use std::process::abort;
use std::sync::atomic::AtomicUsize;
use std::{fmt, slice};
use tree_sitter::{Language, PropertySheet};
struct LanguageConfiguration {
language: Language,
property_sheet: PropertySheet<Properties>,
injection_regex: Option<Regex>,
}
use std::{fmt, slice, str};
use tree_sitter::Language;
pub struct TSHighlighter {
languages: HashMap<String, LanguageConfiguration>,
languages: HashMap<String, (Option<Regex>, HighlightConfiguration)>,
attribute_strings: Vec<&'static [u8]>,
highlighter: Highlighter,
}
pub struct TSHighlightBuffer(HtmlRenderer);
pub struct TSHighlightBuffer {
context: HighlightContext,
renderer: HtmlRenderer,
}
#[repr(C)]
pub enum ErrorCode {
@ -27,33 +25,113 @@ pub enum ErrorCode {
UnknownScope,
Timeout,
InvalidLanguage,
InvalidUtf8,
InvalidRegex,
InvalidQuery,
}
#[no_mangle]
pub extern "C" fn ts_highlighter_new(
highlight_names: *const *const c_char,
attribute_strings: *const *const c_char,
highlight_count: u32,
) -> *mut TSHighlighter {
let highlight_names =
unsafe { slice::from_raw_parts(highlight_names, highlight_count as usize) };
let attribute_strings =
unsafe { slice::from_raw_parts(attribute_strings, Highlight::Unknown as usize + 1) };
unsafe { slice::from_raw_parts(attribute_strings, highlight_count as usize) };
let highlight_names = highlight_names
.into_iter()
.map(|s| unsafe { CStr::from_ptr(*s).to_string_lossy().to_string() })
.collect();
let attribute_strings = attribute_strings
.into_iter()
.map(|s| {
if s.is_null() {
&[]
} else {
unsafe { CStr::from_ptr(*s).to_bytes() }
}
})
.map(|s| unsafe { CStr::from_ptr(*s).to_bytes() })
.collect();
let highlighter = Highlighter::new(highlight_names);
Box::into_raw(Box::new(TSHighlighter {
languages: HashMap::new(),
attribute_strings,
highlighter,
}))
}
/// Register a language with the highlighter under `scope_name`.
///
/// * `this` - the highlighter to add the language to.
/// * `scope_name` - NUL-terminated name the language is stored under; must not
///   be null.
/// * `injection_regex` - optional NUL-terminated regex pattern; may be null, in
///   which case no injection regex is stored for the language.
/// * `language` - the language the queries are compiled against.
/// * `highlight_query`, `injection_query`, `locals_query` - query sources given
///   as pointer + length pairs (they need not be NUL-terminated). A null
///   pointer or a zero length is treated as an empty query.
///
/// Returns `ErrorCode::Ok` on success, `ErrorCode::InvalidUtf8` if any string is
/// not valid UTF-8, `ErrorCode::InvalidRegex` if the injection regex does not
/// compile, and `ErrorCode::InvalidQuery` if the configuration cannot be loaded
/// from the queries.
#[no_mangle]
pub extern "C" fn ts_highlighter_add_language(
    this: *mut TSHighlighter,
    scope_name: *const c_char,
    injection_regex: *const c_char,
    language: Language,
    highlight_query: *const c_char,
    injection_query: *const c_char,
    locals_query: *const c_char,
    highlight_query_len: u32,
    injection_query_len: u32,
    locals_query_len: u32,
) -> ErrorCode {
    /// View `len` bytes starting at `ptr` as UTF-8 query source.
    ///
    /// `slice::from_raw_parts` requires a non-null pointer even for a length
    /// of zero, so null/empty inputs are mapped to "" before it is reached.
    unsafe fn query_source<'a>(ptr: *const c_char, len: u32) -> Result<&'a str, ErrorCode> {
        if ptr.is_null() || len == 0 {
            return Ok("");
        }
        let bytes = slice::from_raw_parts(ptr as *const u8, len as usize);
        str::from_utf8(bytes).or(Err(ErrorCode::InvalidUtf8))
    }

    let f = move || {
        let this = unwrap_mut_ptr(this);

        let scope_name = unsafe { CStr::from_ptr(scope_name) };
        let scope_name = scope_name
            .to_str()
            .or(Err(ErrorCode::InvalidUtf8))?
            .to_string();

        let injection_regex = if injection_regex.is_null() {
            None
        } else {
            let pattern = unsafe { CStr::from_ptr(injection_regex) };
            let pattern = pattern.to_str().or(Err(ErrorCode::InvalidUtf8))?;
            Some(Regex::new(pattern).or(Err(ErrorCode::InvalidRegex))?)
        };

        // All three queries go through the same null/length-checked path.
        let highlight_query = unsafe { query_source(highlight_query, highlight_query_len) }?;
        let injection_query = unsafe { query_source(injection_query, injection_query_len) }?;
        let locals_query = unsafe { query_source(locals_query, locals_query_len) }?;

        // Build the configuration first so that nothing is inserted on failure.
        let configuration = this
            .highlighter
            .load_configuration(language, highlight_query, injection_query, locals_query)
            .or(Err(ErrorCode::InvalidQuery))?;
        this.languages
            .insert(scope_name, (injection_regex, configuration));

        Ok(())
    };

    match f() {
        Ok(()) => ErrorCode::Ok,
        Err(e) => e,
    }
}
#[no_mangle]
pub extern "C" fn ts_highlight_buffer_new() -> *mut TSHighlightBuffer {
Box::into_raw(Box::new(TSHighlightBuffer(HtmlRenderer::new())))
Box::into_raw(Box::new(TSHighlightBuffer {
context: HighlightContext::new(),
renderer: HtmlRenderer::new(),
}))
}
#[no_mangle]
@ -69,59 +147,25 @@ pub extern "C" fn ts_highlight_buffer_delete(this: *mut TSHighlightBuffer) {
#[no_mangle]
pub extern "C" fn ts_highlight_buffer_content(this: *const TSHighlightBuffer) -> *const u8 {
let this = unwrap_ptr(this);
this.0.html.as_slice().as_ptr()
this.renderer.html.as_slice().as_ptr()
}
#[no_mangle]
pub extern "C" fn ts_highlight_buffer_line_offsets(this: *const TSHighlightBuffer) -> *const u32 {
let this = unwrap_ptr(this);
this.0.line_offsets.as_slice().as_ptr()
this.renderer.line_offsets.as_slice().as_ptr()
}
#[no_mangle]
pub extern "C" fn ts_highlight_buffer_len(this: *const TSHighlightBuffer) -> u32 {
let this = unwrap_ptr(this);
this.0.html.len() as u32
this.renderer.html.len() as u32
}
#[no_mangle]
pub extern "C" fn ts_highlight_buffer_line_count(this: *const TSHighlightBuffer) -> u32 {
let this = unwrap_ptr(this);
this.0.line_offsets.len() as u32
}
#[no_mangle]
pub extern "C" fn ts_highlighter_add_language(
this: *mut TSHighlighter,
scope_name: *const c_char,
language: Language,
property_sheet_json: *const c_char,
injection_regex: *const c_char,
) -> ErrorCode {
let this = unwrap_mut_ptr(this);
let scope_name = unsafe { CStr::from_ptr(scope_name) };
let scope_name = unwrap(scope_name.to_str()).to_string();
let property_sheet_json = unsafe { CStr::from_ptr(property_sheet_json) };
let property_sheet_json = unwrap(property_sheet_json.to_str());
let property_sheet = unwrap(load_property_sheet(language, property_sheet_json));
let injection_regex = if injection_regex.is_null() {
None
} else {
let pattern = unsafe { CStr::from_ptr(injection_regex) };
Some(unwrap(Regex::new(unwrap(pattern.to_str()))))
};
this.languages.insert(
scope_name,
LanguageConfiguration {
language,
property_sheet,
injection_regex,
},
);
ErrorCode::Ok
this.renderer.line_offsets.len() as u32
}
#[no_mangle]
@ -150,36 +194,36 @@ impl TSHighlighter {
output: &mut TSHighlightBuffer,
cancellation_flag: Option<&AtomicUsize>,
) -> ErrorCode {
let configuration = self.languages.get(scope_name);
if configuration.is_none() {
let entry = self.languages.get(scope_name);
if entry.is_none() {
return ErrorCode::UnknownScope;
}
let configuration = configuration.unwrap();
let (_, configuration) = entry.unwrap();
let languages = &self.languages;
let highlighter = Highlighter::new(
let highlights = self.highlighter.highlight(
&mut output.context,
configuration,
source_code,
configuration.language,
&configuration.property_sheet,
|injection_string| {
languages.values().find_map(|conf| {
conf.injection_regex.as_ref().and_then(|regex| {
cancellation_flag,
move |injection_string| {
languages.values().find_map(|(injection_regex, config)| {
injection_regex.as_ref().and_then(|regex| {
if regex.is_match(injection_string) {
Some((conf.language, &conf.property_sheet))
Some(config)
} else {
None
}
})
})
},
cancellation_flag,
);
if let Ok(highlighter) = highlighter {
output.0.reset();
let result = output.0.render(highlighter, source_code, &|s| {
self.attribute_strings[s as usize]
});
if let Ok(highlights) = highlights {
output.renderer.reset();
let result = output
.renderer
.render(highlights, source_code, &|s| self.attribute_strings[s.0]);
match result {
Err(Error::Cancelled) => {
return ErrorCode::Timeout;

File diff suppressed because it is too large Load diff

View file

@ -655,6 +655,16 @@ extern "C" {
length: *mut u32,
) -> *const ::std::os::raw::c_char;
}
extern "C" {
#[doc = " Disable a certain capture within a query. This prevents the capture"]
#[doc = " from being returned in matches, and also avoids any resource usage"]
#[doc = " associated with recording the capture."]
pub fn ts_query_disable_capture(
arg1: *mut TSQuery,
arg2: *const ::std::os::raw::c_char,
arg3: u32,
);
}
extern "C" {
#[doc = " Create a new cursor for executing a given query."]
#[doc = ""]

View file

@ -1,18 +1,14 @@
mod ffi;
mod util;
#[macro_use]
extern crate serde_derive;
extern crate regex;
extern crate serde;
extern crate serde_derive;
extern crate serde_json;
#[cfg(unix)]
use std::os::unix::io::AsRawFd;
use regex::Regex;
use serde::de::DeserializeOwned;
use std::collections::HashMap;
use std::ffi::CStr;
use std::marker::PhantomData;
use std::mem::MaybeUninit;
@ -65,62 +61,6 @@ pub struct InputEdit {
pub new_end_position: Point,
}
// One candidate edge out of a `PropertyState`. `TreePropertyCursor::next_state`
// takes the first transition whose optional conditions all hold.
struct PropertyTransition {
// State to move to when this transition is taken.
state_id: u16,
// If set, the node's index among its siblings must equal this value.
child_index: Option<u16>,
// If set, index into `PropertySheet::text_regexes` of a regex the node's text
// must match.
text_regex_index: Option<u16>,
// If set, the node's kind id must equal this value.
node_kind_id: Option<u16>,
}
// A state of the property sheet's state machine.
struct PropertyState {
// Transitions looked up by the current node's field id (tried first).
field_transitions: HashMap<u16, Vec<PropertyTransition>>,
// Transitions looked up by the current node's kind id.
kind_transitions: HashMap<u16, Vec<PropertyTransition>>,
// Index into `PropertySheet::property_sets` of the properties for this state.
property_set_id: usize,
// State used when no transition matches.
default_next_state_id: usize,
}
// Errors that can occur while constructing a `PropertySheet` from JSON.
#[derive(Debug)]
pub enum PropertySheetError {
// The JSON text could not be deserialized.
InvalidJSON(serde_json::Error),
// A transition's text pattern is not a valid regular expression.
InvalidRegex(regex::Error),
}
// A loaded property sheet: a state machine over syntax nodes plus the property
// sets (of type `P`) that its states refer to.
pub struct PropertySheet<P = HashMap<String, String>> {
// All states; the first entry is used as the default state.
states: Vec<PropertyState>,
// Property sets, indexed by `PropertyState::property_set_id`.
property_sets: Vec<P>,
// Compiled text regexes, indexed by `PropertyTransition::text_regex_index`.
text_regexes: Vec<Regex>,
}
// Serialized (JSON) form of a transition. Optional fields are omitted from the
// output when they are `None`.
#[derive(Clone, Debug, Deserialize, Serialize, Hash, PartialEq, Eq)]
pub struct PropertyTransitionJSON {
// Node kind name; stored under the JSON key `type`.
#[serde(rename = "type")]
#[serde(skip_serializing_if = "Option::is_none")]
pub kind: Option<String>,
// Whether the node kind is a named kind.
#[serde(skip_serializing_if = "Option::is_none")]
pub named: Option<bool>,
// Required child index, if any.
#[serde(skip_serializing_if = "Option::is_none")]
pub index: Option<usize>,
// Required field name, if any.
#[serde(skip_serializing_if = "Option::is_none")]
pub field: Option<String>,
// Regex pattern the node's text must match, if any.
#[serde(skip_serializing_if = "Option::is_none")]
pub text: Option<String>,
// State to move to when the transition is taken.
pub state_id: usize,
}
// Serialized (JSON) form of a property sheet state.
#[derive(Debug, Default, Deserialize, Serialize, PartialEq, Eq)]
pub struct PropertyStateJSON {
pub id: Option<usize>,
// Index of this state's property set in `PropertySheetJSON::property_sets`.
pub property_set_id: usize,
// Outgoing transitions of this state.
pub transitions: Vec<PropertyTransitionJSON>,
// State used when no transition matches.
pub default_next_state_id: usize,
}
// Serialized (JSON) form of a whole property sheet, generic over the property
// set type `P`.
#[derive(Debug, Deserialize, Serialize)]
pub struct PropertySheetJSON<P> {
pub states: Vec<PropertyStateJSON>,
pub property_sets: Vec<P>,
}
#[derive(Clone, Copy)]
#[repr(transparent)]
pub struct Node<'a>(ffi::TSNode, PhantomData<&'a ()>);
@ -131,14 +71,6 @@ pub struct Tree(NonNull<ffi::TSTree>);
pub struct TreeCursor<'a>(ffi::TSTreeCursor, PhantomData<&'a ()>);
// A tree cursor that also tracks, for the node it points at, the matching state
// of a `PropertySheet`.
pub struct TreePropertyCursor<'a, P> {
// Underlying cursor used for the actual tree navigation.
cursor: TreeCursor<'a>,
// Property sheet state ids, pushed on descent and popped on ascent.
state_stack: Vec<usize>,
// Sibling index of the current node at each depth, kept in step with the cursor.
child_index_stack: Vec<usize>,
property_sheet: &'a PropertySheet<P>,
// Source text, used to evaluate text-regex transitions.
source: &'a [u8],
}
#[derive(Debug)]
enum TextPredicate {
CaptureEqString(u32, String),
@ -186,7 +118,7 @@ pub struct QueryCapture<'a> {
#[derive(Debug, PartialEq, Eq)]
pub enum QueryError {
Syntax(usize),
Syntax(String),
NodeType(String),
Field(String),
Capture(String),
@ -252,6 +184,7 @@ impl fmt::Display for LanguageError {
}
impl Parser {
/// Create a new parser.
pub fn new() -> Parser {
unsafe {
let parser = ffi::ts_parser_new();
@ -259,6 +192,14 @@ impl Parser {
}
}
/// Set the language that the parser should use for parsing.
///
/// Returns a Result indicating whether or not the language was successfully
/// assigned. `Ok` means assignment succeeded. `Err` means there was a version
/// mismatch: the language was generated with an incompatible version of the
/// Tree-sitter CLI. Check the language's version using `Language::version`
/// and compare it to this library's `TREE_SITTER_LANGUAGE_VERSION` and
/// `TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION` constants.
pub fn set_language(&mut self, language: Language) -> Result<(), LanguageError> {
let version = language.version();
if version < ffi::TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION
@ -554,14 +495,6 @@ impl Tree {
self.root_node().walk()
}
/// Create a `TreePropertyCursor` over this tree.
///
/// `property_sheet` supplies the properties to look up for each node, and
/// `source` is the text the tree was parsed from, used when a rule matches on
/// node text.
pub fn walk_with_properties<'a, P>(
&'a self,
property_sheet: &'a PropertySheet<P>,
source: &'a [u8],
) -> TreePropertyCursor<'a, P> {
TreePropertyCursor::new(self, property_sheet, source)
}
pub fn changed_ranges(&self, other: &Tree) -> impl ExactSizeIterator<Item = Range> {
let mut count = 0;
unsafe {
@ -858,125 +791,6 @@ impl<'a> Drop for TreeCursor<'a> {
}
}
impl<'a, P> TreePropertyCursor<'a, P> {
// Build a cursor positioned on the tree's root node. The state stack starts
// with state 0, and the state computed for the root node is pushed on top.
fn new(tree: &'a Tree, property_sheet: &'a PropertySheet<P>, source: &'a [u8]) -> Self {
let mut result = Self {
cursor: tree.root_node().walk(),
child_index_stack: vec![0],
state_stack: vec![0],
property_sheet,
source,
};
let state = result.next_state(0);
result.state_stack.push(state);
result
}
/// The node the cursor currently points at.
pub fn node(&self) -> Node<'a> {
self.cursor.node()
}
/// The property set associated with the current node's state.
pub fn node_properties(&self) -> &'a P {
&self.property_sheet.property_sets[self.current_state().property_set_id]
}
/// Move to the first child of the current node, pushing the child's state
/// and a child index of 0. Returns `false` (and changes nothing) if the
/// underlying cursor could not move.
pub fn goto_first_child(&mut self) -> bool {
if self.cursor.goto_first_child() {
let next_state_id = self.next_state(0);
self.state_stack.push(next_state_id);
self.child_index_stack.push(0);
true
} else {
false
}
}
/// Move to the next sibling of the current node. Returns `false` (and
/// changes nothing) if the underlying cursor could not move.
pub fn goto_next_sibling(&mut self) -> bool {
if self.cursor.goto_next_sibling() {
let child_index = self.child_index_stack.pop().unwrap() + 1;
// Drop the previous sibling's state first so that `next_state` is
// evaluated relative to the parent's state.
self.state_stack.pop();
let next_state_id = self.next_state(child_index);
self.state_stack.push(next_state_id);
self.child_index_stack.push(child_index);
true
} else {
false
}
}
/// Move to the parent of the current node, discarding the current node's
/// state and child index. Returns `false` if the underlying cursor could
/// not move.
pub fn goto_parent(&mut self) -> bool {
if self.cursor.goto_parent() {
self.state_stack.pop();
self.child_index_stack.pop();
true
} else {
false
}
}
/// The source text this cursor was created with.
pub fn source(&self) -> &'a [u8] {
&self.source
}
// Compute the state for the node the underlying cursor points at, given that
// node's index among its siblings. Transitions of the state on top of the
// stack are tried first, then those of the default state; the first
// transition whose conditions all hold wins. If none matches, the top
// state's `default_next_state_id` is returned.
fn next_state(&self, node_child_index: usize) -> usize {
let current_state = self.current_state();
let default_state = self.default_state();
for state in [current_state, default_state].iter() {
let node_field_id = self.cursor.field_id();
let node_kind_id = self.cursor.node().kind_id();
// Look up by field id when the node has one and the state has an entry
// for it; otherwise fall back to the lookup by node kind.
let transitions = node_field_id
.and_then(|field_id| state.field_transitions.get(&field_id))
.or_else(|| state.kind_transitions.get(&node_kind_id));
if let Some(transitions) = transitions {
for transition in transitions.iter() {
// Skip transitions that require a different node kind.
if transition
.node_kind_id
.map_or(false, |id| id != node_kind_id)
{
continue;
}
if let Some(text_regex_index) = transition.text_regex_index {
let node = self.cursor.node();
let text = &self.source[node.start_byte()..node.end_byte()];
// The regex is only checked when the text is valid UTF-8; text that
// fails to decode does not reject the transition.
if let Ok(text) = str::from_utf8(text) {
if !self.property_sheet.text_regexes[text_regex_index as usize]
.is_match(text)
{
continue;
}
}
}
if let Some(child_index) = transition.child_index {
if child_index != node_child_index as u16 {
continue;
}
}
return transition.state_id as usize;
}
}
// When the current state *is* the default state, the second iteration
// would repeat the same lookup, so stop here.
if current_state as *const PropertyState == default_state as *const PropertyState {
break;
}
}
current_state.default_next_state_id
}
// The state on top of the state stack.
fn current_state(&self) -> &PropertyState {
&self.property_sheet.states[*self.state_stack.last().unwrap()]
}
// The first state of the property sheet, used as a fallback in `next_state`.
fn default_state(&self) -> &PropertyState {
&self.property_sheet.states.first().unwrap()
}
}
impl Query {
pub fn new(language: Language, source: &str) -> Result<Self, QueryError> {
let mut error_offset = 0u32;
@ -997,6 +811,24 @@ impl Query {
// On failure, build an error based on the error code and offset.
if ptr.is_null() {
let offset = error_offset as usize;
let mut line_start = 0;
let line_containing_error = source.split("\n").find_map(|line| {
let line_end = line_start + line.len() + 1;
if line_end > offset {
Some(line)
} else {
line_start = line_end;
None
}
});
let message = if let Some(line) = line_containing_error {
line.to_string() + "\n" + &" ".repeat(offset - line_start) + "^"
} else {
"Unexpected EOF".to_string()
};
// if line_containing_error
return if error_type != ffi::TSQueryError_TSQueryErrorSyntax {
let suffix = source.split_at(offset).1;
let end_offset = suffix
@ -1007,10 +839,10 @@ impl Query {
ffi::TSQueryError_TSQueryErrorNodeType => Err(QueryError::NodeType(name)),
ffi::TSQueryError_TSQueryErrorField => Err(QueryError::Field(name)),
ffi::TSQueryError_TSQueryErrorCapture => Err(QueryError::Capture(name)),
_ => Err(QueryError::Syntax(offset)),
_ => Err(QueryError::Syntax(message)),
}
} else {
Err(QueryError::Syntax(offset))
Err(QueryError::Syntax(message))
};
}
@ -1202,6 +1034,16 @@ impl Query {
&self.property_settings[index]
}
/// Disable the capture called `name` within this query by forwarding the
/// name's bytes and byte length to `ts_query_disable_capture`.
pub fn disable_capture(&mut self, name: &str) {
    let name_bytes = name.as_bytes();
    let name_ptr = name_bytes.as_ptr() as *const c_char;
    let name_len = name_bytes.len() as u32;
    unsafe {
        ffi::ts_query_disable_capture(self.ptr.as_ptr(), name_ptr, name_len);
    }
}
fn parse_property(
function_name: &str,
capture_names: &[String],
@ -1260,7 +1102,7 @@ impl QueryCursor {
}
pub fn matches<'a>(
&'a mut self,
&mut self,
query: &'a Query,
node: Node<'a>,
mut text_callback: impl FnMut(Node<'a>) -> &[u8] + 'a,
@ -1283,7 +1125,7 @@ impl QueryCursor {
}
pub fn captures<'a, T: AsRef<[u8]>>(
&'a mut self,
&mut self,
query: &'a Query,
node: Node<'a>,
text_callback: impl FnMut(Node<'a>) -> T + 'a,
@ -1485,154 +1327,6 @@ impl<'a> Into<ffi::TSInputEdit> for &'a InputEdit {
}
}
// Construction and transformation of property sheets: `new` turns the JSON
// description of a property-sheet state machine into per-state lookup tables
// keyed by this language's field ids and node kind ids, and `map` converts
// the stored property sets to another type.
impl<P> PropertySheet<P> {
/// Build a property sheet for `language` from its JSON representation.
///
/// Returns `PropertySheetError::InvalidJSON` if `json` cannot be
/// deserialized, and `PropertySheetError::InvalidRegex` if the `text`
/// pattern of a transition fails to compile.
pub fn new(language: Language, json: &str) -> Result<Self, PropertySheetError>
where
P: DeserializeOwned,
{
let input: PropertySheetJSON<P> =
serde_json::from_str(json).map_err(PropertySheetError::InvalidJSON)?;
let mut states = Vec::new();
// `text_regexes` and `text_regex_patterns` are parallel vectors: the
// compiled regex at index `i` was built from the pattern at index `i`.
let mut text_regexes = Vec::new();
let mut text_regex_patterns = Vec::new();
for state in input.states.iter() {
let node_kind_count = language.node_kind_count();
let mut kind_transitions = HashMap::new();
let mut field_transitions = HashMap::new();
// First pass: create an (initially empty) entry for every field that this
// state's transitions refer to and that exists in the language. The second
// pass appends kind-only transitions to every entry present in
// `field_transitions`, so the entries are created up front here.
for transition in state.transitions.iter() {
let field_id = transition
.field
.as_ref()
.and_then(|field| language.field_id_for_name(&field));
if let Some(field_id) = field_id {
field_transitions.entry(field_id).or_insert(Vec::new());
}
}
// Second pass: translate each transition into table entries.
for transition in state.transitions.iter() {
// Compile each distinct text pattern only once; transitions that share a
// pattern share the index of its compiled regex.
let text_regex_index = if let Some(regex_pattern) = transition.text.as_ref() {
if let Some(index) =
text_regex_patterns.iter().position(|r| *r == regex_pattern)
{
Some(index as u16)
} else {
text_regex_patterns.push(regex_pattern);
text_regexes.push(
Regex::new(&regex_pattern).map_err(PropertySheetError::InvalidRegex)?,
);
Some(text_regexes.len() as u16 - 1)
}
} else {
None
};
let state_id = transition.state_id as u16;
let child_index = transition.index.map(|i| i as u16);
let field_id = transition
.field
.as_ref()
.and_then(|field| language.field_id_for_name(&field));
if let Some(kind) = transition.kind.as_ref() {
// The transition names a node kind: expand it to every kind id of the
// language whose name and named-ness both match.
for kind_id in 0..(node_kind_count as u16) {
if kind != language.node_kind_for_id(kind_id)
|| transition.named != Some(language.node_kind_is_named(kind_id))
{
continue;
}
if let Some(field_id) = field_id {
// Kind and field: recorded only under that field, tagged with the kind.
field_transitions
.entry(field_id)
.or_insert(Vec::new())
.push(PropertyTransition {
node_kind_id: Some(kind_id),
state_id,
child_index,
text_regex_index,
});
} else {
// Kind only: added to every field's list (tagged with the kind) and
// also to the per-kind table, where the kind is already the key and
// `node_kind_id` is therefore left as `None`.
for (_, entries) in field_transitions.iter_mut() {
entries.push(PropertyTransition {
node_kind_id: Some(kind_id),
state_id,
child_index,
text_regex_index,
});
}
kind_transitions.entry(kind_id).or_insert(Vec::new()).push(
PropertyTransition {
node_kind_id: None,
state_id,
child_index,
text_regex_index,
},
);
}
}
} else if let Some(field_id) = field_id {
// Field only: recorded under that field with no kind restriction.
field_transitions
.entry(field_id)
.or_insert(Vec::new())
.push(PropertyTransition {
node_kind_id: None,
state_id,
child_index,
text_regex_index,
});
}
// A transition with neither a kind nor a field known to the language
// produces no table entry.
}
states.push(PropertyState {
field_transitions,
kind_transitions,
default_next_state_id: state.default_next_state_id,
property_set_id: state.property_set_id,
});
}
Ok(Self {
property_sets: input.property_sets,
states,
text_regexes,
})
}
/// Convert every property set with `f`, producing a sheet of the new type.
///
/// The states and compiled regexes are moved over unchanged. Conversion
/// stops at the first error returned by `f`, and that error is returned.
pub fn map<F, T, E>(self, mut f: F) -> Result<PropertySheet<T>, E>
where
F: FnMut(P) -> Result<T, E>,
{
let mut property_sets = Vec::with_capacity(self.property_sets.len());
for set in self.property_sets {
property_sets.push(f(set)?);
}
Ok(PropertySheet {
states: self.states,
text_regexes: self.text_regexes,
property_sets,
})
}
}
impl fmt::Display for PropertySheetError {
    /// Write the kind of failure followed by the underlying error's own
    /// message.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            PropertySheetError::InvalidJSON(ref json_error) => {
                f.write_fmt(format_args!("Invalid JSON: {}", json_error))
            }
            PropertySheetError::InvalidRegex(ref regex_error) => {
                f.write_fmt(format_args!("Invalid Regex: {}", regex_error))
            }
        }
    }
}
impl std::error::Error for PropertySheetError {
    /// Expose the wrapped JSON or regex error as the cause of this error.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        let cause: &(dyn std::error::Error + 'static) = match self {
            PropertySheetError::InvalidJSON(json_error) => json_error,
            PropertySheetError::InvalidRegex(regex_error) => regex_error,
        };
        Some(cause)
    }
}
// Declare that these wrapper types may be moved to another thread.
// NOTE(review): these are `unsafe` assertions the compiler cannot check;
// presumably they hold because the wrapped C objects have no thread
// affinity -- confirm against the C library's guarantees.
unsafe impl Send for Language {}
unsafe impl Send for Parser {}
unsafe impl Send for Query {}

View file

@ -185,7 +185,10 @@ describe("Parser", () => {
tree = parser.parse("const x: &'static str = r###\"hello\"###;");
assert.equal(
tree.rootNode.toString(),
'(source_file (const_item (identifier) (reference_type (lifetime (identifier)) (primitive_type)) (raw_string_literal)))'
'(source_file (const_item ' +
'name: (identifier) ' +
'type: (reference_type (lifetime (identifier)) type: (primitive_type)) ' +
'value: (raw_string_literal)))'
);
}).timeout(5000);

View file

@ -716,6 +716,13 @@ const char *ts_query_string_value_for_id(
uint32_t *length
);
/**
* Disable a certain capture within a query. This prevents the capture
* from being returned in matches, and also avoids any resource usage
* associated with recording the capture.
*
* The capture is identified by its name, which is passed as a pointer to
* the name's bytes followed by the length of the name in bytes.
*/
void ts_query_disable_capture(TSQuery *, const char *, uint32_t);
/**
* Create a new cursor for executing a given query.
*

View file

@ -36,7 +36,8 @@ typedef struct {
TSSymbol symbol;
TSFieldId field;
uint16_t capture_id;
uint16_t depth;
uint16_t depth: 15;
bool contains_captures: 1;
} QueryStep;
/*
@ -327,6 +328,7 @@ static TSSymbol ts_query_intern_node_name(
uint32_t length,
TSSymbolType symbol_type
) {
if (!strncmp(name, "ERROR", length)) return ts_builtin_sym_error;
uint32_t symbol_count = ts_language_symbol_count(self->language);
for (TSSymbol i = 0; i < symbol_count; i++) {
if (ts_language_symbol_type(self->language, i) != symbol_type) continue;
@ -403,6 +405,23 @@ static inline void ts_query__pattern_map_insert(
}));
}
// Recompute the `contains_captures` flag of every step in the query.
//
// A step contains captures if it has a capture id of its own, or if any of
// the steps nested beneath it does. The nested steps are the steps that
// follow it with a strictly greater depth, up to the next step at the same
// or a shallower depth or the marker that ends the pattern.
//
// The flags are derived purely from the steps' current capture ids, so this
// is run once the query's steps have been built and again whenever a
// capture is disabled.
static void ts_query__finalize_steps(TSQuery *self) {
  for (unsigned i = 0; i < self->steps.size; i++) {
    QueryStep *step = &self->steps.contents[i];
    uint32_t depth = step->depth;
    bool contains_captures = step->capture_id != NONE;

    // Only look at nested steps while the answer is still unknown: as soon
    // as one capture is found, later steps cannot change the result.
    for (
      unsigned j = i + 1;
      !contains_captures && j < self->steps.size;
      j++
    ) {
      QueryStep *s = &self->steps.contents[j];
      if (s->depth == PATTERN_DONE_MARKER || s->depth <= depth) break;
      contains_captures = s->capture_id != NONE;
    }

    step->contains_captures = contains_captures;
  }
}
// Parse a single predicate associated with a pattern, adding it to the
// query's internal `predicate_steps` array. Predicates are arbitrary
// S-expressions associated with a pattern which are meant to be handled at
@ -593,6 +612,7 @@ static TSQueryError ts_query_parse_pattern(
.symbol = symbol,
.field = 0,
.capture_id = NONE,
.contains_captures = false,
}));
// Parse the child patterns
@ -638,6 +658,7 @@ static TSQueryError ts_query_parse_pattern(
.symbol = symbol,
.field = 0,
.capture_id = NONE,
.contains_captures = false,
}));
if (stream->next != '"') return TSQueryErrorSyntax;
@ -688,6 +709,7 @@ static TSQueryError ts_query_parse_pattern(
.depth = depth,
.symbol = WILDCARD_SYMBOL,
.field = 0,
.contains_captures = false,
}));
}
@ -768,7 +790,7 @@ TSQuery *ts_query_new(
Stream stream = stream_new(source, source_len);
stream_skip_whitespace(&stream);
uint32_t start_step_index;
for (;;) {
while (stream.input < stream.end) {
start_step_index = self->steps.size;
uint32_t capture_count = 0;
array_push(&self->start_bytes_by_pattern, stream.input - source);
@ -803,10 +825,9 @@ TSQuery *ts_query_new(
if (capture_count > self->max_capture_count) {
self->max_capture_count = capture_count;
}
if (stream.input == stream.end) break;
}
ts_query__finalize_steps(self);
return self;
}
@ -869,6 +890,23 @@ uint32_t ts_query_start_byte_for_pattern(
return self->start_bytes_by_pattern.contents[pattern_index];
}
// Remove the capture with the given name from every step that records it,
// then refresh the per-step `contains_captures` flags so that they reflect
// the remaining captures.
void ts_query_disable_capture(
  TSQuery *self,
  const char *name,
  uint32_t length
) {
  int capture_id = symbol_table_id_for_name(&self->captures, name, length);

  // An id of -1 leaves every step untouched.
  if (capture_id != -1) {
    QueryStep *steps = self->steps.contents;
    for (unsigned index = 0; index < self->steps.size; index++) {
      if (steps[index].capture_id == capture_id) {
        steps[index].capture_id = NONE;
      }
    }
  }

  ts_query__finalize_steps(self);
}
/***************
* QueryCursor
***************/
@ -1000,7 +1038,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
} else if (ts_tree_cursor_goto_parent(&self->cursor)) {
self->depth--;
} else {
return false;
return self->finished_states.size > 0;
}
} else {
bool can_have_later_siblings;
@ -1147,11 +1185,15 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
// capturing different children. If this pattern step could match
// later children within the same parent, then this query state
// cannot simply be updated in place. It must be split into two
// states: one that captures this node, and one which skips over
// this node, to preserve the possibility of capturing later
// states: one that matches this node, and one which skips over
// this node, to preserve the possibility of matching later
// siblings.
QueryState *next_state = state;
if (step->depth > 0 && later_sibling_can_match) {
if (
step->depth > 0 &&
step->contains_captures &&
later_sibling_can_match
) {
LOG(
" split state. pattern:%u, step:%u\n",
state->pattern_index,
@ -1190,7 +1232,7 @@ static inline bool ts_query_cursor__advance(TSQueryCursor *self) {
next_state->step_index++;
QueryStep *next_step = step + 1;
if (next_step->depth == PATTERN_DONE_MARKER) {
LOG("finish pattern %u\n", next_state->pattern_index);
LOG(" finish pattern %u\n", next_state->pattern_index);
next_state->id = self->next_state_id++;
array_push(&self->finished_states, *next_state);

View file

@ -24,12 +24,12 @@ fetch_grammar() {
fetch_grammar bash master
fetch_grammar c master
fetch_grammar cpp master
fetch_grammar embedded-template master
fetch_grammar embedded-template highlight-queries
fetch_grammar go master
fetch_grammar html master
fetch_grammar javascript master
fetch_grammar html highlight-queries
fetch_grammar javascript highlight-queries
fetch_grammar json master
fetch_grammar python master
fetch_grammar ruby master
fetch_grammar rust master
fetch_grammar rust highlight-queries
fetch_grammar typescript master

View file

@ -3,14 +3,14 @@
call:fetch_grammar bash master
call:fetch_grammar c master
call:fetch_grammar cpp master
call:fetch_grammar embedded-template master
call:fetch_grammar embedded-template highlight-queries
call:fetch_grammar go master
call:fetch_grammar html master
call:fetch_grammar javascript master
call:fetch_grammar html highlight-queries
call:fetch_grammar javascript highlight-queries
call:fetch_grammar json master
call:fetch_grammar python master
call:fetch_grammar ruby master
call:fetch_grammar rust master
call:fetch_grammar rust highlight-queries
call:fetch_grammar typescript master
exit /B 0