Merge pull request #2914 from amaanq/clippy-pt2

chore: apply clippy fixes
Amaan Qureshi 2024-02-04 05:00:44 -05:00 (committed by GitHub)
commit 8bb98dfdc2
64 changed files with 1597 additions and 1680 deletions


@ -35,19 +35,9 @@ jobs:
platform:
- linux-arm64 #
- linux-arm #
- linux-armhf #
- linux-armv5te #
- linux-armv7l #
- linux-x64 #
- linux-x86 #
- linux-i586 #
- linux-powerpc #
- linux-powerpc64 #
- linux-powerpc64el #
# - linux-riscv64gc # #2712
- linux-s390x #
- linux-sparc64 #
- linux-thumbv7neon #
- windows-arm64 #
- windows-x64 # <-- No C library build - requires an additional adapted Makefile for `cl.exe` compiler
- windows-x86 # -- // --
@ -60,19 +50,9 @@ jobs:
# 2. Add a new record to a matrix map in `cli/npm/install.js`
- { platform: linux-arm64 , target: aarch64-unknown-linux-gnu , os: ubuntu-latest , use-cross: true }
- { platform: linux-arm , target: arm-unknown-linux-gnueabi , os: ubuntu-latest , use-cross: true }
- { platform: linux-armhf , target: arm-unknown-linux-gnueabihf , os: ubuntu-latest , use-cross: true }
- { platform: linux-armv5te , target: armv5te-unknown-linux-gnueabi , os: ubuntu-latest , use-cross: true }
- { platform: linux-armv7l , target: armv7-unknown-linux-gnueabihf , os: ubuntu-latest , use-cross: true }
- { platform: linux-x64 , target: x86_64-unknown-linux-gnu , os: ubuntu-20.04 , enable-wasm: true } #2272
- { platform: linux-x86 , target: i686-unknown-linux-gnu , os: ubuntu-latest , use-cross: true }
- { platform: linux-i586 , target: i586-unknown-linux-gnu , os: ubuntu-latest , use-cross: true }
- { platform: linux-powerpc , target: powerpc-unknown-linux-gnu , os: ubuntu-latest , use-cross: true }
- { platform: linux-powerpc64 , target: powerpc64-unknown-linux-gnu , os: ubuntu-latest , use-cross: true }
- { platform: linux-powerpc64el , target: powerpc64le-unknown-linux-gnu , os: ubuntu-latest , use-cross: true }
# - { platform: linux-riscv64gc , target: riscv64gc-unknown-linux-gnu , os: ubuntu-latest , use-cross: true } #2712
- { platform: linux-s390x , target: s390x-unknown-linux-gnu , os: ubuntu-latest , use-cross: true }
- { platform: linux-sparc64 , target: sparc64-unknown-linux-gnu , os: ubuntu-latest , use-cross: true }
- { platform: linux-thumbv7neon , target: thumbv7neon-unknown-linux-gnueabihf , os: ubuntu-latest , use-cross: true }
- { platform: windows-arm64 , target: aarch64-pc-windows-msvc , os: windows-latest }
- { platform: windows-x64 , target: x86_64-pc-windows-msvc , os: windows-latest , enable-wasm: true }
- { platform: windows-x86 , target: i686-pc-windows-msvc , os: windows-latest }
@ -82,18 +62,8 @@ jobs:
# Cross compilers for C library
- { platform: linux-arm64 , cc: aarch64-linux-gnu-gcc , ar: aarch64-linux-gnu-ar }
- { platform: linux-arm , cc: arm-linux-gnueabi-gcc , ar: arm-linux-gnueabi-ar }
- { platform: linux-armhf , cc: arm-unknown-linux-gnueabihf-gcc , ar: arm-unknown-linux-gnueabihf-ar }
- { platform: linux-armv5te , cc: arm-linux-gnueabi-gcc , ar: arm-linux-gnueabi-ar }
- { platform: linux-armv7l , cc: arm-linux-gnueabihf-gcc , ar: arm-linux-gnueabihf-ar }
- { platform: linux-x86 , cc: i686-linux-gnu-gcc , ar: i686-linux-gnu-ar }
- { platform: linux-i586 , cc: i686-linux-gnu-gcc , ar: i686-linux-gnu-ar }
- { platform: linux-powerpc , cc: powerpc-linux-gnu-gcc , ar: powerpc-linux-gnu-ar }
- { platform: linux-powerpc64 , cc: powerpc64-linux-gnu-gcc , ar: powerpc64-linux-gnu-ar }
- { platform: linux-powerpc64el , cc: powerpc64le-linux-gnu-gcc , ar: powerpc64le-linux-gnu-ar }
# - { platform: linux-riscv64gc , cc: riscv64-linux-gnu-gcc , ar: riscv64-linux-gnu-ar } #2712
- { platform: linux-s390x , cc: s390x-linux-gnu-gcc , ar: s390x-linux-gnu-ar }
- { platform: linux-sparc64 , cc: sparc64-linux-gnu-gcc , ar: sparc64-linux-gnu-ar }
- { platform: linux-thumbv7neon , cc: arm-linux-gnueabihf-gcc , ar: arm-linux-gnueabihf-ar }
# See #2041 tree-sitter issue
- { platform: windows-x64 , rust-test-threads: 1 }


@ -25,7 +25,7 @@ lazy_static! {
let (example_paths, query_paths) =
result.entry(relative_path.to_owned()).or_default();
if let Ok(example_files) = fs::read_dir(&dir.join("examples")) {
if let Ok(example_files) = fs::read_dir(dir.join("examples")) {
example_paths.extend(example_files.filter_map(|p| {
let p = p.unwrap().path();
if p.is_file() {
@ -36,7 +36,7 @@ lazy_static! {
}));
}
if let Ok(query_files) = fs::read_dir(&dir.join("queries")) {
if let Ok(query_files) = fs::read_dir(dir.join("queries")) {
query_paths.extend(query_files.filter_map(|p| {
let p = p.unwrap().path();
if p.is_file() {
@ -47,7 +47,7 @@ lazy_static! {
}));
}
} else {
for entry in fs::read_dir(&dir).unwrap() {
for entry in fs::read_dir(dir).unwrap() {
let entry = entry.unwrap().path();
if entry.is_dir() {
process_dir(result, &entry);
@ -102,7 +102,7 @@ fn main() {
}
}
parse(&path, max_path_length, |source| {
parse(path, max_path_length, |source| {
Query::new(&language, str::from_utf8(source).unwrap())
.with_context(|| format!("Query file path: {path:?}"))
.expect("Failed to parse query");
@ -167,7 +167,7 @@ fn main() {
eprintln!(" Average Speed (errors): {} bytes/ms", average_error);
eprintln!(" Worst Speed (errors): {} bytes/ms", worst_error);
}
eprintln!("");
eprintln!();
}
fn aggregate(speeds: &Vec<usize>) -> Option<(usize, usize)> {
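
The hunks above apply two small lints: `clippy::needless_borrow` (`fs::read_dir` takes `impl AsRef<Path>`, so the owned `PathBuf` returned by `join` needs no extra `&`) and `clippy::println_empty_string` (`eprintln!("")` is just `eprintln!()`). A minimal, self-contained sketch of both, with hypothetical paths:

```rust
use std::fs;
use std::path::Path;

fn list_examples(dir: &Path) {
    // Before: fs::read_dir(&dir.join("examples")) -- read_dir takes
    // `impl AsRef<Path>`, so borrowing the owned PathBuf is needless.
    if let Ok(entries) = fs::read_dir(dir.join("examples")) {
        for entry in entries.flatten() {
            println!("{}", entry.path().display());
        }
    }
    // Before: eprintln!(""); -- printing an empty string is just a newline.
    eprintln!();
}

fn main() {
    list_examples(Path::new("."));
}
```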


@ -4,18 +4,15 @@ use std::{env, fs};
fn main() {
if let Some(git_sha) = read_git_sha() {
println!("cargo:rustc-env={}={}", "BUILD_SHA", git_sha);
println!("cargo:rustc-env=BUILD_SHA={git_sha}");
}
if web_playground_files_present() {
println!("cargo:rustc-cfg={}", "TREE_SITTER_EMBED_WASM_BINDING");
println!("cargo:rustc-cfg=TREE_SITTER_EMBED_WASM_BINDING");
}
let rust_binding_version = read_rust_binding_version();
println!(
"cargo:rustc-env={}={}",
"RUST_BINDING_VERSION", rust_binding_version,
);
println!("cargo:rustc-env=RUST_BINDING_VERSION={rust_binding_version}");
}
fn web_playground_files_present() -> bool {
@ -51,10 +48,10 @@ fn read_git_sha() -> Option<String> {
}
let git_head_path = git_dir_path.join("HEAD");
if let Some(path) = git_head_path.to_str() {
println!("cargo:rerun-if-changed={}", path);
println!("cargo:rerun-if-changed={path}");
}
if let Ok(mut head_content) = fs::read_to_string(&git_head_path) {
if head_content.ends_with("\n") {
if head_content.ends_with('\n') {
head_content.pop();
}
@ -65,13 +62,12 @@ fn read_git_sha() -> Option<String> {
// Go to real non-worktree gitdir
let git_dir_path = git_dir_path
.parent()
.map(|p| {
.and_then(|p| {
p.file_name()
.map(|n| n == OsStr::new("worktrees"))
.and_then(|x| x.then(|| p.parent()))
})
.flatten()
.flatten()
.unwrap_or(&git_dir_path);
let file = git_dir_path.join(&head_content);
@ -84,7 +80,7 @@ fn read_git_sha() -> Option<String> {
if let Some((hash, r#ref)) = line.split_once(' ') {
if r#ref == head_content {
if let Some(path) = packed_refs.to_str() {
println!("cargo:rerun-if-changed={}", path);
println!("cargo:rerun-if-changed={path}");
}
return Some(hash.to_string());
}
@ -95,7 +91,7 @@ fn read_git_sha() -> Option<String> {
}
};
if let Some(path) = ref_filename.to_str() {
println!("cargo:rerun-if-changed={}", path);
println!("cargo:rerun-if-changed={path}");
}
return fs::read_to_string(&ref_filename).ok();
}
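
The build-script hunks apply `clippy::uninlined_format_args` (captured variables move into the format string) and `clippy::single_char_pattern` (`ends_with("\n")` becomes `ends_with('\n')`); the worktree lookup also swaps `.map(..).flatten(..)` for `.and_then(..)`. A sketch of the first two, assuming a toy build script:

```rust
fn main() {
    let git_sha = "8bb98dfdc2";

    // Before: println!("cargo:rustc-env={}={}", "BUILD_SHA", git_sha);
    // The literal key folds into the string; the variable is captured inline.
    println!("cargo:rustc-env=BUILD_SHA={git_sha}");

    let mut head_content = String::from("ref: refs/heads/master\n");
    // Before: head_content.ends_with("\n") -- a one-character &str pattern;
    // a char pattern is the idiomatic spelling.
    if head_content.ends_with('\n') {
        head_content.pop();
    }
    println!("{head_content}");
}
```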


@ -39,7 +39,7 @@ impl Config {
}
let legacy_path = dirs::home_dir()
.ok_or(anyhow!("Cannot determine home directory"))?
.ok_or_else(|| anyhow!("Cannot determine home directory"))?
.join(".tree-sitter")
.join("config.json");
if legacy_path.is_file() {
@ -51,7 +51,7 @@ impl Config {
fn xdg_config_file() -> Result<PathBuf> {
let xdg_path = dirs::config_dir()
.ok_or(anyhow!("Cannot determine config directory"))?
.ok_or_else(|| anyhow!("Cannot determine config directory"))?
.join("tree-sitter")
.join("config.json");
Ok(xdg_path)
@ -65,16 +65,15 @@ impl Config {
/// by [`dirs::config_dir`](https://docs.rs/dirs/*/dirs/fn.config_dir.html)
/// - `$HOME/.tree-sitter/config.json` as a fallback from where tree-sitter _used_ to store
/// its configuration
pub fn load() -> Result<Config> {
let location = match Self::find_config_file()? {
Some(location) => location,
None => return Config::initial(),
pub fn load() -> Result<Self> {
let Some(location) = Self::find_config_file()? else {
return Self::initial();
};
let content = fs::read_to_string(&location)
.with_context(|| format!("Failed to read {}", &location.to_string_lossy()))?;
let config = serde_json::from_str(&content)
.with_context(|| format!("Bad JSON config {}", &location.to_string_lossy()))?;
Ok(Config { location, config })
Ok(Self { location, config })
}
/// Creates an empty initial configuration file. You can then use the [`Config::add`][] method
@ -83,7 +82,7 @@ impl Config {
/// disk.
///
/// (Note that this is typically only done by the `tree-sitter init-config` command.)
pub fn initial() -> Result<Config> {
pub fn initial() -> Result<Self> {
let location = if let Ok(path) = env::var("TREE_SITTER_DIR") {
let mut path = PathBuf::from(path);
path.push("config.json");
@ -92,7 +91,7 @@ impl Config {
Self::xdg_config_file()?
};
let config = serde_json::json!({});
Ok(Config { location, config })
Ok(Self { location, config })
}
/// Saves this configuration to the file that it was originally loaded from.
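
This file combines three lints: `clippy::or_fun_call` (`ok_or(anyhow!(..))` builds the error eagerly even on the `Some` path, so clippy suggests `ok_or_else`), `clippy::manual_let_else` (a `match` that only destructures-or-returns becomes `let ... else`), and `clippy::use_self` (constructors return `Self`). A sketch with made-up types, not the real tree-sitter `Config`:

```rust
// Made-up types for illustration; not the real tree-sitter Config.
struct Config {
    location: String,
}

impl Config {
    fn find_config_file() -> Result<Option<String>, String> {
        Ok(std::env::var("CONFIG_PATH").ok())
    }

    fn initial() -> Result<Self, String> {
        // `ok_or_else` defers building the error to the `None` branch;
        // the original `ok_or(anyhow!(..))` constructed it on every call.
        let home = std::env::var_os("HOME")
            .ok_or_else(|| "Cannot determine home directory".to_string())?;
        Ok(Self {
            location: format!("{}/config.json", home.to_string_lossy()),
        })
    }

    fn load() -> Result<Self, String> {
        // Before: match Self::find_config_file()? { Some(l) => l, None => return .. }
        // After: let-else keeps the happy path unindented.
        let Some(location) = Self::find_config_file()? else {
            return Self::initial();
        };
        Ok(Self { location }) // `Self`, not `Config` (clippy::use_self)
    }
}

fn main() {
    println!("{:?}", Config::load().map(|c| c.location));
}
```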


@ -5,8 +5,5 @@ fn main() {
);
let emscripten_version = std::fs::read_to_string("emscripten-version").unwrap();
println!(
"cargo:rustc-env={}={}",
"EMSCRIPTEN_VERSION", emscripten_version,
);
println!("cargo:rustc-env=EMSCRIPTEN_VERSION={emscripten_version}");
}


@ -39,9 +39,8 @@ where
D: Deserializer<'de>,
{
let paths = Vec::<PathBuf>::deserialize(deserializer)?;
let home = match dirs::home_dir() {
Some(home) => home,
None => return Ok(paths),
let Some(home) = dirs::home_dir() else {
return Ok(paths);
};
let standardized = paths
.into_iter()
@ -61,9 +60,10 @@ fn standardize_path(path: PathBuf, home: &Path) -> PathBuf {
}
impl Config {
pub fn initial() -> Config {
#[must_use]
pub fn initial() -> Self {
let home_dir = dirs::home_dir().expect("Cannot determine home directory");
Config {
Self {
parser_directories: vec![
home_dir.join("github"),
home_dir.join("src"),
@ -77,7 +77,7 @@ impl Config {
const DYLIB_EXTENSION: &str = "so";
#[cfg(windows)]
const DYLIB_EXTENSION: &'static str = "dll";
const DYLIB_EXTENSION: &str = "dll";
const BUILD_TARGET: &str = env!("BUILD_TARGET");
@ -122,15 +122,16 @@ impl Loader {
let parser_lib_path = match env::var("TREE_SITTER_LIBDIR") {
Ok(path) => PathBuf::from(path),
_ => dirs::cache_dir()
.ok_or(anyhow!("Cannot determine cache directory"))?
.ok_or_else(|| anyhow!("Cannot determine cache directory"))?
.join("tree-sitter")
.join("lib"),
};
Ok(Self::with_parser_lib_path(parser_lib_path))
}
#[must_use]
pub fn with_parser_lib_path(parser_lib_path: PathBuf) -> Self {
Loader {
Self {
parser_lib_path,
languages_by_id: Vec::new(),
language_configurations: Vec::new(),
@ -152,6 +153,7 @@ impl Loader {
highlights.extend(names.iter().cloned());
}
#[must_use]
pub fn highlight_names(&self) -> Vec<String> {
self.highlight_names.lock().unwrap().clone()
}
@ -188,7 +190,7 @@ impl Loader {
.iter()
.map(|c| c.language_id)
.collect::<Vec<_>>();
language_ids.sort();
language_ids.sort_unstable();
language_ids.dedup();
language_ids
.into_iter()
@ -199,6 +201,7 @@ impl Loader {
}
}
#[must_use]
pub fn get_all_language_configurations(&self) -> Vec<(&LanguageConfiguration, &Path)> {
self.language_configurations
.iter()
@ -239,17 +242,14 @@ impl Loader {
if let Some(configuration_ids) = configuration_ids {
if !configuration_ids.is_empty() {
let configuration;
// If there is only one language configuration, then use it.
if configuration_ids.len() == 1 {
configuration = &self.language_configurations[configuration_ids[0]];
let configuration = if configuration_ids.len() == 1 {
&self.language_configurations[configuration_ids[0]]
}
// If multiple language configurations match, then determine which
// one to use by applying the configurations' content regexes.
else {
let file_contents = fs::read(path)
.with_context(|| format!("Failed to read path {:?}", path))?;
let file_contents =
fs::read(path).with_context(|| format!("Failed to read path {path:?}"))?;
let file_contents = String::from_utf8_lossy(&file_contents);
let mut best_score = -2isize;
let mut best_configuration_id = None;
@ -279,8 +279,8 @@ impl Loader {
}
}
configuration = &self.language_configurations[best_configuration_id.unwrap()];
}
&self.language_configurations[best_configuration_id.unwrap()]
};
let language = self.language_for_id(configuration.language_id)?;
return Ok(Some((language, configuration)));
@ -364,7 +364,7 @@ impl Loader {
library_path.set_extension(DYLIB_EXTENSION);
let parser_path = src_path.join("parser.c");
let scanner_path = self.get_scanner_path(&src_path);
let scanner_path = self.get_scanner_path(src_path);
#[cfg(feature = "wasm")]
if self.wasm_store.lock().unwrap().is_some() {
@ -382,7 +382,7 @@ impl Loader {
src_path,
scanner_path
.as_ref()
.and_then(|p| p.strip_prefix(&src_path).ok()),
.and_then(|p| p.strip_prefix(src_path).ok()),
&library_path,
false,
)?;
@ -403,15 +403,15 @@ impl Loader {
}
let library = unsafe { Library::new(&library_path) }
.with_context(|| format!("Error opening dynamic library {:?}", &library_path))?;
.with_context(|| format!("Error opening dynamic library {library_path:?}"))?;
let language = unsafe {
let language_fn: Symbol<unsafe extern "C" fn() -> Language> = library
.get(language_fn_name.as_bytes())
.with_context(|| format!("Failed to load symbol {}", language_fn_name))?;
.with_context(|| format!("Failed to load symbol {language_fn_name}"))?;
language_fn()
};
mem::forget(library);
return Ok(language);
Ok(language)
}
}
@ -437,10 +437,12 @@ impl Loader {
}
if compiler.is_like_msvc() {
command.args(&["/nologo", "/LD"]);
header_paths.iter().for_each(|path| {
command.args(["/nologo", "/LD"]);
for path in header_paths {
command.arg(format!("/I{}", path.to_string_lossy()));
});
}
if self.debug_build {
command.arg("/Od");
} else {
@ -459,11 +461,11 @@ impl Loader {
.arg("-fno-exceptions")
.arg("-g")
.arg("-o")
.arg(&library_path);
.arg(library_path);
header_paths.iter().for_each(|path| {
for path in header_paths {
command.arg(format!("-I{}", path.to_string_lossy()));
});
}
if !cfg!(windows) {
command.arg("-fPIC");
@ -505,7 +507,7 @@ impl Loader {
let command = Command::new("nm")
.arg("-W")
.arg("-U")
.arg(&library_path)
.arg(library_path)
.output();
if let Ok(output) = command {
if output.status.success() {
@ -688,6 +690,7 @@ impl Loader {
Ok(())
}
#[must_use]
pub fn highlight_config_for_injection_string<'a>(
&'a self,
string: &str,
@ -695,10 +698,7 @@ impl Loader {
) -> Option<&'a HighlightConfiguration> {
match self.language_configuration_for_injection_string(string) {
Err(e) => {
eprintln!(
"Failed to load language for injection string '{}': {}",
string, e
);
eprintln!("Failed to load language for injection string '{string}': {e}",);
None
}
Ok(None) => None,
@ -706,8 +706,7 @@ impl Loader {
match configuration.highlight_config(language, apply_all_captures, None) {
Err(e) => {
eprintln!(
"Failed to load property sheet for injection string '{}': {}",
string, e
"Failed to load property sheet for injection string '{string}': {e}",
);
None
}
@ -735,9 +734,9 @@ impl Loader {
impl PathsJSON {
fn into_vec(self) -> Option<Vec<String>> {
match self {
PathsJSON::Empty => None,
PathsJSON::Single(s) => Some(vec![s]),
PathsJSON::Multiple(s) => Some(s),
Self::Empty => None,
Self::Single(s) => Some(vec![s]),
Self::Multiple(s) => Some(s),
}
}
}
@ -779,7 +778,7 @@ impl Loader {
let initial_language_configuration_count = self.language_configurations.len();
if let Ok(package_json_contents) = fs::read_to_string(&parser_path.join("package.json")) {
if let Ok(package_json_contents) = fs::read_to_string(parser_path.join("package.json")) {
let package_json = serde_json::from_str::<PackageJSON>(&package_json_contents);
if let Ok(package_json) = package_json {
let language_count = self.languages_by_id.len();
@ -939,6 +938,7 @@ impl Loader {
*self.wasm_store.lock().unwrap() = Some(tree_sitter::WasmStore::new(engine).unwrap())
}
#[must_use]
pub fn get_scanner_path(&self, src_path: &Path) -> Option<PathBuf> {
let mut path = src_path.join("scanner.c");
for extension in ["c", "cc", "cpp"] {
@ -1054,11 +1054,12 @@ impl<'a> LanguageConfiguration<'a> {
if self.use_all_highlight_names {
for capture_name in result.query.capture_names() {
if !all_highlight_names.iter().any(|x| x == capture_name) {
all_highlight_names.push(capture_name.to_string());
all_highlight_names.push((*capture_name).to_string());
}
}
}
result.configure(all_highlight_names.as_slice());
drop(all_highlight_names);
Ok(Some(result))
}
})
@ -1113,15 +1114,16 @@ impl<'a> LanguageConfiguration<'a> {
let (path, range) = ranges
.iter()
.find(|(_, range)| range.contains(&offset_within_section))
.unwrap_or(ranges.last().unwrap());
.unwrap_or_else(|| ranges.last().unwrap());
error.offset = offset_within_section - range.start;
error.row = source[range.start..offset_within_section]
.chars()
.filter(|c| *c == '\n')
.count();
Error::from(error).context(format!("Error in query file {:?}", path))
Error::from(error).context(format!("Error in query file {path:?}"))
}
#[allow(clippy::type_complexity)]
fn read_queries(
&self,
paths: Option<&[String]>,
@ -1134,7 +1136,7 @@ impl<'a> LanguageConfiguration<'a> {
let abs_path = self.root_path.join(path);
let prev_query_len = query.len();
query += &fs::read_to_string(&abs_path)
.with_context(|| format!("Failed to read query file {:?}", path))?;
.with_context(|| format!("Failed to read query file {path:?}"))?;
path_ranges.push((path.clone(), prev_query_len..query.len()));
}
} else {
@ -1142,7 +1144,7 @@ impl<'a> LanguageConfiguration<'a> {
let path = queries_path.join(default_path);
if path.exists() {
query = fs::read_to_string(&path)
.with_context(|| format!("Failed to read query file {:?}", path))?;
.with_context(|| format!("Failed to read query file {path:?}"))?;
path_ranges.push((default_path.to_string(), 0..query.len()));
}
}
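
The largest rewrite in this file turns a late-initialized `let configuration;` into a single `let .. = if .. else ..` expression (`clippy::needless_late_init`); `sort` on a `Vec<usize>` also becomes `sort_unstable` (`clippy::stable_sort_primitive`), which skips the stable sort's allocation when the order of equal elements is irrelevant. A reduced sketch:

```rust
fn pick<'a>(ids: &[usize], table: &'a [&'a str]) -> &'a str {
    // Before (clippy::needless_late_init):
    //   let configuration;
    //   if ids.len() == 1 { configuration = table[ids[0]]; }
    //   else { /* score candidates */ configuration = table[best]; }
    // After: bind the result of the `if` expression directly.
    let configuration = if ids.len() == 1 {
        table[ids[0]]
    } else {
        // stand-in for the content-regex scoring branch
        table[*ids.iter().max().unwrap()]
    };
    configuration
}

fn main() {
    let mut language_ids = vec![3, 1, 3, 2];
    // `sort_unstable` skips the stable sort's temporary allocation; the
    // relative order of equal elements is irrelevant before `dedup`.
    language_ids.sort_unstable();
    language_ids.dedup();
    let table = ["c", "rust", "go", "zig"];
    println!("{}", pick(&language_ids, &table));
}
```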


@ -21,20 +21,9 @@ const matrix = {
arch: {
'arm64': { name: 'arm64' },
'arm': { name: 'arm' },
'armv7l': { name: 'armv7l' },
'x64': { name: 'x64' },
'x86': { name: 'x86' },
'i586': { name: 'i586' },
'mips': { name: 'mips' },
'mips64': { name: 'mips64' },
'mipsel': { name: 'mipsel' },
'mips64el': { name: 'mips64el' },
'ppc': { name: 'powerpc' },
'ppc64': { name: 'powerpc64' },
'ppc64el': { name: 'powerpc64el' },
'riscv64gc': { name: 'riscv64gc' },
's390x': { name: 's390x' },
'sparc64': { name: 'sparc64' },
}
},
'win32': {


@ -3,38 +3,38 @@ use anyhow::{Context, Result};
use std::path::{Path, PathBuf};
use std::{fs, str};
const BINDING_CC_TEMPLATE: &'static str = include_str!("./templates/binding.cc");
const BINDING_GYP_TEMPLATE: &'static str = include_str!("./templates/binding.gyp");
const INDEX_JS_TEMPLATE: &'static str = include_str!("./templates/index.js");
const LIB_RS_TEMPLATE: &'static str = include_str!("./templates/lib.rs");
const BUILD_RS_TEMPLATE: &'static str = include_str!("./templates/build.rs");
const CARGO_TOML_TEMPLATE: &'static str = include_str!("./templates/cargo.toml");
const PACKAGE_JSON_TEMPLATE: &'static str = include_str!("./templates/package.json");
const PARSER_NAME_PLACEHOLDER: &'static str = "PARSER_NAME";
const CLI_VERSION_PLACEHOLDER: &'static str = "CLI_VERSION";
const CLI_VERSION: &'static str = env!("CARGO_PKG_VERSION");
const RUST_BINDING_VERSION: &'static str = env!("RUST_BINDING_VERSION");
const RUST_BINDING_VERSION_PLACEHOLDER: &'static str = "RUST_BINDING_VERSION";
const BINDING_CC_TEMPLATE: &str = include_str!("./templates/binding.cc");
const BINDING_GYP_TEMPLATE: &str = include_str!("./templates/binding.gyp");
const INDEX_JS_TEMPLATE: &str = include_str!("./templates/index.js");
const LIB_RS_TEMPLATE: &str = include_str!("./templates/lib.rs");
const BUILD_RS_TEMPLATE: &str = include_str!("./templates/build.rs");
const CARGO_TOML_TEMPLATE: &str = include_str!("./templates/cargo.toml");
const PACKAGE_JSON_TEMPLATE: &str = include_str!("./templates/package.json");
const PARSER_NAME_PLACEHOLDER: &str = "PARSER_NAME";
const CLI_VERSION_PLACEHOLDER: &str = "CLI_VERSION";
const CLI_VERSION: &str = env!("CARGO_PKG_VERSION");
const RUST_BINDING_VERSION: &str = env!("RUST_BINDING_VERSION");
const RUST_BINDING_VERSION_PLACEHOLDER: &str = "RUST_BINDING_VERSION";
pub fn generate_binding_files(repo_path: &Path, language_name: &str) -> Result<()> {
let bindings_dir = repo_path.join("bindings");
let dashed_language_name = language_name.replace("_", "-");
let dashed_language_name = language_name.replace('_', "-");
let dashed_language_name = dashed_language_name.as_str();
// Generate rust bindings if needed.
let rust_binding_dir = bindings_dir.join("rust");
create_path(&rust_binding_dir, |path| create_dir(path))?;
create_path(&rust_binding_dir.join("lib.rs").to_owned(), |path| {
create_path(&rust_binding_dir.join("lib.rs"), |path| {
generate_file(path, LIB_RS_TEMPLATE, language_name)
})?;
create_path(&rust_binding_dir.join("build.rs").to_owned(), |path| {
create_path(&rust_binding_dir.join("build.rs"), |path| {
generate_file(path, BUILD_RS_TEMPLATE, language_name)
})?;
create_path(&repo_path.join("Cargo.toml").to_owned(), |path| {
create_path(&repo_path.join("Cargo.toml"), |path| {
generate_file(path, CARGO_TOML_TEMPLATE, dashed_language_name)
})?;
@ -42,11 +42,11 @@ pub fn generate_binding_files(repo_path: &Path, language_name: &str) -> Result<(
let node_binding_dir = bindings_dir.join("node");
create_path(&node_binding_dir, |path| create_dir(path))?;
create_path(&node_binding_dir.join("index.js").to_owned(), |path| {
create_path(&node_binding_dir.join("index.js"), |path| {
generate_file(path, INDEX_JS_TEMPLATE, language_name)
})?;
create_path(&node_binding_dir.join("binding.cc").to_owned(), |path| {
create_path(&node_binding_dir.join("binding.cc"), |path| {
generate_file(path, BINDING_CC_TEMPLATE, language_name)
})?;
@ -124,7 +124,7 @@ fn generate_file(path: &Path, template: &str, language_name: &str) -> Result<()>
}
fn create_dir(path: &Path) -> Result<()> {
fs::create_dir_all(&path)
fs::create_dir_all(path)
.with_context(|| format!("Failed to create {:?}", path.to_string_lossy()))
}
@ -147,8 +147,7 @@ where
if !path.exists() {
action(path)?;
return Ok(true);
} else {
else_action(path)?;
}
else_action(path)?;
Ok(false)
}
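
The template constants drop their explicit lifetimes (`clippy::redundant_static_lifetimes`: a `const` of reference type is already `'static`), and `replace("_", "-")` becomes `replace('_', "-")` (`clippy::single_char_pattern`); the `.to_owned()` calls on freshly joined `PathBuf`s are likewise dropped as redundant. The first two in a sketch:

```rust
// Before: const CLI_VERSION: &'static str = env!("CARGO_PKG_VERSION");
// A `const` of reference type is implicitly 'static; spelling it out is noise.
const CLI_VERSION: &str = env!("CARGO_PKG_VERSION");

fn main() {
    let language_name = "foo_bar_baz";
    // Before: language_name.replace("_", "-") -- a char pattern avoids
    // treating a single character as a &str pattern.
    let dashed = language_name.replace('_', "-");
    println!("{CLI_VERSION}: {dashed}");
}
```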


@ -10,7 +10,7 @@ use std::collections::hash_map::Entry;
use std::collections::{HashMap, VecDeque};
use std::mem;
pub(crate) fn build_lex_table(
pub fn build_lex_table(
parse_table: &mut ParseTable,
syntax_grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
@ -18,14 +18,13 @@ pub(crate) fn build_lex_table(
coincident_token_index: &CoincidentTokenIndex,
token_conflict_map: &TokenConflictMap,
) -> (LexTable, LexTable) {
let keyword_lex_table;
if syntax_grammar.word_token.is_some() {
let keyword_lex_table = if syntax_grammar.word_token.is_some() {
let mut builder = LexTableBuilder::new(lexical_grammar);
builder.add_state_for_tokens(keywords);
keyword_lex_table = builder.table;
builder.table
} else {
keyword_lex_table = LexTable::default();
}
LexTable::default()
};
let mut parse_state_ids_by_token_set: Vec<(TokenSet, Vec<ParseStateId>)> = Vec::new();
for (i, state) in parse_table.states.iter().enumerate() {
@ -34,7 +33,7 @@ pub(crate) fn build_lex_table(
.keys()
.filter_map(|token| {
if token.is_terminal() {
if keywords.contains(&token) {
if keywords.contains(token) {
syntax_grammar.word_token
} else {
Some(*token)
@ -48,7 +47,7 @@ pub(crate) fn build_lex_table(
.collect();
let mut did_merge = false;
for entry in parse_state_ids_by_token_set.iter_mut() {
for entry in &mut parse_state_ids_by_token_set {
if merge_token_set(
&mut entry.0,
&tokens,
@ -198,7 +197,7 @@ impl<'a> LexTableBuilder<'a> {
for transition in transitions {
if let Some((completed_id, completed_precedence)) = completion {
if !TokenConflictMap::prefer_transition(
&self.lexical_grammar,
self.lexical_grammar,
&transition,
completed_id,
completed_precedence,
@ -248,12 +247,11 @@ fn merge_token_set(
{
return false;
}
if !coincident_token_index.contains(symbol, existing_token) {
if token_conflict_map.does_overlap(existing_token.index, i)
|| token_conflict_map.does_overlap(i, existing_token.index)
{
return false;
}
if !coincident_token_index.contains(symbol, existing_token)
&& (token_conflict_map.does_overlap(existing_token.index, i)
|| token_conflict_map.does_overlap(i, existing_token.index))
{
return false;
}
}
}
@ -315,7 +313,7 @@ fn minimize_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) {
let mut new_state = LexState::default();
mem::swap(&mut new_state, &mut table.states[state_ids[0]]);
for (_, advance_action) in new_state.advance_actions.iter_mut() {
for (_, advance_action) in &mut new_state.advance_actions {
advance_action.state = group_ids_by_state_id[advance_action.state];
}
if let Some(eof_action) = &mut new_state.eof_action {
@ -324,18 +322,14 @@ fn minimize_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) {
new_states.push(new_state);
}
for state in parse_table.states.iter_mut() {
for state in &mut parse_table.states {
state.lex_state_id = group_ids_by_state_id[state.lex_state_id];
}
table.states = new_states;
}
fn lex_states_differ(
left: &LexState,
right: &LexState,
group_ids_by_state_id: &Vec<usize>,
) -> bool {
fn lex_states_differ(left: &LexState, right: &LexState, group_ids_by_state_id: &[usize]) -> bool {
left.advance_actions
.iter()
.zip(right.advance_actions.iter())
@ -362,7 +356,7 @@ fn sort_states(table: &mut LexTable, parse_table: &mut ParseTable) {
.map(|old_id| {
let mut state = LexState::default();
mem::swap(&mut state, &mut table.states[*old_id]);
for (_, advance_action) in state.advance_actions.iter_mut() {
for (_, advance_action) in &mut state.advance_actions {
advance_action.state = new_ids_by_old_id[advance_action.state];
}
if let Some(eof_action) = &mut state.eof_action {
@ -373,7 +367,7 @@ fn sort_states(table: &mut LexTable, parse_table: &mut ParseTable) {
.collect();
// Update the parse table's lex state references
for state in parse_table.states.iter_mut() {
for state in &mut parse_table.states {
state.lex_state_id = new_ids_by_old_id[state.lex_state_id];
}
}
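
Two recurring lints in the lex-table builder: `clippy::ptr_arg` (taking `&Vec<usize>` forces callers to have a `Vec`, while `&[usize]` accepts any slice) and `clippy::explicit_iter_loop` (`for x in v.iter_mut()` is spelled `for x in &mut v`). A sketch under those assumptions:

```rust
// Before: fn lex_states_differ(..., group_ids_by_state_id: &Vec<usize>)
// `&[usize]` accepts any slice (arrays, Vec, subslices) and drops one
// level of indirection (clippy::ptr_arg).
fn states_differ(left: &[usize], right: &[usize], group_ids: &[usize]) -> bool {
    left.iter()
        .zip(right.iter())
        .any(|(l, r)| group_ids[*l] != group_ids[*r])
}

fn main() {
    let mut states = vec![2usize, 0, 1];
    // Before: for s in states.iter_mut() { .. } (clippy::explicit_iter_loop)
    for s in &mut states {
        *s += 10;
    }
    let groups = [0usize, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6];
    println!("{}", states_differ(&states, &[12, 10, 11], &groups));
}
```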


@ -25,7 +25,7 @@ use rustc_hash::FxHasher;
type SymbolSequence = Vec<Symbol>;
type AuxiliarySymbolSequence = Vec<AuxiliarySymbolInfo>;
pub(crate) type ParseStateInfo<'a> = (SymbolSequence, ParseItemSet<'a>);
pub type ParseStateInfo<'a> = (SymbolSequence, ParseItemSet<'a>);
#[derive(Clone)]
struct AuxiliarySymbolInfo {
@ -75,14 +75,10 @@ impl<'a> ParseTableBuilder<'a> {
self.add_parse_state(
&Vec::new(),
&Vec::new(),
ParseItemSet::with(
[(
ParseItem::start(),
[Symbol::end()].iter().cloned().collect(),
)]
.iter()
.cloned(),
),
ParseItemSet::with(std::iter::once((
ParseItem::start(),
std::iter::once(&Symbol::end()).copied().collect(),
))),
);
// Compute the possible item sets for non-terminal extras.
@ -97,7 +93,7 @@ impl<'a> ParseTableBuilder<'a> {
for production in &variable.productions {
non_terminal_extra_item_sets_by_first_terminal
.entry(production.first_symbol().unwrap())
.or_insert(ParseItemSet::default())
.or_insert_with(ParseItemSet::default)
.insert(
ParseItem {
variable_index: extra_non_terminal.index as u32,
@ -105,9 +101,8 @@ impl<'a> ParseTableBuilder<'a> {
step_index: 1,
has_preceding_inherited_fields: false,
},
&[Symbol::end_of_nonterminal_extra()]
.iter()
.cloned()
&std::iter::once(&Symbol::end_of_nonterminal_extra())
.copied()
.collect(),
);
}
@ -129,7 +124,7 @@ impl<'a> ParseTableBuilder<'a> {
self.parse_state_info_by_id[entry.state_id].0.clone(),
entry.preceding_auxiliary_symbols,
entry.state_id,
item_set,
&item_set,
)?;
}
@ -195,7 +190,7 @@ impl<'a> ParseTableBuilder<'a> {
mut preceding_symbols: SymbolSequence,
mut preceding_auxiliary_symbols: Vec<AuxiliarySymbolInfo>,
state_id: ParseStateId,
item_set: ParseItemSet<'a>,
item_set: &ParseItemSet<'a>,
) -> Result<()> {
let mut terminal_successors = BTreeMap::new();
let mut non_terminal_successors = BTreeMap::new();
@ -218,7 +213,7 @@ impl<'a> ParseTableBuilder<'a> {
// for conflict resolution.
if variable.is_auxiliary() {
preceding_auxiliary_symbols
.push(self.get_auxiliary_node_info(&item_set, next_symbol));
.push(self.get_auxiliary_node_info(item_set, next_symbol));
}
// For most parse items, the symbols associated with the preceding children
@ -238,12 +233,12 @@ impl<'a> ParseTableBuilder<'a> {
non_terminal_successors
.entry(next_symbol)
.or_insert_with(|| ParseItemSet::default())
.or_insert_with(ParseItemSet::default)
.insert(successor, lookaheads);
} else {
terminal_successors
.entry(next_symbol)
.or_insert_with(|| ParseItemSet::default())
.or_insert_with(ParseItemSet::default)
.insert(successor, lookaheads);
}
}
@ -268,7 +263,7 @@ impl<'a> ParseTableBuilder<'a> {
let table_entry = self.parse_table.states[state_id]
.terminal_entries
.entry(lookahead)
.or_insert_with(|| ParseTableEntry::new());
.or_insert_with(ParseTableEntry::new);
let reduction_info = reduction_infos.entry(lookahead).or_default();
// While inserting Reduce actions, eagerly resolve conflicts related
@ -278,7 +273,7 @@ impl<'a> ParseTableBuilder<'a> {
table_entry.actions.push(action);
} else {
match Self::compare_precedence(
&self.syntax_grammar,
self.syntax_grammar,
precedence,
&[symbol],
&reduction_info.precedence,
@ -333,7 +328,7 @@ impl<'a> ParseTableBuilder<'a> {
}
entry
.or_insert_with(|| ParseTableEntry::new())
.or_insert_with(ParseTableEntry::new)
.actions
.push(ParseAction::Shift {
state: next_state_id,
@ -361,7 +356,7 @@ impl<'a> ParseTableBuilder<'a> {
// * fail, terminating the parser generation process
for symbol in lookaheads_with_conflicts.iter() {
self.handle_conflict(
&item_set,
item_set,
state_id,
&preceding_symbols,
&preceding_auxiliary_symbols,
@ -444,7 +439,7 @@ impl<'a> ParseTableBuilder<'a> {
item_set: &ParseItemSet,
state_id: ParseStateId,
preceding_symbols: &SymbolSequence,
preceding_auxiliary_symbols: &Vec<AuxiliarySymbolInfo>,
preceding_auxiliary_symbols: &[AuxiliarySymbolInfo],
conflicting_lookahead: Symbol,
reduction_info: &ReductionInfo,
) -> Result<()> {
@ -464,29 +459,27 @@ impl<'a> ParseTableBuilder<'a> {
let mut conflicting_items = HashSet::new();
for (item, lookaheads) in &item_set.entries {
if let Some(step) = item.step() {
if item.step_index > 0 {
if self
if item.step_index > 0
&& self
.item_set_builder
.first_set(&step.symbol)
.contains(&conflicting_lookahead)
{
if item.variable_index != u32::MAX {
conflicting_items.insert(item);
}
{
if item.variable_index != u32::MAX {
conflicting_items.insert(item);
}
let p = (
item.precedence(),
Symbol::non_terminal(item.variable_index as usize),
);
if let Err(i) = shift_precedence.binary_search(&p) {
shift_precedence.insert(i, p);
}
let p = (
item.precedence(),
Symbol::non_terminal(item.variable_index as usize),
);
if let Err(i) = shift_precedence.binary_search(&p) {
shift_precedence.insert(i, p);
}
}
} else if lookaheads.contains(&conflicting_lookahead) {
if item.variable_index != u32::MAX {
conflicting_items.insert(item);
}
} else if lookaheads.contains(&conflicting_lookahead) && item.variable_index != u32::MAX
{
conflicting_items.insert(item);
}
}
@ -512,7 +505,7 @@ impl<'a> ParseTableBuilder<'a> {
let mut shift_is_more = false;
for p in shift_precedence {
match Self::compare_precedence(
&self.syntax_grammar,
self.syntax_grammar,
p.0,
&[p.1],
&reduction_info.precedence,
@ -655,11 +648,10 @@ impl<'a> ParseTableBuilder<'a> {
let prec_line = if let Some(associativity) = associativity {
Some(format!(
"(precedence: {}, associativity: {:?})",
precedence, associativity
"(precedence: {precedence}, associativity: {associativity:?})",
))
} else if !precedence.is_none() {
Some(format!("(precedence: {})", precedence))
Some(format!("(precedence: {precedence})"))
} else {
None
};
@ -723,24 +715,22 @@ impl<'a> ParseTableBuilder<'a> {
};
if actual_conflict.len() > 1 {
if shift_items.len() > 0 {
if !shift_items.is_empty() {
resolution_count += 1;
write!(
&mut msg,
" {}: Specify a higher precedence in",
resolution_count
" {resolution_count}: Specify a higher precedence in",
)
.unwrap();
list_rule_names(&mut msg, &shift_items);
write!(&mut msg, " than in the other rules.\n").unwrap();
writeln!(&mut msg, " than in the other rules.").unwrap();
}
for item in &reduce_items {
resolution_count += 1;
write!(
writeln!(
&mut msg,
" {}: Specify a higher precedence in `{}` than in the other rules.\n",
resolution_count,
" {resolution_count}: Specify a higher precedence in `{}` than in the other rules.",
self.symbol_name(&Symbol::non_terminal(item.variable_index as usize))
)
.unwrap();
@ -751,19 +741,17 @@ impl<'a> ParseTableBuilder<'a> {
resolution_count += 1;
write!(
&mut msg,
" {}: Specify a left or right associativity in",
resolution_count
" {resolution_count}: Specify a left or right associativity in",
)
.unwrap();
list_rule_names(&mut msg, &reduce_items);
write!(&mut msg, "\n").unwrap();
writeln!(&mut msg).unwrap();
}
resolution_count += 1;
write!(
&mut msg,
" {}: Add a conflict for these rules: ",
resolution_count
" {resolution_count}: Add a conflict for these rules: ",
)
.unwrap();
for (i, symbol) in actual_conflict.iter().enumerate() {
@ -772,7 +760,7 @@ impl<'a> ParseTableBuilder<'a> {
}
write!(&mut msg, "`{}`", self.symbol_name(symbol)).unwrap();
}
write!(&mut msg, "\n").unwrap();
writeln!(&mut msg).unwrap();
Err(anyhow!(msg))
}
@ -805,7 +793,7 @@ impl<'a> ParseTableBuilder<'a> {
// and to the default precedence, which is zero.
(Precedence::Integer(l), Precedence::Integer(r)) if *l != 0 || *r != 0 => l.cmp(r),
(Precedence::Integer(l), Precedence::None) if *l != 0 => l.cmp(&0),
(Precedence::None, Precedence::Integer(r)) if *r != 0 => 0.cmp(&r),
(Precedence::None, Precedence::Integer(r)) if *r != 0 => 0.cmp(r),
// Named precedences can be compared to other named precedences.
_ => grammar
@ -872,7 +860,7 @@ impl<'a> ParseTableBuilder<'a> {
production_info
.field_map
.entry(field_name.clone())
.or_insert(Vec::new())
.or_default()
.push(FieldLocation {
index: i,
inherited: false,
@ -885,11 +873,11 @@ impl<'a> ParseTableBuilder<'a> {
.is_visible()
{
let info = &self.variable_info[step.symbol.index];
for (field_name, _) in &info.fields {
for field_name in info.fields.keys() {
production_info
.field_map
.entry(field_name.clone())
.or_insert(Vec::new())
.or_default()
.push(FieldLocation {
index: i,
inherited: true,
@ -903,7 +891,7 @@ impl<'a> ParseTableBuilder<'a> {
}
if item.production.steps.len() > self.parse_table.max_aliased_production_length {
self.parse_table.max_aliased_production_length = item.production.steps.len()
self.parse_table.max_aliased_production_length = item.production.steps.len();
}
if let Some(index) = self
@ -939,7 +927,7 @@ impl<'a> ParseTableBuilder<'a> {
}
fn populate_following_tokens(
result: &mut Vec<TokenSet>,
result: &mut [TokenSet],
grammar: &SyntaxGrammar,
inlines: &InlinedProductionMap,
builder: &ParseItemSetBuilder,
@ -950,7 +938,6 @@ fn populate_following_tokens(
.flat_map(|v| &v.productions)
.chain(&inlines.productions);
let all_tokens = (0..result.len())
.into_iter()
.map(Symbol::terminal)
.collect::<TokenSet>();
for production in productions {
@ -974,7 +961,7 @@ fn populate_following_tokens(
}
}
pub(crate) fn build_parse_table<'a>(
pub fn build_parse_table<'a>(
syntax_grammar: &'a SyntaxGrammar,
lexical_grammar: &'a LexicalGrammar,
inlines: &'a InlinedProductionMap,
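
Entry-API lints dominate this file: the eager `or_insert(Vec::new())` argument (flagged by `clippy::or_fun_call`) becomes `or_default()`, a closure that only calls `T::default` becomes the function path itself (`clippy::redundant_closure`), and `[x].iter().cloned()` becomes `std::iter::once(x)`. A sketch over a plain `HashMap`:

```rust
use std::collections::HashMap;

fn main() {
    let mut field_map: HashMap<String, Vec<usize>> = HashMap::new();

    // Before: .or_insert(Vec::new()) -- the argument is built even when the
    // key already exists (clippy::or_fun_call); or_default() is lazy.
    field_map.entry("name".to_string()).or_default().push(1);

    // Before: .or_insert_with(|| Vec::default()) -- a closure that only
    // calls a function can be the function path itself (redundant_closure).
    field_map.entry("body".to_string()).or_insert_with(Vec::default).push(2);

    // Before: [Symbol::end()].iter().cloned().collect() -- a one-element
    // array walked by iterator; std::iter::once is the direct spelling.
    let singleton: Vec<i32> = std::iter::once(42).collect();

    println!("{field_map:?} {singleton:?}");
}
```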


@ -3,7 +3,7 @@ use crate::generate::rules::Symbol;
use crate::generate::tables::{ParseStateId, ParseTable};
use std::fmt;
pub(crate) struct CoincidentTokenIndex<'a> {
pub struct CoincidentTokenIndex<'a> {
entries: Vec<Vec<ParseStateId>>,
grammar: &'a LexicalGrammar,
n: usize,
@ -23,7 +23,7 @@ impl<'a> CoincidentTokenIndex<'a> {
for other_symbol in state.terminal_entries.keys() {
if other_symbol.is_terminal() {
let index = result.index(symbol.index, other_symbol.index);
if result.entries[index].last().cloned() != Some(i) {
if result.entries[index].last().copied() != Some(i) {
result.entries[index].push(i);
}
}
@ -42,7 +42,8 @@ impl<'a> CoincidentTokenIndex<'a> {
!self.entries[self.index(a.index, b.index)].is_empty()
}
fn index(&self, a: usize, b: usize) -> usize {
#[must_use]
const fn index(&self, a: usize, b: usize) -> usize {
if a < b {
a * self.n + b
} else {
@ -53,20 +54,20 @@ impl<'a> CoincidentTokenIndex<'a> {
impl<'a> fmt::Debug for CoincidentTokenIndex<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "CoincidentTokenIndex {{\n")?;
writeln!(f, "CoincidentTokenIndex {{")?;
write!(f, " entries: {{\n")?;
writeln!(f, " entries: {{")?;
for i in 0..self.n {
write!(f, " {}: {{\n", self.grammar.variables[i].name)?;
writeln!(f, " {}: {{", self.grammar.variables[i].name)?;
for j in 0..self.n {
write!(
writeln!(
f,
" {}: {:?},\n",
" {}: {:?},",
self.grammar.variables[j].name,
self.entries[self.index(i, j)].len()
)?;
}
write!(f, " }},\n")?;
writeln!(f, " }},")?;
}
write!(f, " }},")?;
write!(f, "}}")?;
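
Here `write!(f, "..\n")` becomes `writeln!(f, "..")` (`clippy::write_with_newline`), and `.last().cloned()` on a `Copy` element becomes `.copied()`, which documents that only a bitwise copy happens (`clippy::cloned_instead_of_copied`). A `Debug` impl in the same shape, with a toy type:

```rust
use std::fmt;

struct Index {
    entries: Vec<Vec<usize>>,
}

impl fmt::Debug for Index {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Before: write!(f, "Index {{\n")?; (clippy::write_with_newline)
        writeln!(f, "Index {{")?;
        for (i, row) in self.entries.iter().enumerate() {
            // `.copied()` instead of `.cloned()`: usize is Copy, so only a
            // bitwise copy happens (clippy::cloned_instead_of_copied).
            writeln!(f, "  {i}: last = {:?},", row.last().copied())?;
        }
        write!(f, "}}")
    }
}

fn main() {
    let idx = Index { entries: vec![vec![1, 2], vec![]] };
    println!("{idx:?}");
}
```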


@ -22,9 +22,9 @@ lazy_static! {
};
}
/// A ParseItem represents an in-progress match of a single production in a grammar.
/// A [`ParseItem`] represents an in-progress match of a single production in a grammar.
#[derive(Clone, Copy, Debug)]
pub(crate) struct ParseItem<'a> {
pub struct ParseItem<'a> {
/// The index of the parent rule within the grammar.
pub variable_index: u32,
/// The number of symbols that have already been matched.
@ -47,35 +47,35 @@ pub(crate) struct ParseItem<'a> {
pub has_preceding_inherited_fields: bool,
}
/// A ParseItemSet represents a set of in-progress matches of productions in a
/// A [`ParseItemSet`] represents a set of in-progress matches of productions in a
/// grammar, and for each in-progress match, a set of "lookaheads" - tokens that
/// are allowed to *follow* the in-progress rule. This object corresponds directly
/// to a state in the final parse table.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct ParseItemSet<'a> {
#[derive(Clone, Debug, PartialEq, Eq, Default)]
pub struct ParseItemSet<'a> {
pub entries: Vec<(ParseItem<'a>, TokenSet)>,
}
/// A ParseItemSetCore is like a ParseItemSet, but without the lookahead
/// A [`ParseItemSetCore`] is like a [`ParseItemSet`], but without the lookahead
/// information. Parse states with the same core are candidates for merging.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct ParseItemSetCore<'a> {
pub struct ParseItemSetCore<'a> {
pub entries: Vec<ParseItem<'a>>,
}
pub(crate) struct ParseItemDisplay<'a>(
pub struct ParseItemDisplay<'a>(
pub &'a ParseItem<'a>,
pub &'a SyntaxGrammar,
pub &'a LexicalGrammar,
);
pub(crate) struct TokenSetDisplay<'a>(
pub struct TokenSetDisplay<'a>(
pub &'a TokenSet,
pub &'a SyntaxGrammar,
pub &'a LexicalGrammar,
);
pub(crate) struct ParseItemSetDisplay<'a>(
pub struct ParseItemSetDisplay<'a>(
pub &'a ParseItemSet<'a>,
pub &'a SyntaxGrammar,
pub &'a LexicalGrammar,
@ -116,16 +116,19 @@ impl<'a> ParseItem<'a> {
}
}
#[must_use]
pub fn is_done(&self) -> bool {
self.step_index as usize == self.production.steps.len()
}
pub fn is_augmented(&self) -> bool {
#[must_use]
pub const fn is_augmented(&self) -> bool {
self.variable_index == u32::MAX
}
/// Create an item like this one, but advanced by one step.
pub fn successor(&self) -> ParseItem<'a> {
#[must_use]
pub const fn successor(&self) -> ParseItem<'a> {
ParseItem {
variable_index: self.variable_index,
production: self.production,
@ -136,8 +139,8 @@ impl<'a> ParseItem<'a> {
/// Create an item identical to this one, but with a different production.
/// This is used when dynamically "inlining" certain symbols in a production.
pub fn substitute_production(&self, production: &'a Production) -> ParseItem<'a> {
let mut result = self.clone();
pub const fn substitute_production(&self, production: &'a Production) -> ParseItem<'a> {
let mut result = *self;
result.production = production;
result
}
@ -172,14 +175,6 @@ impl<'a> ParseItemSet<'a> {
}
}
impl<'a> Default for ParseItemSet<'a> {
fn default() -> Self {
Self {
entries: Vec::new(),
}
}
}
impl<'a> fmt::Display for ParseItemDisplay<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
if self.0.is_augmented() {
@ -196,10 +191,10 @@ impl<'a> fmt::Display for ParseItemDisplay<'a> {
if i == self.0.step_index as usize {
write!(f, "")?;
if let Some(associativity) = step.associativity {
if !step.precedence.is_none() {
write!(f, " ({} {:?})", step.precedence, associativity)?;
if step.precedence.is_none() {
write!(f, " ({associativity:?})")?;
} else {
write!(f, " ({:?})", associativity)?;
write!(f, " ({} {associativity:?})", step.precedence)?;
}
} else if !step.precedence.is_none() {
write!(f, " ({})", step.precedence)?;
@ -211,7 +206,7 @@ impl<'a> fmt::Display for ParseItemDisplay<'a> {
if let Some(variable) = self.2.variables.get(step.symbol.index) {
write!(f, "{}", &variable.name)?;
} else {
write!(f, "{}-{}", "terminal", step.symbol.index)?;
write!(f, "terminal-{}", step.symbol.index)?;
}
} else if step.symbol.is_external() {
write!(f, "{}", &self.1.external_tokens[step.symbol.index].name)?;
@ -228,10 +223,10 @@ impl<'a> fmt::Display for ParseItemDisplay<'a> {
write!(f, "")?;
if let Some(step) = self.0.production.steps.last() {
if let Some(associativity) = step.associativity {
if !step.precedence.is_none() {
write!(f, " ({} {:?})", step.precedence, associativity)?;
if step.precedence.is_none() {
write!(f, " ({associativity:?})")?;
} else {
write!(f, " ({:?})", associativity)?;
write!(f, " ({} {associativity:?})", step.precedence)?;
}
} else if !step.precedence.is_none() {
write!(f, " ({})", step.precedence)?;
@ -255,7 +250,7 @@ impl<'a> fmt::Display for TokenSetDisplay<'a> {
if let Some(variable) = self.2.variables.get(symbol.index) {
write!(f, "{}", &variable.name)?;
} else {
write!(f, "{}-{}", "terminal", symbol.index)?;
write!(f, "terminal-{}", symbol.index)?;
}
} else if symbol.is_external() {
write!(f, "{}", &self.1.external_tokens[symbol.index].name)?;
@ -270,7 +265,7 @@ impl<'a> fmt::Display for TokenSetDisplay<'a> {
impl<'a> fmt::Display for ParseItemSetDisplay<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
for (item, lookaheads) in self.0.entries.iter() {
for (item, lookaheads) in &self.0.entries {
writeln!(
f,
"{}\t{}",
@ -288,7 +283,7 @@ impl<'a> Hash for ParseItem<'a> {
hasher.write_u32(self.step_index);
hasher.write_i32(self.production.dynamic_precedence);
hasher.write_usize(self.production.steps.len());
hasher.write_i32(self.has_preceding_inherited_fields as i32);
hasher.write_i32(i32::from(self.has_preceding_inherited_fields));
self.precedence().hash(hasher);
self.associativity().hash(hasher);
@ -344,7 +339,7 @@ impl<'a> PartialEq for ParseItem<'a> {
}
}
return true;
true
}
}
@ -364,7 +359,7 @@ impl<'a> Ord for ParseItem<'a> {
.len()
.cmp(&other.production.steps.len())
})
.then_with(|| self.precedence().cmp(&other.precedence()))
.then_with(|| self.precedence().cmp(other.precedence()))
.then_with(|| self.associativity().cmp(&other.associativity()))
.then_with(|| {
for (i, step) in self.production.steps.iter().enumerate() {
@ -383,7 +378,7 @@ impl<'a> Ord for ParseItem<'a> {
return o;
}
}
return Ordering::Equal;
Ordering::Equal
})
}
}
@ -399,7 +394,7 @@ impl<'a> Eq for ParseItem<'a> {}
impl<'a> Hash for ParseItemSet<'a> {
fn hash<H: Hasher>(&self, hasher: &mut H) {
hasher.write_usize(self.entries.len());
for (item, lookaheads) in self.entries.iter() {
for (item, lookaheads) in &self.entries {
item.hash(hasher);
lookaheads.hash(hasher);
}
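
Two lints worth noting above: `clippy::derivable_impls` (the hand-written `Default` for `ParseItemSet` was exactly what `#[derive(Default)]` generates, so the impl is deleted in favor of the derive) and `clippy::clone_on_copy` (for a `Copy` type, `*self` says what actually happens where `self.clone()` implies more). A sketch with toy types:

```rust
// Before: a hand-written `impl Default` that only produced Vec::new() --
// exactly what the derive generates (clippy::derivable_impls).
#[derive(Debug, Default)]
struct ItemSet {
    entries: Vec<u32>,
}

#[derive(Clone, Copy, Debug)]
struct Item {
    step_index: u32,
}

impl Item {
    // Before: let mut result = self.clone();
    // For a Copy type, `*self` says what actually happens
    // (clippy::clone_on_copy) and lets the method be `const fn`.
    const fn successor(&self) -> Self {
        let mut result = *self;
        result.step_index += 1;
        result
    }
}

fn main() {
    println!("{:?}", ItemSet::default());
    println!("{:?}", Item { step_index: 0 }.successor());
}
```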


@ -16,7 +16,7 @@ struct FollowSetInfo {
propagates_lookaheads: bool,
}
pub(crate) struct ParseItemSetBuilder<'a> {
pub struct ParseItemSetBuilder<'a> {
syntax_grammar: &'a SyntaxGrammar,
lexical_grammar: &'a LexicalGrammar,
first_sets: HashMap<Symbol, TokenSet>,
@ -80,7 +80,10 @@ impl<'a> ParseItemSetBuilder<'a> {
for i in 0..syntax_grammar.variables.len() {
let symbol = Symbol::non_terminal(i);
let first_set = &mut result.first_sets.entry(symbol).or_insert(TokenSet::new());
let first_set = result
.first_sets
.entry(symbol)
.or_insert_with(TokenSet::new);
processed_non_terminals.clear();
symbols_to_process.clear();
symbols_to_process.push(symbol);
@ -88,10 +91,7 @@ impl<'a> ParseItemSetBuilder<'a> {
if current_symbol.is_terminal() || current_symbol.is_external() {
first_set.insert(current_symbol);
} else if processed_non_terminals.insert(current_symbol) {
for production in syntax_grammar.variables[current_symbol.index]
.productions
.iter()
{
for production in &syntax_grammar.variables[current_symbol.index].productions {
if let Some(step) = production.steps.first() {
symbols_to_process.push(step.symbol);
}
@ -100,7 +100,7 @@ impl<'a> ParseItemSetBuilder<'a> {
}
// The LAST set is defined in a similar way to the FIRST set.
let last_set = &mut result.last_sets.entry(symbol).or_insert(TokenSet::new());
let last_set = result.last_sets.entry(symbol).or_insert_with(TokenSet::new);
processed_non_terminals.clear();
symbols_to_process.clear();
symbols_to_process.push(symbol);
@ -108,10 +108,7 @@ impl<'a> ParseItemSetBuilder<'a> {
if current_symbol.is_terminal() || current_symbol.is_external() {
last_set.insert(current_symbol);
} else if processed_non_terminals.insert(current_symbol) {
for production in syntax_grammar.variables[current_symbol.index]
.productions
.iter()
{
for production in &syntax_grammar.variables[current_symbol.index].productions {
if let Some(step) = production.steps.last() {
symbols_to_process.push(step.symbol);
}
@ -235,7 +232,7 @@ impl<'a> ParseItemSetBuilder<'a> {
result
}
pub(crate) fn transitive_closure(&mut self, item_set: &ParseItemSet<'a>) -> ParseItemSet<'a> {
pub fn transitive_closure(&mut self, item_set: &ParseItemSet<'a>) -> ParseItemSet<'a> {
let mut result = ParseItemSet::default();
for (item, lookaheads) in &item_set.entries {
if let Some(productions) = self
@ -270,11 +267,9 @@ impl<'a> ParseItemSetBuilder<'a> {
let next_step = item.successor().step();
// Determine which tokens can follow this non-terminal.
let following_tokens = if let Some(next_step) = next_step {
let following_tokens = next_step.map_or(lookaheads, |next_step| {
self.first_sets.get(&next_step.symbol).unwrap()
} else {
&lookaheads
};
});
// Use the pre-computed *additions* to expand the non-terminal.
for addition in &self.transitive_closure_additions[step.symbol.index] {
@ -291,9 +286,9 @@ impl<'a> ParseItemSetBuilder<'a> {
impl<'a> fmt::Debug for ParseItemSetBuilder<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "ParseItemSetBuilder {{\n")?;
writeln!(f, "ParseItemSetBuilder {{")?;
write!(f, " first_sets: {{\n")?;
writeln!(f, " first_sets: {{")?;
for (symbol, first_set) in &self.first_sets {
let name = match symbol.kind {
SymbolType::NonTerminal => &self.syntax_grammar.variables[symbol.index].name,
@ -301,16 +296,15 @@ impl<'a> fmt::Debug for ParseItemSetBuilder<'a> {
SymbolType::Terminal => &self.lexical_grammar.variables[symbol.index].name,
SymbolType::End | SymbolType::EndOfNonTerminalExtra => "END",
};
write!(
writeln!(
f,
" first({:?}): {}\n",
name,
TokenSetDisplay(first_set, &self.syntax_grammar, &self.lexical_grammar)
" first({name:?}): {}",
TokenSetDisplay(first_set, self.syntax_grammar, self.lexical_grammar)
)?;
}
write!(f, " }}\n")?;
writeln!(f, " }}")?;
write!(f, " last_sets: {{\n")?;
writeln!(f, " last_sets: {{")?;
for (symbol, last_set) in &self.last_sets {
let name = match symbol.kind {
SymbolType::NonTerminal => &self.syntax_grammar.variables[symbol.index].name,
@ -318,26 +312,25 @@ impl<'a> fmt::Debug for ParseItemSetBuilder<'a> {
SymbolType::Terminal => &self.lexical_grammar.variables[symbol.index].name,
SymbolType::End | SymbolType::EndOfNonTerminalExtra => "END",
};
write!(
writeln!(
f,
" last({:?}): {}\n",
name,
TokenSetDisplay(last_set, &self.syntax_grammar, &self.lexical_grammar)
" last({name:?}): {}",
TokenSetDisplay(last_set, self.syntax_grammar, self.lexical_grammar)
)?;
}
write!(f, " }}\n")?;
writeln!(f, " }}")?;
write!(f, " additions: {{\n")?;
writeln!(f, " additions: {{")?;
for (i, variable) in self.syntax_grammar.variables.iter().enumerate() {
write!(f, " {}: {{\n", variable.name)?;
writeln!(f, " {}: {{", variable.name)?;
for addition in &self.transitive_closure_additions[i] {
write!(
writeln!(
f,
" {}\n",
" {}",
ParseItemDisplay(&addition.item, self.syntax_grammar, self.lexical_grammar)
)?;
}
write!(f, " }},\n")?;
writeln!(f, " }},")?;
}
write!(f, " }},")?;
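
The `if let .. else` that selects between the first-set lookup and the plain lookaheads collapses into `Option::map_or` (`clippy::option_if_let_else`). The same shape on a toy first-set map, with hypothetical names:

```rust
use std::collections::HashMap;

fn main() {
    let first_sets: HashMap<u32, Vec<u32>> = HashMap::from([(7, vec![1, 2])]);
    let lookaheads = vec![9];
    let next_step: Option<u32> = Some(7);

    // Before (clippy::option_if_let_else):
    //   let following = if let Some(s) = next_step {
    //       first_sets.get(&s).unwrap()
    //   } else {
    //       &lookaheads
    //   };
    let following = next_step.map_or(&lookaheads, |s| first_sets.get(&s).unwrap());
    println!("{following:?}");
}
```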


@ -9,7 +9,7 @@ use log::info;
use std::collections::{HashMap, HashSet};
use std::mem;
pub(crate) fn minimize_parse_table(
pub fn minimize_parse_table(
parse_table: &mut ParseTable,
syntax_grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
@ -67,9 +67,9 @@ impl<'a> Minimizer<'a> {
symbol,
..
} => {
if !self.simple_aliases.contains_key(&symbol)
&& !self.syntax_grammar.supertype_symbols.contains(&symbol)
&& !aliased_symbols.contains(&symbol)
if !self.simple_aliases.contains_key(symbol)
&& !self.syntax_grammar.supertype_symbols.contains(symbol)
&& !aliased_symbols.contains(symbol)
&& self.syntax_grammar.variables[symbol.index].kind
!= VariableType::Named
&& (unit_reduction_symbol.is_none()
@ -97,21 +97,22 @@ impl<'a> Minimizer<'a> {
}
}
for state in self.parse_table.states.iter_mut() {
for state in &mut self.parse_table.states {
let mut done = false;
while !done {
done = true;
state.update_referenced_states(|other_state_id, state| {
if let Some(symbol) = unit_reduction_symbols_by_state.get(&other_state_id) {
done = false;
match state.nonterminal_entries.get(symbol) {
Some(GotoAction::Goto(state_id)) => *state_id,
_ => other_state_id,
}
} else {
other_state_id
}
})
unit_reduction_symbols_by_state.get(&other_state_id).map_or(
other_state_id,
|symbol| {
done = false;
match state.nonterminal_entries.get(symbol) {
Some(GotoAction::Goto(state_id)) => *state_id,
_ => other_state_id,
}
},
)
});
}
}
}
@ -198,7 +199,7 @@ impl<'a> Minimizer<'a> {
&self,
left_state: &ParseState,
right_state: &ParseState,
group_ids_by_state_id: &Vec<ParseStateId>,
group_ids_by_state_id: &[ParseStateId],
) -> bool {
for (token, left_entry) in &left_state.terminal_entries {
if let Some(right_entry) = right_state.terminal_entries.get(token) {
@ -223,15 +224,15 @@ impl<'a> Minimizer<'a> {
}
for token in right_state.terminal_entries.keys() {
if !left_state.terminal_entries.contains_key(token) {
if self.token_conflicts(
if !left_state.terminal_entries.contains_key(token)
&& self.token_conflicts(
left_state.id,
right_state.id,
left_state.terminal_entries.keys(),
*token,
) {
return true;
}
)
{
return true;
}
}
@ -242,7 +243,7 @@ impl<'a> Minimizer<'a> {
&self,
state1: &ParseState,
state2: &ParseState,
group_ids_by_state_id: &Vec<ParseStateId>,
group_ids_by_state_id: &[ParseStateId],
) -> bool {
for (token, entry1) in &state1.terminal_entries {
if let ParseAction::Shift { state: s1, .. } = entry1.actions.last().unwrap() {
@ -252,12 +253,10 @@ impl<'a> Minimizer<'a> {
let group2 = group_ids_by_state_id[*s2];
if group1 != group2 {
info!(
"split states {} {} - successors for {} are split: {} {}",
"split states {} {} - successors for {} are split: {s1} {s2}",
state1.id,
state2.id,
self.symbol_name(token),
s1,
s2,
);
return true;
}
@ -275,12 +274,10 @@ impl<'a> Minimizer<'a> {
let group2 = group_ids_by_state_id[*s2];
if group1 != group2 {
info!(
"split states {} {} - successors for {} are split: {} {}",
"split states {} {} - successors for {} are split: {s1} {s2}",
state1.id,
state2.id,
self.symbol_name(symbol),
s1,
s2,
);
return true;
}
@ -300,16 +297,14 @@ impl<'a> Minimizer<'a> {
token: &Symbol,
entry1: &ParseTableEntry,
entry2: &ParseTableEntry,
group_ids_by_state_id: &Vec<ParseStateId>,
group_ids_by_state_id: &[ParseStateId],
) -> bool {
// To be compatible, entries need to have the same actions.
let actions1 = &entry1.actions;
let actions2 = &entry2.actions;
if actions1.len() != actions2.len() {
info!(
"split states {} {} - differing action counts for token {}",
state_id1,
state_id2,
"split states {state_id1} {state_id2} - differing action counts for token {}",
self.symbol_name(token)
);
return true;
@ -334,22 +329,15 @@ impl<'a> Minimizer<'a> {
let group2 = group_ids_by_state_id[*s2];
if group1 == group2 && is_repetition1 == is_repetition2 {
continue;
} else {
info!(
"split states {} {} - successors for {} are split: {} {}",
state_id1,
state_id2,
self.symbol_name(token),
s1,
s2,
);
return true;
}
info!(
"split states {state_id1} {state_id2} - successors for {} are split: {s1} {s2}",
self.symbol_name(token),
);
return true;
} else if action1 != action2 {
info!(
"split states {} {} - unequal actions for {}",
state_id1,
state_id2,
"split states {state_id1} {state_id2} - unequal actions for {}",
self.symbol_name(token),
);
return true;
@ -367,10 +355,7 @@ impl<'a> Minimizer<'a> {
new_token: Symbol,
) -> bool {
if new_token == Symbol::end_of_nonterminal_extra() {
info!(
"split states {} {} - end of non-terminal extra",
left_id, right_id,
);
info!("split states {left_id} {right_id} - end of non-terminal extra",);
return true;
}
@ -378,9 +363,7 @@ impl<'a> Minimizer<'a> {
// existing lookahead tokens.
if new_token.is_external() {
info!(
"split states {} {} - external token {}",
left_id,
right_id,
"split states {left_id} {right_id} - external token {}",
self.symbol_name(&new_token),
);
return true;
@ -395,9 +378,7 @@ impl<'a> Minimizer<'a> {
.any(|external| external.corresponding_internal_token == Some(new_token))
{
info!(
"split states {} {} - internal/external token {}",
left_id,
right_id,
"split states {left_id} {right_id} - internal/external token {}",
self.symbol_name(&new_token),
);
return true;
@ -405,27 +386,24 @@ impl<'a> Minimizer<'a> {
// Do not add a token if it conflicts with an existing token.
for token in existing_tokens {
if token.is_terminal() {
if !(self.syntax_grammar.word_token == Some(*token)
if token.is_terminal()
&& !(self.syntax_grammar.word_token == Some(*token)
&& self.keywords.contains(&new_token))
&& !(self.syntax_grammar.word_token == Some(new_token)
&& self.keywords.contains(token))
&& (self
&& !(self.syntax_grammar.word_token == Some(new_token)
&& self.keywords.contains(token))
&& (self
.token_conflict_map
.does_conflict(new_token.index, token.index)
|| self
.token_conflict_map
.does_conflict(new_token.index, token.index)
|| self
.token_conflict_map
.does_match_same_string(new_token.index, token.index))
{
info!(
"split states {} {} - token {} conflicts with {}",
left_id,
right_id,
self.symbol_name(&new_token),
self.symbol_name(token),
);
return true;
}
.does_match_same_string(new_token.index, token.index))
{
info!(
"split states {left_id} {right_id} - token {} conflicts with {}",
self.symbol_name(&new_token),
self.symbol_name(token),
);
return true;
}
}
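
Several hunks in this file merge a nested `if` into the outer condition with `&&` (`clippy::collapsible_if`), dropping one indentation level without changing short-circuit behaviour. The minimal form:

```rust
fn main() {
    let contains_key = false;
    let conflicts = true;

    // Before (clippy::collapsible_if):
    //   if !contains_key {
    //       if conflicts {
    //           println!("split");
    //       }
    //   }
    // After: one condition; evaluation order and short-circuiting are identical.
    if !contains_key && conflicts {
        println!("split");
    }
}
```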


@ -1,5 +1,5 @@
pub(crate) mod build_lex_table;
pub(crate) mod build_parse_table;
pub mod build_lex_table;
pub mod build_parse_table;
mod coincident_tokens;
mod item;
mod item_set_builder;
@ -20,7 +20,7 @@ use anyhow::Result;
use log::info;
use std::collections::{BTreeSet, HashMap};
pub(crate) fn build_tables(
pub fn build_tables(
syntax_grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
simple_aliases: &AliasMap,
@ -69,8 +69,8 @@ pub(crate) fn build_tables(
if let Some(report_symbol_name) = report_symbol_name {
report_state_info(
&syntax_grammar,
&lexical_grammar,
syntax_grammar,
lexical_grammar,
&parse_table,
&parse_state_info,
report_symbol_name,
@ -98,9 +98,8 @@ fn populate_error_state(
// First identify the *conflict-free tokens*: tokens that do not overlap with
// any other token in any way, besides matching exactly the same string.
let conflict_free_tokens: TokenSet = (0..n)
.into_iter()
.filter_map(|i| {
let conflicts_with_other_tokens = (0..n).into_iter().any(|j| {
let conflicts_with_other_tokens = (0..n).any(|j| {
j != i
&& !coincident_token_index.contains(Symbol::terminal(i), Symbol::terminal(j))
&& token_conflict_map.does_match_shorter_or_longer(i, j)
@ -126,18 +125,19 @@ fn populate_error_state(
// the *conflict-free tokens* identified above.
for i in 0..n {
let symbol = Symbol::terminal(i);
if !conflict_free_tokens.contains(&symbol) && !keywords.contains(&symbol) {
if syntax_grammar.word_token != Some(symbol) {
if let Some(t) = conflict_free_tokens.iter().find(|t| {
!coincident_token_index.contains(symbol, *t)
&& token_conflict_map.does_conflict(symbol.index, t.index)
}) {
info!(
"error recovery - exclude token {} because of conflict with {}",
lexical_grammar.variables[i].name, lexical_grammar.variables[t.index].name
);
continue;
}
if !conflict_free_tokens.contains(&symbol)
&& !keywords.contains(&symbol)
&& syntax_grammar.word_token != Some(symbol)
{
if let Some(t) = conflict_free_tokens.iter().find(|t| {
!coincident_token_index.contains(symbol, *t)
&& token_conflict_map.does_conflict(symbol.index, t.index)
}) {
info!(
"error recovery - exclude token {} because of conflict with {}",
lexical_grammar.variables[i].name, lexical_grammar.variables[t.index].name
);
continue;
}
}
info!(
@ -361,7 +361,7 @@ fn mark_fragile_tokens(
) {
let n = lexical_grammar.variables.len();
let mut valid_tokens_mask = Vec::with_capacity(n);
for state in parse_table.states.iter_mut() {
for state in &mut parse_table.states {
valid_tokens_mask.clear();
valid_tokens_mask.resize(n, false);
for token in state.terminal_entries.keys() {
@ -369,14 +369,12 @@ fn mark_fragile_tokens(
valid_tokens_mask[token.index] = true;
}
}
for (token, entry) in state.terminal_entries.iter_mut() {
for (token, entry) in &mut state.terminal_entries {
if token.is_terminal() {
for (i, is_valid) in valid_tokens_mask.iter().enumerate() {
if *is_valid {
if token_conflict_map.does_overlap(i, token.index) {
entry.reusable = false;
break;
}
if *is_valid && token_conflict_map.does_overlap(i, token.index) {
entry.reusable = false;
break;
}
}
}
@ -388,7 +386,7 @@ fn report_state_info<'a>(
syntax_grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
parse_table: &ParseTable,
parse_state_info: &Vec<ParseStateInfo<'a>>,
parse_state_info: &[ParseStateInfo<'a>],
report_symbol_name: &'a str,
) {
let mut all_state_indices = BTreeSet::new();
@ -399,7 +397,7 @@ fn report_state_info<'a>(
for (i, state) in parse_table.states.iter().enumerate() {
all_state_indices.insert(i);
let item_set = &parse_state_info[state.id];
for (item, _) in item_set.1.entries.iter() {
for (item, _) in &item_set.1.entries {
if !item.is_augmented() {
symbols_with_state_indices[item.variable_index as usize]
.1
@ -424,7 +422,7 @@ fn report_state_info<'a>(
width = max_symbol_name_length
);
}
eprintln!("");
eprintln!();
let state_indices = if report_symbol_name == "*" {
Some(&all_state_indices)
@ -441,14 +439,14 @@ fn report_state_info<'a>(
};
if let Some(state_indices) = state_indices {
let mut state_indices = state_indices.into_iter().cloned().collect::<Vec<_>>();
let mut state_indices = state_indices.iter().copied().collect::<Vec<_>>();
state_indices.sort_unstable_by_key(|i| (parse_table.states[*i].core_id, *i));
for state_index in state_indices {
let id = parse_table.states[state_index].id;
let (preceding_symbols, item_set) = &parse_state_info[id];
eprintln!("state index: {}", state_index);
eprintln!("state id: {}", id);
eprintln!("state index: {state_index}");
eprintln!("state id: {id}");
eprint!("symbol sequence:");
for symbol in preceding_symbols {
let name = if symbol.is_terminal() {
@ -458,11 +456,11 @@ fn report_state_info<'a>(
} else {
&syntax_grammar.variables[symbol.index].name
};
eprint!(" {}", name);
eprint!(" {name}");
}
eprintln!(
"\nitems:\n{}",
self::item::ParseItemSetDisplay(&item_set, syntax_grammar, lexical_grammar,),
self::item::ParseItemSetDisplay(item_set, syntax_grammar, lexical_grammar,),
);
}
}
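
Several lints land in this file: `explicit_iter_loop` (`for state in &mut parse_table.states` instead of `.iter_mut()`), `ptr_arg` (`&Vec<ParseStateInfo>` loosened to `&[ParseStateInfo]`), `println_empty_string` (`eprintln!()` over `eprintln!("")`), and `collapsible_if`, where a nested `if` with no other body merges into one `&&` condition, as in `mark_fragile_tokens`. A minimal sketch of the last one, with illustrative names:

    fn should_mark_unreusable(is_valid: bool, overlaps: bool) -> bool {
        // before:
        //   if is_valid {
        //       if overlaps { return true; }
        //   }
        if is_valid && overlaps {
            return true;
        }
        false
    }

    fn main() {
        assert!(should_mark_unreusable(true, true));
        assert!(!should_mark_unreusable(true, false));
    }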

View file

@ -16,7 +16,7 @@ struct TokenConflictStatus {
matches_different_string: bool,
}
pub(crate) struct TokenConflictMap<'a> {
pub struct TokenConflictMap<'a> {
n: usize,
status_matrix: Vec<TokenConflictStatus>,
following_tokens: Vec<TokenSet>,
@ -104,19 +104,17 @@ impl<'a> TokenConflictMap<'a> {
}
pub fn prefer_token(grammar: &LexicalGrammar, left: (i32, usize), right: (i32, usize)) -> bool {
if left.0 > right.0 {
return true;
} else if left.0 < right.0 {
return false;
}
match grammar.variables[left.1]
.implicit_precedence
.cmp(&grammar.variables[right.1].implicit_precedence)
{
match left.0.cmp(&right.0) {
Ordering::Less => false,
Ordering::Greater => true,
Ordering::Equal => left.1 < right.1,
Ordering::Equal => match grammar.variables[left.1]
.implicit_precedence
.cmp(&grammar.variables[right.1].implicit_precedence)
{
Ordering::Less => false,
Ordering::Greater => true,
Ordering::Equal => left.1 < right.1,
},
}
}
@ -135,10 +133,9 @@ impl<'a> TokenConflictMap<'a> {
return false;
}
if has_separator_transitions
&& grammar
&& !grammar
.variable_indices_for_nfa_states(&t.states)
.position(|i| i == completed_id)
.is_none()
.any(|i| i == completed_id)
{
return false;
}
@ -149,53 +146,53 @@ impl<'a> TokenConflictMap<'a> {
impl<'a> fmt::Debug for TokenConflictMap<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "TokenConflictMap {{\n")?;
writeln!(f, "TokenConflictMap {{")?;
let syntax_grammar = SyntaxGrammar::default();
write!(f, " following_tokens: {{\n")?;
writeln!(f, " following_tokens: {{")?;
for (i, following_tokens) in self.following_tokens.iter().enumerate() {
write!(
writeln!(
f,
" follow({:?}): {},\n",
" follow({:?}): {},",
self.grammar.variables[i].name,
TokenSetDisplay(following_tokens, &syntax_grammar, &self.grammar)
TokenSetDisplay(following_tokens, &syntax_grammar, self.grammar)
)?;
}
write!(f, " }},\n")?;
writeln!(f, " }},")?;
write!(f, " starting_characters: {{\n")?;
writeln!(f, " starting_characters: {{")?;
for i in 0..self.n {
write!(
writeln!(
f,
" {:?}: {:?},\n",
" {:?}: {:?},",
self.grammar.variables[i].name, self.starting_chars_by_index[i]
)?;
}
write!(f, " }},\n")?;
writeln!(f, " }},")?;
write!(f, " following_characters: {{\n")?;
writeln!(f, " following_characters: {{")?;
for i in 0..self.n {
write!(
writeln!(
f,
" {:?}: {:?},\n",
" {:?}: {:?},",
self.grammar.variables[i].name, self.following_chars_by_index[i]
)?;
}
write!(f, " }},\n")?;
writeln!(f, " }},")?;
write!(f, " status_matrix: {{\n")?;
writeln!(f, " status_matrix: {{")?;
for i in 0..self.n {
write!(f, " {:?}: {{\n", self.grammar.variables[i].name)?;
writeln!(f, " {:?}: {{", self.grammar.variables[i].name)?;
for j in 0..self.n {
write!(
writeln!(
f,
" {:?}: {:?},\n",
" {:?}: {:?},",
self.grammar.variables[j].name,
self.status_matrix[matrix_index(self.n, i, j)]
)?;
}
write!(f, " }},\n")?;
writeln!(f, " }},")?;
}
write!(f, " }},")?;
write!(f, "}}")?;
@ -203,7 +200,7 @@ impl<'a> fmt::Debug for TokenConflictMap<'a> {
}
}
fn matrix_index(variable_count: usize, i: usize, j: usize) -> usize {
const fn matrix_index(variable_count: usize, i: usize, j: usize) -> usize {
variable_count * i + j
}
@ -221,8 +218,8 @@ fn get_starting_chars(cursor: &mut NfaCursor, grammar: &LexicalGrammar) -> Vec<C
}
fn get_following_chars(
starting_chars: &Vec<CharacterSet>,
following_tokens: &Vec<TokenSet>,
starting_chars: &[CharacterSet],
following_tokens: &[TokenSet],
) -> Vec<CharacterSet> {
following_tokens
.iter()
@ -241,7 +238,7 @@ fn get_following_chars(
fn compute_conflict_status(
cursor: &mut NfaCursor,
grammar: &LexicalGrammar,
following_chars: &Vec<CharacterSet>,
following_chars: &[CharacterSet],
i: usize,
j: usize,
) -> (TokenConflictStatus, TokenConflictStatus) {
@ -330,9 +327,8 @@ fn compute_conflict_status(
if variable_id == completed_id {
successor_contains_completed_id = true;
break;
} else {
advanced_id = Some(variable_id);
}
advanced_id = Some(variable_id);
}
// Determine which action is preferred: matching the already complete
@ -357,12 +353,10 @@ fn compute_conflict_status(
result.1.does_match_valid_continuation = true;
}
}
} else if completed_id == i {
result.0.matches_prefix = true;
} else {
if completed_id == i {
result.0.matches_prefix = true;
} else {
result.1.matches_prefix = true;
}
result.1.matches_prefix = true;
}
}
}
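
Two patterns here: `write!(f, "...\n")` becomes `writeln!` (clippy's `write_with_newline`), and `prefer_token`'s `>` / `<` / fall-through chain becomes a single `cmp` call (clippy's `comparison_chain`), with the old tie-breaking logic nested under `Ordering::Equal`. A standalone sketch of the latter, simplified from the hunk:

    use std::cmp::Ordering;

    // One `cmp` replaces the chained comparisons; the tie-breaker
    // lives under the `Equal` arm.
    fn prefer(left: (i32, usize), right: (i32, usize)) -> bool {
        match left.0.cmp(&right.0) {
            Ordering::Greater => true,
            Ordering::Less => false,
            Ordering::Equal => left.1 < right.1,
        }
    }

    fn main() {
        assert!(prefer((2, 0), (1, 9)));
        assert!(prefer((1, 3), (1, 5)));
    }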

View file

@ -29,20 +29,20 @@ impl CharacterTree {
1 => {
let range = &ranges[0];
if range.start == range.end {
Some(CharacterTree::Compare {
Some(Self::Compare {
operator: Comparator::Equal,
value: range.start,
consequence: Some(Box::new(CharacterTree::Yes)),
consequence: Some(Box::new(Self::Yes)),
alternative: None,
})
} else {
Some(CharacterTree::Compare {
Some(Self::Compare {
operator: Comparator::GreaterOrEqual,
value: range.start,
consequence: Some(Box::new(CharacterTree::Compare {
consequence: Some(Box::new(Self::Compare {
operator: Comparator::LessOrEqual,
value: range.end,
consequence: Some(Box::new(CharacterTree::Yes)),
consequence: Some(Box::new(Self::Yes)),
alternative: None,
})),
alternative: None,
@ -52,14 +52,14 @@ impl CharacterTree {
len => {
let mid = len / 2;
let mid_range = &ranges[mid];
Some(CharacterTree::Compare {
Some(Self::Compare {
operator: Comparator::Less,
value: mid_range.start,
consequence: Self::from_ranges(&ranges[0..mid]).map(Box::new),
alternative: Some(Box::new(CharacterTree::Compare {
alternative: Some(Box::new(Self::Compare {
operator: Comparator::LessOrEqual,
value: mid_range.end,
consequence: Some(Box::new(CharacterTree::Yes)),
consequence: Some(Box::new(Self::Yes)),
alternative: Self::from_ranges(&ranges[(mid + 1)..]).map(Box::new),
})),
})
@ -70,8 +70,8 @@ impl CharacterTree {
#[cfg(test)]
fn contains(&self, c: char) -> bool {
match self {
CharacterTree::Yes => true,
CharacterTree::Compare {
Self::Yes => true,
Self::Compare {
value,
operator,
alternative,
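
The `CharacterTree` hunks are clippy's `use_self`: inside an `impl`, the enclosing type is spelled `Self` rather than repeated by name. A minimal sketch on a hypothetical type:

    #[derive(Debug)]
    enum Tree {
        Yes,
        Node(Box<Tree>),
    }

    impl Tree {
        // rather than Tree::Node(Box::new(self))
        fn wrap(self) -> Self {
            Self::Node(Box::new(self))
        }
    }

    fn main() {
        println!("{:?}", Tree::Yes.wrap());
    }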

View file

@ -1,9 +1,9 @@
pub(crate) fn split_state_id_groups<S>(
states: &Vec<S>,
pub fn split_state_id_groups<S>(
states: &[S],
state_ids_by_group_id: &mut Vec<Vec<usize>>,
group_ids_by_state_id: &mut Vec<usize>,
group_ids_by_state_id: &mut [usize],
start_group_id: usize,
mut f: impl FnMut(&S, &S, &Vec<usize>) -> bool,
mut f: impl FnMut(&S, &S, &[usize]) -> bool,
) -> bool {
let mut result = false;
@ -33,7 +33,7 @@ pub(crate) fn split_state_id_groups<S>(
}
let right_state = &states[right_state_id];
if f(left_state, right_state, &group_ids_by_state_id) {
if f(left_state, right_state, group_ids_by_state_id) {
split_state_ids.push(right_state_id);
}
@ -44,9 +44,9 @@ pub(crate) fn split_state_id_groups<S>(
}
// If any states were removed from the group, add them all as a new group.
if split_state_ids.len() > 0 {
if !split_state_ids.is_empty() {
result = true;
state_ids_by_group_id[group_id].retain(|i| !split_state_ids.contains(&i));
state_ids_by_group_id[group_id].retain(|i| !split_state_ids.contains(i));
let new_group_id = state_ids_by_group_id.len();
for id in &split_state_ids {
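
This file picks up `len_zero` (`!split_state_ids.is_empty()` over `.len() > 0`) and a needless-borrow fix: `retain`'s closure already yields a reference, so `contains(i)` needs no extra `&`. A compact sketch with illustrative names:

    // `is_empty` states the intent; inside `retain`, `i` is already
    // `&usize`, so `contains(i)` takes it as-is.
    fn split_off(groups: &mut Vec<usize>, split: &[usize]) -> bool {
        if !split.is_empty() {
            groups.retain(|i| !split.contains(i));
            return true;
        }
        false
    }

    fn main() {
        let mut g = vec![1, 2, 3];
        assert!(split_off(&mut g, &[2]));
        assert_eq!(g, [1, 3]);
    }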

View file

@ -4,7 +4,7 @@ use std::collections::HashMap;
use std::fmt;
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) enum VariableType {
pub enum VariableType {
Hidden,
Auxiliary,
Anonymous,
@ -14,20 +14,20 @@ pub(crate) enum VariableType {
// Input grammar
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct Variable {
pub struct Variable {
pub name: String,
pub kind: VariableType,
pub rule: Rule,
}
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) enum PrecedenceEntry {
pub enum PrecedenceEntry {
Name(String),
Symbol(String),
}
#[derive(Debug, Default, PartialEq, Eq)]
pub(crate) struct InputGrammar {
pub struct InputGrammar {
pub name: String,
pub variables: Vec<Variable>,
pub extra_symbols: Vec<Rule>,
@ -42,7 +42,7 @@ pub(crate) struct InputGrammar {
// Extracted lexical grammar
#[derive(Debug, PartialEq, Eq)]
pub(crate) struct LexicalVariable {
pub struct LexicalVariable {
pub name: String,
pub kind: VariableType,
pub implicit_precedence: i32,
@ -50,7 +50,7 @@ pub(crate) struct LexicalVariable {
}
#[derive(Debug, Default, PartialEq, Eq)]
pub(crate) struct LexicalGrammar {
pub struct LexicalGrammar {
pub nfa: Nfa,
pub variables: Vec<LexicalVariable>,
}
@ -58,7 +58,7 @@ pub(crate) struct LexicalGrammar {
// Extracted syntax grammar
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub(crate) struct ProductionStep {
pub struct ProductionStep {
pub symbol: Symbol,
pub precedence: Precedence,
pub associativity: Option<Associativity>,
@ -67,33 +67,33 @@ pub(crate) struct ProductionStep {
}
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub(crate) struct Production {
pub struct Production {
pub steps: Vec<ProductionStep>,
pub dynamic_precedence: i32,
}
#[derive(Default)]
pub(crate) struct InlinedProductionMap {
pub struct InlinedProductionMap {
pub productions: Vec<Production>,
pub production_map: HashMap<(*const Production, u32), Vec<usize>>,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct SyntaxVariable {
pub struct SyntaxVariable {
pub name: String,
pub kind: VariableType,
pub productions: Vec<Production>,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct ExternalToken {
pub struct ExternalToken {
pub name: String,
pub kind: VariableType,
pub corresponding_internal_token: Option<Symbol>,
}
#[derive(Debug, Default)]
pub(crate) struct SyntaxGrammar {
pub struct SyntaxGrammar {
pub variables: Vec<SyntaxVariable>,
pub extra_symbols: Vec<Symbol>,
pub expected_conflicts: Vec<Vec<Symbol>>,
@ -106,7 +106,7 @@ pub(crate) struct SyntaxGrammar {
#[cfg(test)]
impl ProductionStep {
pub(crate) fn new(symbol: Symbol) -> Self {
pub fn new(symbol: Symbol) -> Self {
Self {
symbol,
precedence: Precedence::None,
@ -116,11 +116,7 @@ impl ProductionStep {
}
}
pub(crate) fn with_prec(
self,
precedence: Precedence,
associativity: Option<Associativity>,
) -> Self {
pub fn with_prec(self, precedence: Precedence, associativity: Option<Associativity>) -> Self {
Self {
symbol: self.symbol,
precedence,
@ -130,7 +126,7 @@ impl ProductionStep {
}
}
pub(crate) fn with_alias(self, value: &str, is_named: bool) -> Self {
pub fn with_alias(self, value: &str, is_named: bool) -> Self {
Self {
symbol: self.symbol,
precedence: self.precedence,
@ -142,7 +138,7 @@ impl ProductionStep {
field_name: self.field_name,
}
}
pub(crate) fn with_field_name(self, name: &str) -> Self {
pub fn with_field_name(self, name: &str) -> Self {
Self {
symbol: self.symbol,
precedence: self.precedence,
@ -155,7 +151,7 @@ impl ProductionStep {
impl Production {
pub fn first_symbol(&self) -> Option<Symbol> {
self.steps.first().map(|s| s.symbol.clone())
self.steps.first().map(|s| s.symbol)
}
}
@ -195,24 +191,24 @@ impl Variable {
}
impl VariableType {
pub fn is_visible(&self) -> bool {
*self == VariableType::Named || *self == VariableType::Anonymous
pub fn is_visible(self) -> bool {
self == Self::Named || self == Self::Anonymous
}
}
impl LexicalGrammar {
pub fn variable_indices_for_nfa_states<'a>(
&'a self,
state_ids: &'a Vec<u32>,
state_ids: &'a [u32],
) -> impl Iterator<Item = usize> + 'a {
let mut prev = None;
state_ids.iter().filter_map(move |state_id| {
let variable_id = self.variable_index_for_nfa_state(*state_id);
if prev != Some(variable_id) {
if prev == Some(variable_id) {
None
} else {
prev = Some(variable_id);
prev
} else {
None
}
})
}
@ -246,7 +242,7 @@ impl InlinedProductionMap {
.map(|production_indices| {
production_indices
.iter()
.cloned()
.copied()
.map(move |index| &self.productions[index])
})
}
@ -255,8 +251,8 @@ impl InlinedProductionMap {
impl fmt::Display for PrecedenceEntry {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
PrecedenceEntry::Name(n) => write!(f, "'{}'", n),
PrecedenceEntry::Symbol(s) => write!(f, "$.{}", s),
Self::Name(n) => write!(f, "'{n}'"),
Self::Symbol(s) => write!(f, "$.{s}"),
}
}
}
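
Besides the blanket `pub(crate)` → `pub` relaxation, this file shows `cloned_instead_of_copied` (`.copied()` for `Copy` element types, and `map(|s| s.symbol)` instead of cloning a `Copy` symbol) and `trivially_copy_pass_by_ref` (`is_visible(self)` taking the small `Copy` enum by value). A one-function sketch of the `copied` change:

    // For `Copy` items, `copied()` makes the cheap bitwise copy explicit.
    fn production_indices(indices: &[usize]) -> Vec<usize> {
        indices.iter().copied().collect() // rather than .cloned()
    }

    fn main() {
        assert_eq!(production_indices(&[3, 1]), vec![3, 1]);
    }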

View file

@ -22,7 +22,7 @@ use lazy_static::lazy_static;
use regex::{Regex, RegexBuilder};
use semver::Version;
use std::io::Write;
use std::path::{Path, PathBuf};
use std::path::Path;
use std::process::{Command, Stdio};
use std::{env, fs};
@ -39,7 +39,7 @@ struct GeneratedParser {
}
pub fn generate_parser_in_directory(
repo_path: &PathBuf,
repo_path: &Path,
grammar_path: Option<&str>,
abi_version: usize,
generate_bindings: bool,
@ -50,12 +50,12 @@ pub fn generate_parser_in_directory(
let header_path = src_path.join("tree_sitter");
// Read the grammar.json.
let grammar_json = match grammar_path {
Some(path) => load_grammar_file(path.as_ref(), js_runtime)?,
None => {
let grammar_js_path = grammar_path.map_or(repo_path.join("grammar.js"), |s| s.into());
load_grammar_file(&grammar_js_path, js_runtime)?
}
let grammar_json = if let Some(path) = grammar_path {
load_grammar_file(path.as_ref(), js_runtime)?
} else {
let grammar_js_path =
grammar_path.map_or(repo_path.join("grammar.js"), std::convert::Into::into);
load_grammar_file(&grammar_js_path, js_runtime)?
};
// Ensure that the output directories exist.
@ -63,8 +63,8 @@ pub fn generate_parser_in_directory(
fs::create_dir_all(&header_path)?;
if grammar_path.is_none() {
fs::write(&src_path.join("grammar.json"), &grammar_json)
.with_context(|| format!("Failed to write grammar.json to {:?}", src_path))?;
fs::write(src_path.join("grammar.json"), &grammar_json)
.with_context(|| format!("Failed to write grammar.json to {src_path:?}"))?;
}
// Parse and preprocess the grammar.
@ -81,7 +81,7 @@ pub fn generate_parser_in_directory(
&language_name,
syntax_grammar,
lexical_grammar,
inlines,
&inlines,
simple_aliases,
abi_version,
report_symbol_name,
@ -92,7 +92,7 @@ pub fn generate_parser_in_directory(
write_file(&header_path.join("parser.h"), tree_sitter::PARSER_HEADER)?;
if generate_bindings {
binding_files::generate_binding_files(&repo_path, &language_name)?;
binding_files::generate_binding_files(repo_path, &language_name)?;
}
Ok(())
@ -107,7 +107,7 @@ pub fn generate_parser_for_grammar(grammar_json: &str) -> Result<(String, String
&input_grammar.name,
syntax_grammar,
lexical_grammar,
inlines,
&inlines,
simple_aliases,
tree_sitter::LANGUAGE_VERSION,
None,
@ -116,10 +116,10 @@ pub fn generate_parser_for_grammar(grammar_json: &str) -> Result<(String, String
}
fn generate_parser_for_grammar_with_opts(
name: &String,
name: &str,
syntax_grammar: SyntaxGrammar,
lexical_grammar: LexicalGrammar,
inlines: InlinedProductionMap,
inlines: &InlinedProductionMap,
simple_aliases: AliasMap,
abi_version: usize,
report_symbol_name: Option<&str>,
@ -137,7 +137,7 @@ fn generate_parser_for_grammar_with_opts(
&lexical_grammar,
&simple_aliases,
&variable_info,
&inlines,
inlines,
report_symbol_name,
)?;
let c_code = render_c_code(
@ -169,10 +169,7 @@ pub fn load_grammar_file(grammar_path: &Path, js_runtime: Option<&str>) -> Resul
Some("json") => {
Ok(fs::read_to_string(grammar_path).with_context(|| "Failed to load grammar.json")?)
}
_ => Err(anyhow!(
"Unknown grammar file extension: {:?}",
grammar_path
)),
_ => Err(anyhow!("Unknown grammar file extension: {grammar_path:?}",)),
}
}
@ -213,7 +210,7 @@ fn load_js_grammar_file(grammar_path: &Path, js_runtime: Option<&str>) -> Result
match output.status.code() {
None => panic!("Node process was killed"),
Some(0) => {}
Some(code) => return Err(anyhow!("Node process exited with status {}", code)),
Some(code) => return Err(anyhow!("Node process exited with status {code}")),
}
let mut result =
String::from_utf8(output.stdout).with_context(|| "Got invalid UTF8 from node")?;
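
The signature change from `repo_path: &PathBuf` to `&Path` is clippy's `ptr_arg`: borrowed parameters should be `&Path`, `&[T]`, or `&str` rather than `&PathBuf`, `&Vec<T>`, or `&String`, since owned values still coerce and callers without one avoid allocating. A runnable sketch (the file name matches the hunk; the rest is illustrative):

    use std::path::{Path, PathBuf};

    fn grammar_file(repo_path: &Path) -> PathBuf {
        repo_path.join("grammar.js")
    }

    fn main() {
        let owned = PathBuf::from("repo");
        let _ = grammar_file(&owned);            // &PathBuf coerces to &Path
        let _ = grammar_file(Path::new("repo")); // no PathBuf needed at all
    }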

View file

@ -28,7 +28,7 @@ pub enum NfaState {
},
}
#[derive(PartialEq, Eq)]
#[derive(PartialEq, Eq, Default)]
pub struct Nfa {
pub states: Vec<NfaState>,
}
@ -47,40 +47,36 @@ pub struct NfaTransition {
pub states: Vec<u32>,
}
impl Default for Nfa {
fn default() -> Self {
Self { states: Vec::new() }
}
}
const END: u32 = char::MAX as u32 + 1;
impl CharacterSet {
/// Create a character set with a single character.
pub fn empty() -> Self {
CharacterSet { ranges: Vec::new() }
pub const fn empty() -> Self {
Self { ranges: Vec::new() }
}
/// Create a character set with a given *inclusive* range of characters.
#[allow(clippy::single_range_in_vec_init)]
pub fn from_range(mut first: char, mut last: char) -> Self {
if first > last {
swap(&mut first, &mut last);
}
CharacterSet {
Self {
ranges: vec![(first as u32)..(last as u32 + 1)],
}
}
/// Create a character set with a single character.
#[allow(clippy::single_range_in_vec_init)]
pub fn from_char(c: char) -> Self {
CharacterSet {
Self {
ranges: vec![(c as u32)..(c as u32 + 1)],
}
}
/// Create a character set containing all characters *not* present
/// in this character set.
pub fn negate(mut self) -> CharacterSet {
pub fn negate(mut self) -> Self {
let mut i = 0;
let mut previous_end = 0;
while i < self.ranges.len() {
@ -110,10 +106,10 @@ impl CharacterSet {
self
}
pub fn add(mut self, other: &CharacterSet) -> Self {
pub fn add(mut self, other: &Self) -> Self {
let mut index = 0;
for range in &other.ranges {
index = self.add_int_range(index, range.start as u32, range.end as u32);
index = self.add_int_range(index, range.start, range.end);
}
self
}
@ -143,7 +139,7 @@ impl CharacterSet {
i
}
pub fn does_intersect(&self, other: &CharacterSet) -> bool {
pub fn does_intersect(&self, other: &Self) -> bool {
let mut left_ranges = self.ranges.iter();
let mut right_ranges = other.ranges.iter();
let mut left_range = left_ranges.next();
@ -163,7 +159,7 @@ impl CharacterSet {
/// Get the set of characters that are present in both this set
/// and the other set. Remove those common characters from both
/// of the operands.
pub fn remove_intersection(&mut self, other: &mut CharacterSet) -> CharacterSet {
pub fn remove_intersection(&mut self, other: &mut Self) -> Self {
let mut intersection = Vec::new();
let mut left_i = 0;
let mut right_i = 0;
@ -209,29 +205,28 @@ impl CharacterSet {
}
}
}
Ordering::Equal => {
// [ L ]
// [ R ]
if left.end < right.end {
intersection.push(left.start..left.end);
right.start = left.end;
self.ranges.remove(left_i);
}
// [ L ]
// [ R ]
else if left.end == right.end {
intersection.push(left.clone());
self.ranges.remove(left_i);
other.ranges.remove(right_i);
}
// [ L ]
// [ R ]
else if left.end > right.end {
intersection.push(right.clone());
left.start = right.end;
other.ranges.remove(right_i);
}
// [ L ]
// [ R ]
Ordering::Equal if left.end < right.end => {
intersection.push(left.start..left.end);
right.start = left.end;
self.ranges.remove(left_i);
}
// [ L ]
// [ R ]
Ordering::Equal if left.end == right.end => {
intersection.push(left.clone());
self.ranges.remove(left_i);
other.ranges.remove(right_i);
}
// [ L ]
// [ R ]
Ordering::Equal if left.end > right.end => {
intersection.push(right.clone());
left.start = right.end;
other.ranges.remove(right_i);
}
Ordering::Equal => {}
Ordering::Greater => {
// [ L ]
// [ R ]
@ -271,30 +266,30 @@ impl CharacterSet {
}
}
}
CharacterSet {
Self {
ranges: intersection,
}
}
/// Produces a `CharacterSet` containing every character in `self` that is not present in
/// `other`.
pub fn difference(mut self, mut other: CharacterSet) -> CharacterSet {
pub fn difference(mut self, mut other: Self) -> Self {
self.remove_intersection(&mut other);
self
}
/// Produces a `CharacterSet` containing every character that is in _exactly one_ of `self` or
/// `other`, but is not present in both sets.
pub fn symmetric_difference(mut self, mut other: CharacterSet) -> CharacterSet {
pub fn symmetric_difference(mut self, mut other: Self) -> Self {
self.remove_intersection(&mut other);
self.add(&other)
}
pub fn iter<'a>(&'a self) -> impl Iterator<Item = u32> + 'a {
self.ranges.iter().flat_map(|r| r.clone())
pub fn iter(&self) -> impl Iterator<Item = u32> + '_ {
self.ranges.iter().flat_map(std::clone::Clone::clone)
}
pub fn chars<'a>(&'a self) -> impl Iterator<Item = char> + 'a {
pub fn chars(&self) -> impl Iterator<Item = char> + '_ {
self.iter().filter_map(char::from_u32)
}
@ -329,11 +324,10 @@ impl CharacterSet {
prev_range_successor += 1;
}
prev_range = Some(range.start..c);
None
} else {
prev_range = Some(c..c);
None
}
None
})
.collect()
}
@ -344,13 +338,19 @@ impl CharacterSet {
}
impl Ord for CharacterSet {
fn cmp(&self, other: &CharacterSet) -> Ordering {
fn cmp(&self, other: &Self) -> Ordering {
let count_cmp = self
.ranges
.iter()
.map(|r| r.len())
.map(std::iter::ExactSizeIterator::len)
.sum::<usize>()
.cmp(&other.ranges.iter().map(|r| r.len()).sum());
.cmp(
&other
.ranges
.iter()
.map(std::iter::ExactSizeIterator::len)
.sum(),
);
if count_cmp != Ordering::Equal {
return count_cmp;
}
@ -368,12 +368,12 @@ impl Ord for CharacterSet {
}
}
}
return Ordering::Equal;
Ordering::Equal
}
}
impl PartialOrd for CharacterSet {
fn partial_cmp(&self, other: &CharacterSet) -> Option<Ordering> {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
@ -390,7 +390,7 @@ impl fmt::Debug for CharacterSet {
if i > 0 {
write!(f, ", ")?;
}
write!(f, "{:?}", c)?;
write!(f, "{c:?}")?;
}
write!(f, "]")?;
Ok(())
@ -398,8 +398,8 @@ impl fmt::Debug for CharacterSet {
}
impl Nfa {
pub fn new() -> Self {
Nfa { states: Vec::new() }
pub const fn new() -> Self {
Self { states: Vec::new() }
}
pub fn last_state_id(&self) -> u32 {
@ -409,9 +409,9 @@ impl Nfa {
impl fmt::Debug for Nfa {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "Nfa {{ states: {{\n")?;
writeln!(f, "Nfa {{ states: {{")?;
for (i, state) in self.states.iter().enumerate() {
write!(f, " {}: {:?},\n", i, state)?;
writeln!(f, " {i}: {state:?},")?;
}
write!(f, "}} }}")?;
Ok(())
@ -434,7 +434,7 @@ impl<'a> NfaCursor<'a> {
}
pub fn force_reset(&mut self, states: Vec<u32>) {
self.state_ids = states
self.state_ids = states;
}
pub fn transition_chars(&self) -> impl Iterator<Item = (&CharacterSet, bool)> {
@ -472,9 +472,8 @@ impl<'a> NfaCursor<'a> {
let intersection = result[i].characters.remove_intersection(&mut chars);
if !intersection.is_empty() {
let mut intersection_states = result[i].states.clone();
match intersection_states.binary_search(&state) {
Err(j) => intersection_states.insert(j, state),
_ => {}
if let Err(j) = intersection_states.binary_search(&state) {
intersection_states.insert(j, state);
}
let intersection_transition = NfaTransition {
characters: intersection,
@ -824,8 +823,7 @@ mod tests {
.map(|(chars, is_sep, prec, state)| (chars, *is_sep, *prec, *state))
),
row.1,
"row {}",
i
"row {i}",
);
}
}
@ -966,12 +964,11 @@ mod tests {
row.right
);
let symm_difference = row.left_only.clone().add(&mut row.right_only.clone());
let symm_difference = row.left_only.clone().add(&row.right_only);
assert_eq!(
row.left.clone().symmetric_difference(row.right.clone()),
symm_difference,
"row {}b: {:?} ~~ {:?}",
i,
"row {i}b: {:?} ~~ {:?}",
row.left,
row.right
)
@ -1066,10 +1063,7 @@ mod tests {
expected_ranges,
} in table.iter()
{
let ruled_out_chars = ruled_out_chars
.into_iter()
.map(|c: &char| *c as u32)
.collect();
let ruled_out_chars = ruled_out_chars.iter().map(|c: &char| *c as u32).collect();
let mut set = CharacterSet::empty();
for c in chars {
set = set.add_char(*c);
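
Two fixes worth calling out in this file: the hand-written `Default` impl produced only field defaults, so it collapses into `#[derive(Default)]` (clippy's `derivable_impls`), and because `Vec::new` is a `const fn`, constructors like `Nfa::new` and `CharacterSet::empty` can be `const` too (`missing_const_for_fn`). A simplified sketch, with the state type reduced to `u32`:

    // Deriving Default replaces a manual impl that only produced
    // field defaults; `const fn` works because `Vec::new` is const.
    #[derive(PartialEq, Eq, Default, Debug)]
    struct Nfa {
        states: Vec<u32>,
    }

    impl Nfa {
        const fn new() -> Self {
            Self { states: Vec::new() }
        }
    }

    fn main() {
        assert_eq!(Nfa::new(), Nfa::default());
    }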

View file

@ -6,19 +6,19 @@ use std::cmp::Ordering;
use std::collections::{BTreeMap, HashMap, HashSet};
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) enum ChildType {
pub enum ChildType {
Normal(Symbol),
Aliased(Alias),
}
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub(crate) struct FieldInfo {
pub struct FieldInfo {
pub quantity: ChildQuantity,
pub types: Vec<ChildType>,
}
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub(crate) struct VariableInfo {
pub struct VariableInfo {
pub fields: HashMap<String, FieldInfo>,
pub children: FieldInfo,
pub children_without_fields: FieldInfo,
@ -26,7 +26,7 @@ pub(crate) struct VariableInfo {
}
#[derive(Debug, Serialize, PartialEq, Eq, Default, PartialOrd, Ord)]
pub(crate) struct NodeInfoJSON {
pub struct NodeInfoJSON {
#[serde(rename = "type")]
kind: String,
named: bool,
@ -39,14 +39,14 @@ pub(crate) struct NodeInfoJSON {
}
#[derive(Clone, Debug, Serialize, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) struct NodeTypeJSON {
pub struct NodeTypeJSON {
#[serde(rename = "type")]
kind: String,
named: bool,
}
#[derive(Debug, Serialize, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) struct FieldInfoJSON {
pub struct FieldInfoJSON {
multiple: bool,
required: bool,
types: Vec<NodeTypeJSON>,
@ -61,7 +61,7 @@ pub struct ChildQuantity {
impl Default for FieldInfoJSON {
fn default() -> Self {
FieldInfoJSON {
Self {
multiple: false,
required: true,
types: Vec::new(),
@ -76,23 +76,25 @@ impl Default for ChildQuantity {
}
impl ChildQuantity {
fn zero() -> Self {
ChildQuantity {
#[must_use]
const fn zero() -> Self {
Self {
exists: false,
required: false,
multiple: false,
}
}
fn one() -> Self {
ChildQuantity {
#[must_use]
const fn one() -> Self {
Self {
exists: true,
required: true,
multiple: false,
}
}
fn append(&mut self, other: ChildQuantity) {
fn append(&mut self, other: Self) {
if other.exists {
if self.exists || other.multiple {
self.multiple = true;
@ -104,7 +106,7 @@ impl ChildQuantity {
}
}
fn union(&mut self, other: ChildQuantity) -> bool {
fn union(&mut self, other: Self) -> bool {
let mut result = false;
if !self.exists && other.exists {
result = true;
@ -144,7 +146,7 @@ impl ChildQuantity {
/// 2. aliases. If a parent node type `M` is aliased as some other type `N`,
/// then nodes which *appear* to have type `N` may have internal structure based
/// on `M`.
pub(crate) fn get_variable_info(
pub fn get_variable_info(
syntax_grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
default_aliases: &AliasMap,
@ -209,12 +211,12 @@ pub(crate) fn get_variable_info(
let field_info = variable_info
.fields
.entry(field_name.clone())
.or_insert(FieldInfo::default());
.or_insert_with(FieldInfo::default);
did_change |= extend_sorted(&mut field_info.types, Some(&child_type));
let production_field_quantity = production_field_quantities
.entry(field_name)
.or_insert(ChildQuantity::zero());
.or_insert_with(ChildQuantity::zero);
// Inherit the types and quantities of hidden children associated with fields.
if child_is_hidden && child_symbol.is_non_terminal() {
@ -252,13 +254,13 @@ pub(crate) fn get_variable_info(
for (field_name, child_field_info) in &child_variable_info.fields {
production_field_quantities
.entry(field_name)
.or_insert(ChildQuantity::zero())
.or_insert_with(ChildQuantity::zero)
.append(child_field_info.quantity);
did_change |= extend_sorted(
&mut variable_info
.fields
.entry(field_name.clone())
.or_insert(FieldInfo::default())
.or_insert_with(FieldInfo::default)
.types,
&child_field_info.types,
);
@ -308,12 +310,12 @@ pub(crate) fn get_variable_info(
.quantity
.union(production_children_without_fields_quantity);
for (field_name, info) in variable_info.fields.iter_mut() {
for (field_name, info) in &mut variable_info.fields {
did_change |= info.quantity.union(
production_field_quantities
.get(field_name)
.cloned()
.unwrap_or(ChildQuantity::zero()),
.copied()
.unwrap_or_else(ChildQuantity::zero),
);
}
}
@ -345,8 +347,8 @@ pub(crate) fn get_variable_info(
.types
.retain(child_type_is_visible);
}
for variable_info in result.iter_mut() {
for (_, field_info) in variable_info.fields.iter_mut() {
for variable_info in &mut result {
for field_info in variable_info.fields.values_mut() {
field_info.types.retain(child_type_is_visible);
}
variable_info.fields.retain(|_, v| !v.types.is_empty());
@ -359,11 +361,11 @@ pub(crate) fn get_variable_info(
Ok(result)
}
pub(crate) fn generate_node_types_json(
pub fn generate_node_types_json(
syntax_grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
default_aliases: &AliasMap,
variable_info: &Vec<VariableInfo>,
variable_info: &[VariableInfo],
) -> Vec<NodeInfoJSON> {
let mut node_types_json = BTreeMap::new();
@ -373,7 +375,7 @@ pub(crate) fn generate_node_types_json(
named: alias.is_named,
},
ChildType::Normal(symbol) => {
if let Some(alias) = default_aliases.get(&symbol) {
if let Some(alias) = default_aliases.get(symbol) {
NodeTypeJSON {
kind: alias.value.clone(),
named: alias.is_named,
@ -408,15 +410,15 @@ pub(crate) fn generate_node_types_json(
};
let populate_field_info_json = |json: &mut FieldInfoJSON, info: &FieldInfo| {
if info.types.len() > 0 {
if info.types.is_empty() {
json.required = false;
} else {
json.multiple |= info.quantity.multiple;
json.required &= info.quantity.required;
json.types
.extend(info.types.iter().map(child_type_to_node_type));
json.types.sort_unstable();
json.types.dedup();
} else {
json.required = false;
}
};
@ -432,7 +434,7 @@ pub(crate) fn generate_node_types_json(
if !default_aliases.contains_key(extra_symbol) {
aliases_by_symbol
.entry(*extra_symbol)
.or_insert(HashSet::new())
.or_insert_with(HashSet::new)
.insert(None);
}
}
@ -441,7 +443,7 @@ pub(crate) fn generate_node_types_json(
for step in &production.steps {
aliases_by_symbol
.entry(step.symbol)
.or_insert(HashSet::new())
.or_insert_with(HashSet::new)
.insert(
step.alias
.as_ref()
@ -451,7 +453,10 @@ pub(crate) fn generate_node_types_json(
}
}
}
aliases_by_symbol.insert(Symbol::non_terminal(0), [None].iter().cloned().collect());
aliases_by_symbol.insert(
Symbol::non_terminal(0),
std::iter::once(&None).cloned().collect(),
);
let mut subtype_map = Vec::new();
for (i, info) in variable_info.iter().enumerate() {
@ -516,7 +521,7 @@ pub(crate) fn generate_node_types_json(
});
let fields_json = node_type_json.fields.as_mut().unwrap();
for (new_field, field_info) in info.fields.iter() {
for (new_field, field_info) in &info.fields {
let field_json = fields_json.entry(new_field.clone()).or_insert_with(|| {
// If another rule is aliased with the same name, and does *not* have this field,
// then this field cannot be required.
@ -558,7 +563,7 @@ pub(crate) fn generate_node_types_json(
}
});
for (_, node_type_json) in node_types_json.iter_mut() {
for node_type_json in node_types_json.values_mut() {
if node_type_json
.children
.as_ref()
@ -571,7 +576,7 @@ pub(crate) fn generate_node_types_json(
process_supertypes(children, &subtype_map);
}
if let Some(fields) = &mut node_type_json.fields {
for (_, field_info) in fields.iter_mut() {
for field_info in fields.values_mut() {
process_supertypes(field_info, &subtype_map);
}
}
@ -590,11 +595,11 @@ pub(crate) fn generate_node_types_json(
.unwrap_or(&empty)
.iter()
.map(move |alias| {
if let Some(alias) = alias {
(&alias.value, alias.kind())
} else {
(&variable.name, variable.kind)
}
alias
.as_ref()
.map_or((&variable.name, variable.kind), |alias| {
(&alias.value, alias.kind())
})
})
});
let external_tokens =
@ -608,11 +613,9 @@ pub(crate) fn generate_node_types_json(
.unwrap_or(&empty)
.iter()
.map(move |alias| {
if let Some(alias) = alias {
alias.as_ref().map_or((&token.name, token.kind), |alias| {
(&alias.value, alias.kind())
} else {
(&token.name, token.kind)
}
})
})
});
@ -630,7 +633,7 @@ pub(crate) fn generate_node_types_json(
children.required = false;
}
if let Some(fields) = &mut node_type_json.fields {
for (_, field) in fields.iter_mut() {
for field in fields.values_mut() {
field.required = false;
}
}
@ -647,7 +650,7 @@ pub(crate) fn generate_node_types_json(
}
let mut result = node_types_json.into_iter().map(|e| e.1).collect::<Vec<_>>();
result.extend(anonymous_node_types.into_iter());
result.extend(anonymous_node_types);
result.sort_unstable_by(|a, b| {
b.subtypes
.is_some()
@ -682,9 +685,9 @@ fn variable_type_for_child_type(
match child_type {
ChildType::Aliased(alias) => alias.kind(),
ChildType::Normal(symbol) => {
if syntax_grammar.supertype_symbols.contains(&symbol) {
if syntax_grammar.supertype_symbols.contains(symbol) {
VariableType::Named
} else if syntax_grammar.variables_to_inline.contains(&symbol) {
} else if syntax_grammar.variables_to_inline.contains(symbol) {
VariableType::Hidden
} else {
match symbol.kind {
@ -700,11 +703,10 @@ fn variable_type_for_child_type(
fn extend_sorted<'a, T>(vec: &mut Vec<T>, values: impl IntoIterator<Item = &'a T>) -> bool
where
T: Clone + Eq + Ord,
T: 'a,
T: 'a + Clone + Eq + Ord,
{
values.into_iter().any(|value| {
if let Err(i) = vec.binary_search(&value) {
if let Err(i) = vec.binary_search(value) {
vec.insert(i, value.clone());
true
} else {
@ -1783,17 +1785,18 @@ mod tests {
variables: Vec<SyntaxVariable>,
supertype_symbols: Vec<Symbol>,
) -> SyntaxGrammar {
let mut syntax_grammar = SyntaxGrammar::default();
syntax_grammar.variables = variables;
syntax_grammar.supertype_symbols = supertype_symbols;
syntax_grammar
SyntaxGrammar {
variables,
supertype_symbols,
..SyntaxGrammar::default()
}
}
fn build_lexical_grammar() -> LexicalGrammar {
let mut lexical_grammar = LexicalGrammar::default();
for i in 0..10 {
lexical_grammar.variables.push(LexicalVariable {
name: format!("token_{}", i),
name: format!("token_{i}"),
kind: VariableType::Named,
implicit_precedence: 0,
start_state: 0,
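
The recurring change in this file is clippy's `or_fun_call`: `or_insert(FieldInfo::default())` builds its argument on every call, even when the entry is occupied, while `or_insert_with(FieldInfo::default)` defers construction to vacant entries (current clippy would suggest `.or_default()` here). A minimal sketch with a stripped-down `FieldInfo`:

    use std::collections::HashMap;

    #[derive(Default)]
    struct FieldInfo {
        types: Vec<String>,
    }

    // The default value is only constructed when the entry is vacant.
    fn field_entry<'a>(
        fields: &'a mut HashMap<String, FieldInfo>,
        name: &str,
    ) -> &'a mut FieldInfo {
        fields.entry(name.to_string()).or_insert_with(FieldInfo::default)
    }

    fn main() {
        let mut fields = HashMap::new();
        field_entry(&mut fields, "body").types.push("block".to_string());
        assert_eq!(fields["body"].types.len(), 1);
    }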

View file

@ -7,6 +7,7 @@ use serde_json::{Map, Value};
#[derive(Deserialize)]
#[serde(tag = "type")]
#[allow(non_camel_case_types)]
#[allow(clippy::upper_case_acronyms)]
enum RuleJSON {
ALIAS {
content: Box<RuleJSON>,
@ -91,15 +92,15 @@ pub(crate) struct GrammarJSON {
}
pub(crate) fn parse_grammar(input: &str) -> Result<InputGrammar> {
let grammar_json: GrammarJSON = serde_json::from_str(&input)?;
let grammar_json: GrammarJSON = serde_json::from_str(input)?;
let mut variables = Vec::with_capacity(grammar_json.rules.len());
for (name, value) in grammar_json.rules {
variables.push(Variable {
name: name.to_owned(),
name: name.clone(),
kind: VariableType::Named,
rule: parse_rule(serde_json::from_value(value)?),
})
});
}
let mut precedence_orderings = Vec::with_capacity(grammar_json.precedences.len());
@ -114,7 +115,7 @@ pub(crate) fn parse_grammar(input: &str) -> Result<InputGrammar> {
"Invalid rule in precedences array. Only strings and symbols are allowed"
))
}
})
});
}
precedence_orderings.push(ordering);
}
@ -149,11 +150,11 @@ fn parse_rule(json: RuleJSON) -> Rule {
flags.map_or(String::new(), |f| {
f.chars()
.filter(|c| {
if *c != 'i' {
if *c == 'i' {
*c != 'u' // silently ignore unicode flag
} else {
eprintln!("Warning: unsupported flag {c}");
false
} else {
*c != 'u' // silently ignore unicode flag
}
})
.collect()
@ -182,11 +183,11 @@ fn parse_rule(json: RuleJSON) -> Rule {
}
}
impl Into<Precedence> for PrecedenceValueJSON {
fn into(self) -> Precedence {
match self {
PrecedenceValueJSON::Integer(i) => Precedence::Integer(i),
PrecedenceValueJSON::Name(i) => Precedence::Name(i),
impl From<PrecedenceValueJSON> for Precedence {
fn from(val: PrecedenceValueJSON) -> Self {
match val {
PrecedenceValueJSON::Integer(i) => Self::Integer(i),
PrecedenceValueJSON::Name(i) => Self::Name(i),
}
}
}
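
The conversion rewrite is clippy's `from_over_into`: implementing `From<PrecedenceValueJSON> for Precedence` yields the `Into` impl for free and composes with generic bounds and `?`. A self-contained sketch mirroring the hunk, with the enums reduced to the two variants shown:

    enum PrecedenceValueJSON {
        Integer(i32),
        Name(String),
    }

    #[derive(Debug)]
    enum Precedence {
        Integer(i32),
        Name(String),
    }

    impl From<PrecedenceValueJSON> for Precedence {
        fn from(val: PrecedenceValueJSON) -> Self {
            match val {
                PrecedenceValueJSON::Integer(i) => Self::Integer(i),
                PrecedenceValueJSON::Name(n) => Self::Name(n),
            }
        }
    }

    fn main() {
        // call sites written against `Into` keep working
        let p: Precedence = PrecedenceValueJSON::Integer(1).into();
        println!("{p:?}");
    }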

View file

@ -24,7 +24,7 @@ impl Expander {
// convert that rule itself into a binary tree structure instead of introducing
// another auxiliary rule.
if let (VariableType::Hidden, Rule::Repeat(repeated_content)) = (variable.kind, &rule) {
let inner_rule = self.expand_rule(&repeated_content);
let inner_rule = self.expand_rule(repeated_content);
variable.rule = self.wrap_rule_in_binary_tree(Symbol::non_terminal(index), inner_rule);
variable.kind = VariableType::Auxiliary;
return true;
@ -107,8 +107,8 @@ pub(super) fn expand_repeats(mut grammar: ExtractedSyntaxGrammar) -> ExtractedSy
existing_repeats: HashMap::new(),
};
for (i, mut variable) in grammar.variables.iter_mut().enumerate() {
let expanded_top_level_repetition = expander.expand_variable(i, &mut variable);
for (i, variable) in grammar.variables.iter_mut().enumerate() {
let expanded_top_level_repetition = expander.expand_variable(i, variable);
// If a hidden variable had a top-level repetition and it was converted to
// a recursive rule, then it can't be inlined.
@ -119,9 +119,7 @@ pub(super) fn expand_repeats(mut grammar: ExtractedSyntaxGrammar) -> ExtractedSy
}
}
grammar
.variables
.extend(expander.auxiliary_variables.into_iter());
grammar.variables.extend(expander.auxiliary_variables);
grammar
}
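
`extend(expander.auxiliary_variables.into_iter())` losing its `.into_iter()` is clippy's `useless_conversion`: `extend` already accepts any `IntoIterator`. A two-line sketch:

    fn merge(mut base: Vec<u32>, extra: Vec<u32>) -> Vec<u32> {
        base.extend(extra); // not base.extend(extra.into_iter())
        base
    }

    fn main() {
        assert_eq!(merge(vec![1], vec![2, 3]), [1, 2, 3]);
    }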

View file

@ -14,7 +14,7 @@ use std::i32;
lazy_static! {
static ref CURLY_BRACE_REGEX: Regex =
Regex::new(r#"(^|[^\\pP])\{([^}]*[^0-9A-Fa-f,}][^}]*)\}"#).unwrap();
Regex::new(r"(^|[^\\pP])\{([^}]*[^0-9A-Fa-f,}][^}]*)\}").unwrap();
static ref UNICODE_CATEGORIES: HashMap<&'static str, Vec<u32>> =
serde_json::from_str(UNICODE_CATEGORIES_JSON).unwrap();
static ref UNICODE_PROPERTIES: HashMap<&'static str, Vec<u32>> =
@ -25,10 +25,10 @@ lazy_static! {
serde_json::from_str(UNICODE_PROPERTY_ALIASES_JSON).unwrap();
}
const UNICODE_CATEGORIES_JSON: &'static str = include_str!("./unicode-categories.json");
const UNICODE_PROPERTIES_JSON: &'static str = include_str!("./unicode-properties.json");
const UNICODE_CATEGORY_ALIASES_JSON: &'static str = include_str!("./unicode-category-aliases.json");
const UNICODE_PROPERTY_ALIASES_JSON: &'static str = include_str!("./unicode-property-aliases.json");
const UNICODE_CATEGORIES_JSON: &str = include_str!("./unicode-categories.json");
const UNICODE_PROPERTIES_JSON: &str = include_str!("./unicode-properties.json");
const UNICODE_CATEGORY_ALIASES_JSON: &str = include_str!("./unicode-category-aliases.json");
const UNICODE_PROPERTY_ALIASES_JSON: &str = include_str!("./unicode-property-aliases.json");
const ALLOWED_REDUNDANT_ESCAPED_CHARS: [char; 4] = ['!', '\'', '"', '/'];
struct NfaBuilder {
@ -51,7 +51,7 @@ fn get_implicit_precedence(rule: &Rule) -> i32 {
}
}
fn get_completion_precedence(rule: &Rule) -> i32 {
const fn get_completion_precedence(rule: &Rule) -> i32 {
if let Rule::Metadata { params, .. } = rule {
if let Precedence::Integer(p) = params.precedence {
return p;
@ -66,12 +66,10 @@ fn preprocess_regex(content: &str) -> String {
let mut is_escaped = false;
for c in content.chars() {
if is_escaped {
if ALLOWED_REDUNDANT_ESCAPED_CHARS.contains(&c) {
result.push(c);
} else {
if !ALLOWED_REDUNDANT_ESCAPED_CHARS.contains(&c) {
result.push('\\');
result.push(c);
}
result.push(c);
is_escaped = false;
} else if c == '\\' {
is_escaped = true;
@ -85,18 +83,18 @@ fn preprocess_regex(content: &str) -> String {
result
}
pub(crate) fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> Result<LexicalGrammar> {
pub fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> Result<LexicalGrammar> {
let mut builder = NfaBuilder {
nfa: Nfa::new(),
is_sep: true,
precedence_stack: vec![0],
};
let separator_rule = if grammar.separators.len() > 0 {
let separator_rule = if grammar.separators.is_empty() {
Rule::Blank
} else {
grammar.separators.push(Rule::Blank);
Rule::repeat(Rule::choice(grammar.separators))
} else {
Rule::Blank
};
let mut variables = Vec::new();
@ -149,7 +147,7 @@ impl NfaBuilder {
self.push_advance(CharacterSet::empty().add_char(c), next_state_id);
next_state_id = self.nfa.last_state_id();
}
Ok(s.len() > 0)
Ok(!s.is_empty())
}
Rule::Choice(elements) => {
let mut alternative_state_ids = Vec::new();
@ -170,7 +168,7 @@ impl NfaBuilder {
}
Rule::Seq(elements) => {
let mut result = false;
for element in elements.into_iter().rev() {
for element in elements.iter().rev() {
if self.expand_rule(element, next_state_id)? {
result = true;
}
@ -206,7 +204,7 @@ impl NfaBuilder {
result
}
Rule::Blank => Ok(false),
_ => Err(anyhow!("Grammar error: Unexpected rule {:?}", rule)),
_ => Err(anyhow!("Grammar error: Unexpected rule {rule:?}")),
}
}
@ -216,7 +214,7 @@ impl NfaBuilder {
mut next_state_id: u32,
case_insensitive: bool,
) -> Result<bool> {
fn inverse_char(c: char) -> char {
const fn inverse_char(c: char) -> char {
match c {
'a'..='z' => (c as u8 - b'a' + b'A') as char,
'A'..='Z' => (c as u8 - b'A' + b'a') as char,
@ -329,8 +327,8 @@ impl NfaBuilder {
Ast::Group(group) => self.expand_regex(&group.ast, next_state_id, case_insensitive),
Ast::Alternation(alternation) => {
let mut alternative_state_ids = Vec::new();
for ast in alternation.asts.iter() {
if self.expand_regex(&ast, next_state_id, case_insensitive)? {
for ast in &alternation.asts {
if self.expand_regex(ast, next_state_id, case_insensitive)? {
alternative_state_ids.push(self.nfa.last_state_id());
} else {
alternative_state_ids.push(next_state_id);
@ -348,7 +346,7 @@ impl NfaBuilder {
Ast::Concat(concat) => {
let mut result = false;
for ast in concat.asts.iter().rev() {
if self.expand_regex(&ast, next_state_id, case_insensitive)? {
if self.expand_regex(ast, next_state_id, case_insensitive)? {
result = true;
next_state_id = self.nfa.last_state_id();
}
@ -360,7 +358,7 @@ impl NfaBuilder {
fn translate_class_set(&self, class_set: &ClassSet) -> Result<CharacterSet> {
match &class_set {
ClassSet::Item(item) => self.expand_character_class(&item),
ClassSet::Item(item) => self.expand_character_class(item),
ClassSet::BinaryOp(binary_op) => {
let mut lhs_char_class = self.translate_class_set(&binary_op.lhs)?;
let mut rhs_char_class = self.translate_class_set(&binary_op.rhs)?;
@ -390,7 +388,7 @@ impl NfaBuilder {
precedence: 0,
}); // Placeholder for split
let split_state_id = self.nfa.last_state_id();
if self.expand_regex(&ast, split_state_id, case_insensitive)? {
if self.expand_regex(ast, split_state_id, case_insensitive)? {
self.nfa.states[split_state_id as usize] =
NfaState::Split(self.nfa.last_state_id(), next_state_id);
Ok(true)
@ -420,7 +418,7 @@ impl NfaBuilder {
next_state_id: u32,
case_insensitive: bool,
) -> Result<bool> {
if self.expand_one_or_more(&ast, next_state_id, case_insensitive)? {
if self.expand_one_or_more(ast, next_state_id, case_insensitive)? {
self.push_split(next_state_id);
Ok(true)
} else {
@ -453,7 +451,7 @@ impl NfaBuilder {
ClassSetItem::Union(union) => {
let mut result = CharacterSet::empty();
for item in &union.items {
result = result.add(&self.expand_character_class(&item)?);
result = result.add(&self.expand_character_class(item)?);
}
Ok(result)
}
@ -472,9 +470,8 @@ impl NfaBuilder {
}
Ok(set)
}
_ => Err(anyhow!(
"Regex error: Unsupported character class syntax {:?}",
item
ClassSetItem::Ascii(_) => Err(anyhow!(
"Regex error: Unsupported character class syntax {item:?}",
)),
}
}
@ -495,15 +492,15 @@ impl NfaBuilder {
if actual_class_name.len() == 1 {
category_letter = actual_class_name.clone();
} else {
let code_points = UNICODE_CATEGORIES
.get(actual_class_name.as_str())
.or_else(|| UNICODE_PROPERTIES.get(actual_class_name.as_str()))
.ok_or_else(|| {
anyhow!(
"Regex error: Unsupported unicode character class {}",
class_name
)
})?;
let code_points =
UNICODE_CATEGORIES
.get(actual_class_name.as_str())
.or_else(|| UNICODE_PROPERTIES.get(actual_class_name.as_str()))
.ok_or_else(|| {
anyhow!(
"Regex error: Unsupported unicode character class {class_name}",
)
})?;
for c in code_points {
if let Some(c) = std::char::from_u32(*c) {
chars = chars.add_char(c);
@ -956,7 +953,7 @@ mod tests {
let grammar = expand_tokens(ExtractedLexicalGrammar {
separators: separators.clone(),
variables: rules
.into_iter()
.iter()
.map(|rule| Variable::named("", rule.clone()))
.collect(),
})
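
`get_completion_precedence` and the inner `inverse_char` helper are promoted to `const fn` (clippy's `missing_const_for_fn`): their bodies, matches and integer casts, are valid in const context. The helper from the hunk, runnable on its own:

    // Swaps ASCII letter case; every operation here is const-evaluable.
    const fn inverse_char(c: char) -> char {
        match c {
            'a'..='z' => (c as u8 - b'a' + b'A') as char,
            'A'..='Z' => (c as u8 - b'A' + b'a') as char,
            c => c,
        }
    }

    fn main() {
        const X: char = inverse_char('q'); // usable at compile time
        assert_eq!(X, 'Q');
    }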

View file

@ -28,9 +28,9 @@ pub(super) fn extract_default_aliases(
// For each grammar symbol, find all of the aliases under which the symbol appears,
// and determine whether or not the symbol ever appears *unaliased*.
for variable in syntax_grammar.variables.iter() {
for production in variable.productions.iter() {
for step in production.steps.iter() {
for variable in &syntax_grammar.variables {
for production in &variable.productions {
for step in &production.steps {
let status = match step.symbol.kind {
SymbolType::External => &mut external_status_list[step.symbol.index],
SymbolType::NonTerminal => &mut non_terminal_status_list[step.symbol.index],
@ -62,7 +62,7 @@ pub(super) fn extract_default_aliases(
}
}
for symbol in syntax_grammar.extra_symbols.iter() {
for symbol in &syntax_grammar.extra_symbols {
let status = match symbol.kind {
SymbolType::External => &mut external_status_list[symbol.index],
SymbolType::NonTerminal => &mut non_terminal_status_list[symbol.index],
@ -98,25 +98,23 @@ pub(super) fn extract_default_aliases(
for (symbol, status) in symbols_with_statuses {
if status.appears_unaliased {
status.aliases.clear();
} else {
if let Some(default_entry) = status
.aliases
.iter()
.enumerate()
.max_by_key(|(i, (_, count))| (count, -(*i as i64)))
.map(|(_, entry)| entry.clone())
{
status.aliases.clear();
status.aliases.push(default_entry.clone());
result.insert(symbol, default_entry.0);
}
} else if let Some(default_entry) = status
.aliases
.iter()
.enumerate()
.max_by_key(|(i, (_, count))| (count, -(*i as i64)))
.map(|(_, entry)| entry.clone())
{
status.aliases.clear();
status.aliases.push(default_entry.clone());
result.insert(symbol, default_entry.0);
}
}
// Wherever a symbol is aliased as its default alias, remove the usage of the alias,
// because it will now be redundant.
let mut alias_positions_to_clear = Vec::new();
for variable in syntax_grammar.variables.iter_mut() {
for variable in &mut syntax_grammar.variables {
alias_positions_to_clear.clear();
for (i, production) in variable.productions.iter().enumerate() {
@ -132,7 +130,7 @@ pub(super) fn extract_default_aliases(
// If this step is aliased as the symbol's default alias, then remove that alias.
if step.alias.is_some()
&& step.alias.as_ref() == status.aliases.get(0).map(|t| &t.0)
&& step.alias.as_ref() == status.aliases.first().map(|t| &t.0)
{
let mut other_productions_must_use_this_alias_at_this_index = false;
for (other_i, other_production) in variable.productions.iter().enumerate() {
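
Two small lints here: `else { if … }` flattens to `else if` (`collapsible_else_if`), and `status.aliases.get(0)` becomes `.first()` (`get_first`), which names the intent instead of spelling an index. A sketch of the latter:

    fn default_alias(aliases: &[String]) -> Option<&String> {
        aliases.first() // rather than aliases.get(0)
    }

    fn main() {
        let aliases = vec!["kw".to_string()];
        assert_eq!(default_alias(&aliases).map(String::as_str), Some("kw"));
    }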

View file

@ -15,12 +15,12 @@ pub(super) fn extract_tokens(
extracted_usage_counts: Vec::new(),
};
for mut variable in grammar.variables.iter_mut() {
extractor.extract_tokens_in_variable(&mut variable);
for variable in &mut grammar.variables {
extractor.extract_tokens_in_variable(variable);
}
for mut variable in grammar.external_tokens.iter_mut() {
extractor.extract_tokens_in_variable(&mut variable);
for variable in &mut grammar.external_tokens {
extractor.extract_tokens_in_variable(variable);
}
let mut lexical_variables = Vec::with_capacity(extractor.extracted_variables.len());
@ -59,7 +59,7 @@ pub(super) fn extract_tokens(
variables.push(variable);
}
for variable in variables.iter_mut() {
for variable in &mut variables {
variable.rule = symbol_replacer.replace_symbols_in_rule(&variable.rule);
}
@ -94,12 +94,10 @@ pub(super) fn extract_tokens(
for rule in grammar.extra_symbols {
if let Rule::Symbol(symbol) = rule {
extra_symbols.push(symbol_replacer.replace_symbol(symbol));
} else if let Some(index) = lexical_variables.iter().position(|v| v.rule == rule) {
extra_symbols.push(Symbol::terminal(index));
} else {
if let Some(index) = lexical_variables.iter().position(|v| v.rule == rule) {
extra_symbols.push(Symbol::terminal(index));
} else {
separators.push(rule);
}
separators.push(rule);
}
}
@ -119,13 +117,13 @@ pub(super) fn extract_tokens(
name: external_token.name,
kind: external_token.kind,
corresponding_internal_token: None,
})
});
} else {
external_tokens.push(ExternalToken {
name: lexical_variables[symbol.index].name.clone(),
kind: external_token.kind,
corresponding_internal_token: Some(symbol),
})
});
}
} else {
return Err(anyhow!(
@ -209,7 +207,7 @@ impl TokenExtractor {
} else {
Rule::Metadata {
params: params.clone(),
rule: Box::new(self.extract_tokens_in_rule(&rule)),
rule: Box::new(self.extract_tokens_in_rule(rule)),
}
}
}
@ -298,13 +296,13 @@ impl SymbolReplacer {
}
let mut adjusted_index = symbol.index;
for (replaced_index, _) in self.replacements.iter() {
for replaced_index in self.replacements.keys() {
if *replaced_index < symbol.index {
adjusted_index -= 1;
}
}
return Symbol::non_terminal(adjusted_index);
Symbol::non_terminal(adjusted_index)
}
}
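
`replace_symbol`'s trailing `return Symbol::non_terminal(adjusted_index);` becomes a tail expression (clippy's `needless_return`), and iterating `self.replacements.keys()` replaces destructuring pairs whose values go unused. A sketch of the tail-expression form, with illustrative names:

    fn adjusted_index(index: usize, removed_before: usize) -> usize {
        // before: return index - removed_before;
        index - removed_before
    }

    fn main() {
        assert_eq!(adjusted_index(5, 2), 3);
    }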

View file

@ -88,7 +88,7 @@ impl RuleFlattener {
self.associativity_stack.pop();
if did_push && !at_end {
self.production.steps.last_mut().unwrap().associativity =
self.associativity_stack.last().cloned();
self.associativity_stack.last().copied();
}
}
@ -110,7 +110,7 @@ impl RuleFlattener {
.last()
.cloned()
.unwrap_or(Precedence::None),
associativity: self.associativity_stack.last().cloned(),
associativity: self.associativity_stack.last().copied(),
alias: self.alias_stack.last().cloned(),
field_name: self.field_name_stack.last().cloned(),
});
@ -129,7 +129,7 @@ fn extract_choices(rule: Rule) -> Vec<Rule> {
let extraction = extract_choices(element);
let mut next_result = Vec::new();
for entry in result {
for extraction_entry in extraction.iter() {
for extraction_entry in &extraction {
next_result.push(Rule::Seq(vec![entry.clone(), extraction_entry.clone()]));
}
}
@ -157,7 +157,7 @@ fn extract_choices(rule: Rule) -> Vec<Rule> {
}
}
fn flatten_variable(variable: Variable) -> Result<SyntaxVariable> {
fn flatten_variable(variable: Variable) -> SyntaxVariable {
let mut productions = Vec::new();
for rule in extract_choices(variable.rule) {
let production = RuleFlattener::new().flatten(rule);
@ -165,11 +165,11 @@ fn flatten_variable(variable: Variable) -> Result<SyntaxVariable> {
productions.push(production);
}
}
Ok(SyntaxVariable {
SyntaxVariable {
name: variable.name,
kind: variable.kind,
productions,
})
}
}
fn symbol_is_used(variables: &Vec<SyntaxVariable>, symbol: Symbol) -> bool {
@ -188,7 +188,7 @@ fn symbol_is_used(variables: &Vec<SyntaxVariable>, symbol: Symbol) -> bool {
pub(super) fn flatten_grammar(grammar: ExtractedSyntaxGrammar) -> Result<SyntaxGrammar> {
let mut variables = Vec::new();
for variable in grammar.variables {
variables.push(flatten_variable(variable)?);
variables.push(flatten_variable(variable));
}
for (i, variable) in variables.iter().enumerate() {
for production in &variable.productions {
@ -245,8 +245,7 @@ mod tests {
),
Rule::non_terminal(7),
]),
})
.unwrap();
});
assert_eq!(
result.productions,
@ -304,8 +303,7 @@ mod tests {
),
Rule::non_terminal(7),
]),
})
.unwrap();
});
assert_eq!(
result.productions,
@ -344,8 +342,7 @@ mod tests {
Precedence::Integer(101),
Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(2)]),
),
})
.unwrap();
});
assert_eq!(
result.productions,
@ -367,8 +364,7 @@ mod tests {
Precedence::Integer(101),
Rule::seq(vec![Rule::non_terminal(1)]),
),
})
.unwrap();
});
assert_eq!(
result.productions,
@ -393,8 +389,7 @@ mod tests {
Rule::field("second-thing".to_string(), Rule::terminal(3)),
]),
]),
})
.unwrap();
});
assert_eq!(
result.productions,
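
`flatten_variable` never produced an `Err`, so dropping its `Result` wrapper (clippy's `unnecessary_wraps`) also deletes the `.unwrap()` at every test call site above. A hypothetical stand-in showing the shape of the change:

    // before: fn double_all(values: &[i32]) -> Result<Vec<i32>, E> { ... Ok(out) }
    fn double_all(values: &[i32]) -> Vec<i32> {
        values.iter().map(|v| v * 2).collect()
    }

    fn main() {
        assert_eq!(double_all(&[1, 2]), [2, 4]); // no .unwrap() needed
    }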

View file

@ -11,7 +11,7 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar>
}
let mut variables = Vec::with_capacity(grammar.variables.len());
for variable in grammar.variables.iter() {
for variable in &grammar.variables {
variables.push(Variable {
name: variable.name.clone(),
kind: variable_type_for_name(&variable.name),
@ -20,10 +20,10 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar>
}
let mut external_tokens = Vec::with_capacity(grammar.external_tokens.len());
for external_token in grammar.external_tokens.iter() {
let rule = interner.intern_rule(&external_token)?;
for external_token in &grammar.external_tokens {
let rule = interner.intern_rule(external_token)?;
let (name, kind) = if let Rule::NamedSymbol(name) = external_token {
(name.clone(), variable_type_for_name(&name))
(name.clone(), variable_type_for_name(name))
} else {
(String::new(), VariableType::Anonymous)
};
@ -31,35 +31,35 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar>
}
let mut extra_symbols = Vec::with_capacity(grammar.extra_symbols.len());
for extra_token in grammar.extra_symbols.iter() {
for extra_token in &grammar.extra_symbols {
extra_symbols.push(interner.intern_rule(extra_token)?);
}
let mut supertype_symbols = Vec::with_capacity(grammar.supertype_symbols.len());
for supertype_symbol_name in grammar.supertype_symbols.iter() {
for supertype_symbol_name in &grammar.supertype_symbols {
supertype_symbols.push(
interner
.intern_name(supertype_symbol_name)
.ok_or_else(|| anyhow!("Undefined symbol `{}`", supertype_symbol_name))?,
.ok_or_else(|| anyhow!("Undefined symbol `{supertype_symbol_name}`"))?,
);
}
let mut expected_conflicts = Vec::new();
for conflict in grammar.expected_conflicts.iter() {
for conflict in &grammar.expected_conflicts {
let mut interned_conflict = Vec::with_capacity(conflict.len());
for name in conflict {
interned_conflict.push(
interner
.intern_name(&name)
.ok_or_else(|| anyhow!("Undefined symbol `{}`", name))?,
.intern_name(name)
.ok_or_else(|| anyhow!("Undefined symbol `{name}`"))?,
);
}
expected_conflicts.push(interned_conflict);
}
let mut variables_to_inline = Vec::new();
for name in grammar.variables_to_inline.iter() {
if let Some(symbol) = interner.intern_name(&name) {
for name in &grammar.variables_to_inline {
if let Some(symbol) = interner.intern_name(name) {
variables_to_inline.push(symbol);
}
}
@ -68,8 +68,8 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar>
if let Some(name) = grammar.word_token.as_ref() {
word_token = Some(
interner
.intern_name(&name)
.ok_or_else(|| anyhow!("Undefined symbol `{}`", &name))?,
.intern_name(name)
.ok_or_else(|| anyhow!("Undefined symbol `{name}`"))?,
);
}
@ -118,13 +118,10 @@ impl<'a> Interner<'a> {
params: params.clone(),
}),
Rule::NamedSymbol(name) => {
if let Some(symbol) = self.intern_name(&name) {
Ok(Rule::Symbol(symbol))
} else {
Err(anyhow!("Undefined symbol `{}`", name))
}
}
Rule::NamedSymbol(name) => self.intern_name(name).map_or_else(
|| Err(anyhow!("Undefined symbol `{name}`")),
|symbol| Ok(Rule::Symbol(symbol)),
),
_ => Ok(rule.clone()),
}
@ -145,12 +142,12 @@ impl<'a> Interner<'a> {
}
}
return None;
None
}
}
fn variable_type_for_name(name: &str) -> VariableType {
if name.starts_with("_") {
if name.starts_with('_') {
VariableType::Hidden
} else {
VariableType::Named
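
`name.starts_with("_")` becoming `starts_with('_')` is clippy's `single_char_pattern`: a `char` pattern skips the substring-search machinery a one-character `&str` would go through. Runnable in miniature:

    fn is_hidden(name: &str) -> bool {
        name.starts_with('_') // rather than starts_with("_")
    }

    fn main() {
        assert!(is_hidden("_expr"));
        assert!(!is_hidden("expr"));
    }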

View file

@ -6,7 +6,7 @@ mod flatten_grammar;
mod intern_symbols;
mod process_inlines;
pub(crate) use self::expand_tokens::expand_tokens;
pub use self::expand_tokens::expand_tokens;
use self::expand_repeats::expand_repeats;
use self::extract_default_aliases::extract_default_aliases;
@ -26,7 +26,7 @@ use std::{
mem,
};
pub(crate) struct IntermediateGrammar<T, U> {
pub struct IntermediateGrammar<T, U> {
variables: Vec<Variable>,
extra_symbols: Vec<T>,
expected_conflicts: Vec<Vec<Symbol>>,
@ -37,12 +37,12 @@ pub(crate) struct IntermediateGrammar<T, U> {
word_token: Option<Symbol>,
}
pub(crate) type InternedGrammar = IntermediateGrammar<Rule, Variable>;
pub type InternedGrammar = IntermediateGrammar<Rule, Variable>;
pub(crate) type ExtractedSyntaxGrammar = IntermediateGrammar<Symbol, ExternalToken>;
pub type ExtractedSyntaxGrammar = IntermediateGrammar<Symbol, ExternalToken>;
#[derive(Debug, PartialEq, Eq)]
pub(crate) struct ExtractedLexicalGrammar {
pub struct ExtractedLexicalGrammar {
pub variables: Vec<Variable>,
pub separators: Vec<Rule>,
}
@ -50,21 +50,21 @@ pub(crate) struct ExtractedLexicalGrammar {
impl<T, U> Default for IntermediateGrammar<T, U> {
fn default() -> Self {
Self {
variables: Default::default(),
extra_symbols: Default::default(),
expected_conflicts: Default::default(),
precedence_orderings: Default::default(),
external_tokens: Default::default(),
variables_to_inline: Default::default(),
supertype_symbols: Default::default(),
word_token: Default::default(),
variables: Vec::default(),
extra_symbols: Vec::default(),
expected_conflicts: Vec::default(),
precedence_orderings: Vec::default(),
external_tokens: Vec::default(),
variables_to_inline: Vec::default(),
supertype_symbols: Vec::default(),
word_token: Option::default(),
}
}
}
/// Transform an input grammar into separate components that are ready
/// for parse table construction.
pub(crate) fn prepare_grammar(
pub fn prepare_grammar(
input_grammar: &InputGrammar,
) -> Result<(
SyntaxGrammar,
@ -109,9 +109,7 @@ fn validate_precedences(grammar: &InputGrammar) -> Result<()> {
hash_map::Entry::Occupied(e) => {
if e.get() != &ordering {
return Err(anyhow!(
"Conflicting orderings for precedences {} and {}",
entry1,
entry2
"Conflicting orderings for precedences {entry1} and {entry2}",
));
}
}
@ -127,16 +125,11 @@ fn validate_precedences(grammar: &InputGrammar) -> Result<()> {
Rule::Repeat(rule) => validate(rule_name, rule, names),
Rule::Seq(elements) | Rule::Choice(elements) => elements
.iter()
.map(|e| validate(rule_name, e, names))
.collect(),
.try_for_each(|e| validate(rule_name, e, names)),
Rule::Metadata { rule, params } => {
if let Precedence::Name(n) = &params.precedence {
if !names.contains(n) {
return Err(anyhow!(
"Undeclared precedence '{}' in rule '{}'",
n,
rule_name
));
return Err(anyhow!("Undeclared precedence '{n}' in rule '{rule_name}'"));
}
}
validate(rule_name, rule, names)?;
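
Note: the `.map(..).collect()` to `try_for_each` change above stops materializing a collection whose only job was to surface the first `Err`. A hedged stand-alone sketch (the `check` helper is illustrative, not from this diff):

fn check(n: &i32) -> Result<(), String> {
    if *n < 0 { Err(format!("negative: {n}")) } else { Ok(()) }
}

fn main() {
    let elements = vec![1, 2, -3];
    // Before: elements.iter().map(check).collect::<Result<(), String>>()
    // After: try_for_each short-circuits on the first Err without collecting.
    let result: Result<(), String> = elements.iter().try_for_each(check);
    assert_eq!(result, Err("negative: -3".to_string()));
}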

View file

@ -21,7 +21,7 @@ struct InlinedProductionMapBuilder {
}
impl InlinedProductionMapBuilder {
fn build<'a>(mut self, grammar: &'a SyntaxGrammar) -> InlinedProductionMap {
fn build(mut self, grammar: &SyntaxGrammar) -> InlinedProductionMap {
let mut step_ids_to_process = Vec::new();
for (variable_index, variable) in grammar.variables.iter().enumerate() {
for production_index in 0..variable.productions.len() {
@ -38,14 +38,14 @@ impl InlinedProductionMapBuilder {
if grammar.variables_to_inline.contains(&step.symbol) {
let inlined_step_ids = self
.inline_production_at_step(step_id, grammar)
.into_iter()
.cloned()
.iter()
.copied()
.map(|production_index| ProductionStepId {
variable_index: None,
production_index,
step_index: step_id.step_index,
});
step_ids_to_process.splice(i..i + 1, inlined_step_ids);
step_ids_to_process.splice(i..=i, inlined_step_ids);
} else {
step_ids_to_process[i] = ProductionStepId {
variable_index: step_id.variable_index,
@ -67,11 +67,12 @@ impl InlinedProductionMapBuilder {
let production_map = production_indices_by_step_id
.into_iter()
.map(|(step_id, production_indices)| {
let production = if let Some(variable_index) = step_id.variable_index {
&grammar.variables[variable_index].productions[step_id.production_index]
} else {
&productions[step_id.production_index]
} as *const Production;
let production = step_id.variable_index.map_or_else(
|| &productions[step_id.production_index],
|variable_index| {
&grammar.variables[variable_index].productions[step_id.production_index]
},
) as *const Production;
((production, step_id.step_index as u32), production_indices)
})
.collect();
@ -93,22 +94,22 @@ impl InlinedProductionMapBuilder {
let mut productions_to_add = vec![self.production_for_id(step_id, grammar).clone()];
while i < productions_to_add.len() {
if let Some(step) = productions_to_add[i].steps.get(step_index) {
let symbol = step.symbol.clone();
let symbol = step.symbol;
if grammar.variables_to_inline.contains(&symbol) {
// Remove the production from the vector, replacing it with a placeholder.
let production = productions_to_add
.splice(i..i + 1, [Production::default()].iter().cloned())
.splice(i..=i, std::iter::once(&Production::default()).cloned())
.next()
.unwrap();
// Replace the placeholder with the inlined productions.
productions_to_add.splice(
i..i + 1,
i..=i,
grammar.variables[symbol.index].productions.iter().map(|p| {
let mut production = production.clone();
let removed_step = production
.steps
.splice(step_index..(step_index + 1), p.steps.iter().cloned())
.splice(step_index..=step_index, p.steps.iter().cloned())
.next()
.unwrap();
let inserted_steps =
@ -127,7 +128,7 @@ impl InlinedProductionMapBuilder {
if last_inserted_step.precedence.is_none() {
last_inserted_step.precedence = removed_step.precedence;
}
if last_inserted_step.associativity == None {
if last_inserted_step.associativity.is_none() {
last_inserted_step.associativity = removed_step.associativity;
}
}
@ -169,11 +170,10 @@ impl InlinedProductionMapBuilder {
id: ProductionStepId,
grammar: &'a SyntaxGrammar,
) -> &'a Production {
if let Some(variable_index) = id.variable_index {
&grammar.variables[variable_index].productions[id.production_index]
} else {
&self.productions[id.production_index]
}
id.variable_index.map_or_else(
|| &self.productions[id.production_index],
|variable_index| &grammar.variables[variable_index].productions[id.production_index],
)
}
fn production_step_for_id<'a>(
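
Note: two patterns repeat through this file: `Option::map_or_else` replacing an `if let Some(..) .. else ..` expression, and `i..=i` replacing `i..i + 1` in `splice` calls (presumably clippy's `option_if_let_else` and `range_plus_one`). A small sketch of both, with illustrative values:

fn main() {
    let variable_index: Option<usize> = Some(2);
    let fallback = 99;
    // map_or_else: the first closure is the None arm, the second maps Some.
    let chosen = variable_index.map_or_else(|| fallback, |i| i * 10);
    assert_eq!(chosen, 20);

    // splice(i..=i, ..) replaces exactly one element, as i..i + 1 did.
    let mut v = vec![1, 2, 3];
    let i = 1;
    let removed: Vec<_> = v.splice(i..=i, [20, 21]).collect();
    assert_eq!(removed, vec![2]);
    assert_eq!(v, vec![1, 20, 21, 3]);
}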


@ -154,7 +154,7 @@ impl Generator {
self.symbol_map = HashMap::new();
for symbol in self.parse_table.symbols.iter() {
for symbol in &self.parse_table.symbols {
let mut mapping = symbol;
// There can be multiple symbols in the grammar that have the same name and kind,
@ -201,7 +201,7 @@ impl Generator {
for production_info in &self.parse_table.production_infos {
// Build a list of all field names
for field_name in production_info.field_map.keys() {
if let Err(i) = self.field_names.binary_search(&field_name) {
if let Err(i) = self.field_names.binary_search(field_name) {
self.field_names.insert(i, field_name.clone());
}
}
@ -209,13 +209,14 @@ impl Generator {
for alias in &production_info.alias_sequence {
// Generate a mapping from aliases to C identifiers.
if let Some(alias) = &alias {
let existing_symbol = self.parse_table.symbols.iter().cloned().find(|symbol| {
if let Some(default_alias) = self.default_aliases.get(symbol) {
default_alias == alias
} else {
let (name, kind) = self.metadata_for_symbol(*symbol);
name == alias.value && kind == alias.kind()
}
let existing_symbol = self.parse_table.symbols.iter().copied().find(|symbol| {
self.default_aliases.get(symbol).map_or_else(
|| {
let (name, kind) = self.metadata_for_symbol(*symbol);
name == alias.value && kind == alias.kind()
},
|default_alias| default_alias == alias,
)
});
// Some aliases match an existing symbol in the grammar.
@ -316,7 +317,7 @@ impl Generator {
"#define SYMBOL_COUNT {}",
self.parse_table.symbols.len()
);
add_line!(self, "#define ALIAS_COUNT {}", self.unique_aliases.len(),);
add_line!(self, "#define ALIAS_COUNT {}", self.unique_aliases.len());
add_line!(self, "#define TOKEN_COUNT {}", token_count);
add_line!(
self,
@ -342,7 +343,7 @@ impl Generator {
indent!(self);
self.symbol_order.insert(Symbol::end(), 0);
let mut i = 1;
for symbol in self.parse_table.symbols.iter() {
for symbol in &self.parse_table.symbols {
if *symbol != Symbol::end() {
self.symbol_order.insert(*symbol, i);
add_line!(self, "{} = {},", self.symbol_ids[&symbol], i);
@ -361,12 +362,13 @@ impl Generator {
fn add_symbol_names_list(&mut self) {
add_line!(self, "static const char * const ts_symbol_names[] = {{");
indent!(self);
for symbol in self.parse_table.symbols.iter() {
for symbol in &self.parse_table.symbols {
let name = self.sanitize_string(
self.default_aliases
.get(symbol)
.map(|alias| alias.value.as_str())
.unwrap_or(self.metadata_for_symbol(*symbol).0),
.map_or(self.metadata_for_symbol(*symbol).0, |alias| {
alias.value.as_str()
}),
);
add_line!(self, "[{}] = \"{}\",", self.symbol_ids[&symbol], name);
}
@ -527,15 +529,13 @@ impl Generator {
if let Some(alias) = &step.alias {
if step.symbol.is_non_terminal()
&& Some(alias) != self.default_aliases.get(&step.symbol)
&& self.symbol_ids.contains_key(&step.symbol)
{
if self.symbol_ids.contains_key(&step.symbol) {
if let Some(alias_id) = self.alias_ids.get(&alias) {
let alias_ids = alias_ids_by_symbol
.entry(step.symbol)
.or_insert(Vec::new());
if let Err(i) = alias_ids.binary_search(&alias_id) {
alias_ids.insert(i, alias_id);
}
if let Some(alias_id) = self.alias_ids.get(alias) {
let alias_ids =
alias_ids_by_symbol.entry(step.symbol).or_insert(Vec::new());
if let Err(i) = alias_ids.binary_search(&alias_id) {
alias_ids.insert(i, alias_id);
}
}
}
@ -555,11 +555,11 @@ impl Generator {
for (symbol, alias_ids) in alias_ids_by_symbol {
let symbol_id = &self.symbol_ids[symbol];
let public_symbol_id = &self.symbol_ids[&self.symbol_map[&symbol]];
add_line!(self, "{}, {},", symbol_id, 1 + alias_ids.len());
add_line!(self, "{symbol_id}, {},", 1 + alias_ids.len());
indent!(self);
add_line!(self, "{},", public_symbol_id);
add_line!(self, "{public_symbol_id},");
for alias_id in alias_ids {
add_line!(self, "{},", alias_id);
add_line!(self, "{alias_id},");
}
dedent!(self);
}
@ -585,7 +585,7 @@ impl Generator {
let primary_state = first_state_for_each_core_id
.entry(state.core_id)
.or_insert(idx);
add_line!(self, "[{}] = {},", idx, primary_state);
add_line!(self, "[{idx}] = {primary_state},");
}
dedent!(self);
add_line!(self, "}};");
@ -603,7 +603,9 @@ impl Generator {
let mut field_map_ids = Vec::new();
for production_info in &self.parse_table.production_infos {
if !production_info.field_map.is_empty() {
if production_info.field_map.is_empty() {
field_map_ids.push((0, 0));
} else {
let mut flat_field_map = Vec::new();
for (field_name, locations) in &production_info.field_map {
for location in locations {
@ -618,8 +620,6 @@ impl Generator {
),
flat_field_map.len(),
));
} else {
field_map_ids.push((0, 0));
}
}
@ -632,10 +632,7 @@ impl Generator {
if length > 0 {
add_line!(
self,
"[{}] = {{.index = {}, .length = {}}},",
production_id,
row_id,
length
"[{production_id}] = {{.index = {row_id}, .length = {length}}},",
);
}
}
@ -649,7 +646,7 @@ impl Generator {
);
indent!(self);
for (row_index, field_pairs) in flat_field_maps.into_iter().skip(1) {
add_line!(self, "[{}] =", row_index);
add_line!(self, "[{row_index}] =");
indent!(self);
for (field_name, location) in field_pairs {
add_whitespace!(self);
@ -697,7 +694,7 @@ impl Generator {
ruled_out_chars.extend(chars.iter());
} else {
ranges = chars.clone().negate().simplify_ignoring(&ruled_out_chars);
ranges.insert(0, '\0'..'\0')
ranges.insert(0, '\0'..'\0');
}
// Record any large character sets so that they can be extracted
@ -738,7 +735,7 @@ impl Generator {
.collect();
// Generate a helper function for each large character set.
let mut sorted_large_char_sets: Vec<_> = large_character_sets.iter().map(|e| e).collect();
let mut sorted_large_char_sets = large_character_sets.iter().collect::<Vec<_>>();
sorted_large_char_sets.sort_unstable_by_key(|info| (info.symbol, info.index));
for info in sorted_large_char_sets {
add_line!(
@ -760,8 +757,7 @@ impl Generator {
add_line!(
self,
"static bool {}(TSLexer *lexer, TSStateId state) {{",
name
"static bool {name}(TSLexer *lexer, TSStateId state) {{",
);
indent!(self);
@ -771,7 +767,7 @@ impl Generator {
indent!(self);
for (i, state) in lex_table.states.into_iter().enumerate() {
add_line!(self, "case {}:", i);
add_line!(self, "case {i}:");
indent!(self);
self.add_lex_state(state, &state_transition_summaries[i], &large_character_sets);
dedent!(self);
@ -810,14 +806,14 @@ impl Generator {
}
i += 1;
}
return None;
None
}
fn add_lex_state(
&mut self,
state: LexState,
transition_info: &Vec<TransitionSummary>,
large_character_sets: &Vec<LargeCharacterSetInfo>,
transition_info: &[TransitionSummary],
large_character_sets: &[LargeCharacterSetInfo],
) {
if let Some(accept_action) = state.accept_action {
add_line!(self, "ACCEPT_TOKEN({});", self.symbol_ids[&accept_action]);
@ -852,7 +848,7 @@ impl Generator {
// Otherwise, generate code to compare the lookahead character
// with all of the character ranges.
if transition.ranges.len() > 0 {
if !transition.ranges.is_empty() {
add!(self, "if (");
self.add_character_range_conditions(&transition.ranges, transition.is_included, 2);
add!(self, ") ");
@ -878,26 +874,20 @@ impl Generator {
for (i, range) in ranges.iter().enumerate() {
if is_included {
if i > 0 {
add!(self, " ||{}", line_break);
add!(self, " ||{line_break}");
}
if range.start == '\0' {
add!(self, "!eof && ");
}
if range.end == range.start {
if range.start == '\0' {
add!(self, "!eof && ");
}
add!(self, "lookahead == ");
self.add_character(range.start);
} else if range.end as u32 == range.start as u32 + 1 {
if range.start == '\0' {
add!(self, "!eof && ");
}
add!(self, "lookahead == ");
self.add_character(range.start);
add!(self, " ||{}lookahead == ", line_break);
add!(self, " ||{line_break}lookahead == ");
self.add_character(range.end);
} else {
if range.start == '\0' {
add!(self, "!eof && ");
}
add!(self, "(");
self.add_character(range.start);
add!(self, " <= lookahead && lookahead <= ");
@ -906,7 +896,7 @@ impl Generator {
}
} else {
if i > 0 {
add!(self, " &&{}", line_break);
add!(self, " &&{line_break}");
}
if range.end == range.start {
add!(self, "lookahead != ");
@ -914,19 +904,17 @@ impl Generator {
} else if range.end as u32 == range.start as u32 + 1 {
add!(self, "lookahead != ");
self.add_character(range.start);
add!(self, " &&{}lookahead != ", line_break);
add!(self, " &&{line_break}lookahead != ");
self.add_character(range.end);
} else if range.start != '\0' {
add!(self, "(lookahead < ");
self.add_character(range.start);
add!(self, " || ");
self.add_character(range.end);
add!(self, " < lookahead)");
} else {
if range.start != '\0' {
add!(self, "(lookahead < ");
self.add_character(range.start);
add!(self, " || ");
self.add_character(range.end);
add!(self, " < lookahead)");
} else {
add!(self, "lookahead > ");
self.add_character(range.end);
}
add!(self, "lookahead > ");
self.add_character(range.end);
}
}
}
@ -955,7 +943,7 @@ impl Generator {
add!(self, "(");
}
add!(self, "c {} ", op);
add!(self, "c {op} ");
self.add_character(*value);
if !simple {
@ -1008,17 +996,16 @@ impl Generator {
indent!(self);
for (i, state) in self.parse_table.states.iter().enumerate() {
if state.is_end_of_non_terminal_extra() {
add_line!(self, "[{}] = {{(TSStateId)(-1)}},", i,);
add_line!(self, "[{i}] = {{(TSStateId)(-1)}},");
} else if state.external_lex_state_id > 0 {
add_line!(
self,
"[{}] = {{.lex_state = {}, .external_lex_state = {}}},",
i,
"[{i}] = {{.lex_state = {}, .external_lex_state = {}}},",
state.lex_state_id,
state.external_lex_state_id
);
} else {
add_line!(self, "[{}] = {{.lex_state = {}}},", i, state.lex_state_id);
add_line!(self, "[{i}] = {{.lex_state = {}}},", state.lex_state_id);
}
}
dedent!(self);
@ -1052,11 +1039,11 @@ impl Generator {
let token = &self.syntax_grammar.external_tokens[i];
let id_token = token
.corresponding_internal_token
.unwrap_or(Symbol::external(i));
.unwrap_or_else(|| Symbol::external(i));
add_line!(
self,
"[{}] = {},",
self.external_token_id(&token),
self.external_token_id(token),
self.symbol_ids[&id_token],
);
}
@ -1151,12 +1138,7 @@ impl Generator {
&mut parse_table_entries,
&mut next_parse_action_list_index,
);
add_line!(
self,
"[{}] = ACTIONS({}),",
self.symbol_ids[symbol],
entry_id
);
add_line!(self, "[{}] = ACTIONS({entry_id}),", self.symbol_ids[symbol]);
}
dedent!(self);
add_line!(self, "}},");
@ -1212,14 +1194,14 @@ impl Generator {
(symbols.len(), *kind, *value, symbols[0])
});
add_line!(self, "[{}] = {},", index, values_with_symbols.len());
add_line!(self, "[{index}] = {},", values_with_symbols.len());
indent!(self);
for ((value, kind), symbols) in values_with_symbols.iter_mut() {
for ((value, kind), symbols) in &mut values_with_symbols {
if *kind == SymbolType::NonTerminal {
add_line!(self, "STATE({}), {},", value, symbols.len());
add_line!(self, "STATE({value}), {},", symbols.len());
} else {
add_line!(self, "ACTIONS({}), {},", value, symbols.len());
add_line!(self, "ACTIONS({value}), {},", symbols.len());
}
symbols.sort_unstable();
@ -1250,8 +1232,7 @@ impl Generator {
for i in self.large_state_count..self.parse_table.states.len() {
add_line!(
self,
"[SMALL_STATE({})] = {},",
i,
"[SMALL_STATE({i})] = {},",
small_state_indices[i - self.large_state_count]
);
}
@ -1260,10 +1241,10 @@ impl Generator {
add_line!(self, "");
}
let mut parse_table_entries: Vec<_> = parse_table_entries
let mut parse_table_entries = parse_table_entries
.into_iter()
.map(|(entry, i)| (i, entry))
.collect();
.collect::<Vec<_>>();
parse_table_entries.sort_by_key(|(index, _)| *index);
self.add_parse_action_list(parse_table_entries);
}
@ -1277,8 +1258,7 @@ impl Generator {
for (i, entry) in parse_table_entries {
add!(
self,
" [{}] = {{.entry = {{.count = {}, .reusable = {}}}}},",
i,
" [{i}] = {{.entry = {{.count = {}, .reusable = {}}}}},",
entry.actions.len(),
entry.reusable
);
@ -1293,9 +1273,9 @@ impl Generator {
is_repetition,
} => {
if is_repetition {
add!(self, "SHIFT_REPEAT({})", state);
add!(self, "SHIFT_REPEAT({state})");
} else {
add!(self, "SHIFT({})", state);
add!(self, "SHIFT({state})");
}
}
ParseAction::Reduce {
@ -1305,17 +1285,17 @@ impl Generator {
production_id,
..
} => {
add!(self, "REDUCE({}, {}", self.symbol_ids[&symbol], child_count);
add!(self, "REDUCE({}, {child_count}", self.symbol_ids[&symbol]);
if dynamic_precedence != 0 {
add!(self, ", .dynamic_precedence = {}", dynamic_precedence);
add!(self, ", .dynamic_precedence = {dynamic_precedence}");
}
if production_id != 0 {
add!(self, ", .production_id = {}", production_id);
add!(self, ", .production_id = {production_id}");
}
add!(self, ")");
}
}
add!(self, ",")
add!(self, ",");
}
add!(self, "\n");
}
@ -1326,29 +1306,26 @@ impl Generator {
fn add_parser_export(&mut self) {
let language_function_name = format!("tree_sitter_{}", self.language_name);
let external_scanner_name = format!("{}_external_scanner", language_function_name);
let external_scanner_name = format!("{language_function_name}_external_scanner");
add_line!(self, "#ifdef __cplusplus");
add_line!(self, r#"extern "C" {{"#);
add_line!(self, "#endif");
if !self.syntax_grammar.external_tokens.is_empty() {
add_line!(self, "void *{}_create(void);", external_scanner_name);
add_line!(self, "void {}_destroy(void *);", external_scanner_name);
add_line!(self, "void *{external_scanner_name}_create(void);");
add_line!(self, "void {external_scanner_name}_destroy(void *);");
add_line!(
self,
"bool {}_scan(void *, TSLexer *, const bool *);",
external_scanner_name
"bool {external_scanner_name}_scan(void *, TSLexer *, const bool *);",
);
add_line!(
self,
"unsigned {}_serialize(void *, char *);",
external_scanner_name
"unsigned {external_scanner_name}_serialize(void *, char *);",
);
add_line!(
self,
"void {}_deserialize(void *, const char *, unsigned);",
external_scanner_name
"void {external_scanner_name}_deserialize(void *, const char *, unsigned);",
);
add_line!(self, "");
}
@ -1360,8 +1337,7 @@ impl Generator {
add_line!(
self,
"extern const TSLanguage *{}(void) {{",
language_function_name
"extern const TSLanguage *{language_function_name}(void) {{",
);
indent!(self);
add_line!(self, "static const TSLanguage language = {{");
@ -1421,11 +1397,11 @@ impl Generator {
indent!(self);
add_line!(self, "&ts_external_scanner_states[0][0],");
add_line!(self, "ts_external_scanner_symbol_map,");
add_line!(self, "{}_create,", external_scanner_name);
add_line!(self, "{}_destroy,", external_scanner_name);
add_line!(self, "{}_scan,", external_scanner_name);
add_line!(self, "{}_serialize,", external_scanner_name);
add_line!(self, "{}_deserialize,", external_scanner_name);
add_line!(self, "{external_scanner_name}_create,");
add_line!(self, "{external_scanner_name}_destroy,");
add_line!(self, "{external_scanner_name}_scan,");
add_line!(self, "{external_scanner_name}_serialize,");
add_line!(self, "{external_scanner_name}_deserialize,");
dedent!(self);
add_line!(self, "}},");
}
@ -1511,8 +1487,8 @@ impl Generator {
self.symbol_ids.insert(symbol, id);
}
fn field_id(&self, field_name: &String) -> String {
format!("field_{}", field_name)
fn field_id(&self, field_name: &str) -> String {
format!("field_{field_name}")
}
fn metadata_for_symbol(&self, symbol: Symbol) -> (&str, VariableType) {
@ -1618,7 +1594,7 @@ impl Generator {
'0'..='9' | 'a'..='z' | 'A'..='Z' | '_' => unreachable!(),
' ' => break 'special_chars,
};
if !result.is_empty() && !result.ends_with("_") {
if !result.is_empty() && !result.ends_with('_') {
result.push('_');
}
result += replacement;
@ -1664,9 +1640,9 @@ impl Generator {
'\r' => add!(self, "'\\r'"),
_ => {
if c == ' ' || c.is_ascii_graphic() {
add!(self, "'{}'", c)
add!(self, "'{c}'");
} else {
add!(self, "{}", c as u32)
add!(self, "{}", c as u32);
}
}
}
@ -1691,7 +1667,8 @@ impl Generator {
/// * `abi_version` - The language ABI version that should be generated. Usually
/// you want Tree-sitter's current version, but right after making an ABI
/// change, it may be useful to generate code with the previous ABI.
pub(crate) fn render_c_code(
#[allow(clippy::too_many_arguments)]
pub fn render_c_code(
name: &str,
parse_table: ParseTable,
main_lex_table: LexTable,
@ -1702,12 +1679,10 @@ pub(crate) fn render_c_code(
default_aliases: AliasMap,
abi_version: usize,
) -> String {
if !(ABI_VERSION_MIN..=ABI_VERSION_MAX).contains(&abi_version) {
panic!(
"This version of Tree-sitter can only generate parsers with ABI version {} - {}, not {}",
ABI_VERSION_MIN, ABI_VERSION_MAX, abi_version
);
}
assert!(
(ABI_VERSION_MIN..=ABI_VERSION_MAX).contains(&abi_version),
"This version of Tree-sitter can only generate parsers with ABI version {ABI_VERSION_MIN} - {ABI_VERSION_MAX}, not {abi_version}",
);
Generator {
buffer: String::new(),
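
Note: the guard above collapses an `if !cond { panic!(..) }` into a single `assert!(cond, ..)`, likely clippy's `manual_assert`. A sketch of the shape; the bound values below are illustrative, not taken from this diff:

const ABI_VERSION_MIN: usize = 13; // assumed values for illustration
const ABI_VERSION_MAX: usize = 14;

fn check_abi(abi_version: usize) {
    // assert! states the condition positively; panic! needed the negation.
    assert!(
        (ABI_VERSION_MIN..=ABI_VERSION_MAX).contains(&abi_version),
        "only ABI versions {ABI_VERSION_MIN} - {ABI_VERSION_MAX} are supported, not {abi_version}",
    );
}

fn main() {
    check_abi(14); // passes; check_abi(12) would panic with the message above
}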


@ -4,7 +4,7 @@ use std::iter::FromIterator;
use std::{collections::HashMap, fmt};
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub(crate) enum SymbolType {
pub enum SymbolType {
External,
End,
EndOfNonTerminalExtra,
@ -13,28 +13,29 @@ pub(crate) enum SymbolType {
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub(crate) enum Associativity {
pub enum Associativity {
Left,
Right,
}
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub(crate) struct Alias {
pub struct Alias {
pub value: String,
pub is_named: bool,
}
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Default)]
pub enum Precedence {
#[default]
None,
Integer(i32),
Name(String),
}
pub(crate) type AliasMap = HashMap<Symbol, Alias>;
pub type AliasMap = HashMap<Symbol, Alias>;
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash)]
pub(crate) struct MetadataParams {
pub struct MetadataParams {
pub precedence: Precedence,
pub dynamic_precedence: i32,
pub associativity: Option<Associativity>,
@ -47,13 +48,13 @@ pub(crate) struct MetadataParams {
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub(crate) struct Symbol {
pub struct Symbol {
pub kind: SymbolType,
pub index: usize,
}
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub(crate) enum Rule {
pub enum Rule {
Blank,
String(String),
Pattern(String, String),
@ -73,7 +74,7 @@ pub(crate) enum Rule {
// index corresponding to a token, and each value representing whether or not
// the token is present in the set.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub(crate) struct TokenSet {
pub struct TokenSet {
terminal_bits: SmallBitVec,
external_bits: SmallBitVec,
eof: bool,
@ -81,76 +82,76 @@ pub(crate) struct TokenSet {
}
impl Rule {
pub fn field(name: String, content: Rule) -> Self {
pub fn field(name: String, content: Self) -> Self {
add_metadata(content, move |params| {
params.field_name = Some(name);
})
}
pub fn alias(content: Rule, value: String, is_named: bool) -> Self {
pub fn alias(content: Self, value: String, is_named: bool) -> Self {
add_metadata(content, move |params| {
params.alias = Some(Alias { is_named, value });
params.alias = Some(Alias { value, is_named });
})
}
pub fn token(content: Rule) -> Self {
pub fn token(content: Self) -> Self {
add_metadata(content, |params| {
params.is_token = true;
})
}
pub fn immediate_token(content: Rule) -> Self {
pub fn immediate_token(content: Self) -> Self {
add_metadata(content, |params| {
params.is_token = true;
params.is_main_token = true;
})
}
pub fn prec(value: Precedence, content: Rule) -> Self {
pub fn prec(value: Precedence, content: Self) -> Self {
add_metadata(content, |params| {
params.precedence = value;
})
}
pub fn prec_left(value: Precedence, content: Rule) -> Self {
pub fn prec_left(value: Precedence, content: Self) -> Self {
add_metadata(content, |params| {
params.associativity = Some(Associativity::Left);
params.precedence = value;
})
}
pub fn prec_right(value: Precedence, content: Rule) -> Self {
pub fn prec_right(value: Precedence, content: Self) -> Self {
add_metadata(content, |params| {
params.associativity = Some(Associativity::Right);
params.precedence = value;
})
}
pub fn prec_dynamic(value: i32, content: Rule) -> Self {
pub fn prec_dynamic(value: i32, content: Self) -> Self {
add_metadata(content, |params| {
params.dynamic_precedence = value;
})
}
pub fn repeat(rule: Rule) -> Self {
Rule::Repeat(Box::new(rule))
pub fn repeat(rule: Self) -> Self {
Self::Repeat(Box::new(rule))
}
pub fn choice(rules: Vec<Rule>) -> Self {
pub fn choice(rules: Vec<Self>) -> Self {
let mut elements = Vec::with_capacity(rules.len());
for rule in rules {
choice_helper(&mut elements, rule);
}
Rule::Choice(elements)
Self::Choice(elements)
}
pub fn seq(rules: Vec<Rule>) -> Self {
Rule::Seq(rules)
pub fn seq(rules: Vec<Self>) -> Self {
Self::Seq(rules)
}
}
impl Alias {
pub fn kind(&self) -> VariableType {
pub const fn kind(&self) -> VariableType {
if self.is_named {
VariableType::Named
} else {
@ -160,35 +161,35 @@ impl Alias {
}
impl Precedence {
pub fn is_none(&self) -> bool {
matches!(self, Precedence::None)
pub const fn is_none(&self) -> bool {
matches!(self, Self::None)
}
}
#[cfg(test)]
impl Rule {
pub fn terminal(index: usize) -> Self {
Rule::Symbol(Symbol::terminal(index))
Self::Symbol(Symbol::terminal(index))
}
pub fn non_terminal(index: usize) -> Self {
Rule::Symbol(Symbol::non_terminal(index))
Self::Symbol(Symbol::non_terminal(index))
}
pub fn external(index: usize) -> Self {
Rule::Symbol(Symbol::external(index))
Self::Symbol(Symbol::external(index))
}
pub fn named(name: &'static str) -> Self {
Rule::NamedSymbol(name.to_string())
Self::NamedSymbol(name.to_string())
}
pub fn string(value: &'static str) -> Self {
Rule::String(value.to_string())
Self::String(value.to_string())
}
pub fn pattern(value: &'static str, flags: &'static str) -> Self {
Rule::Pattern(value.to_string(), flags.to_string())
Self::Pattern(value.to_string(), flags.to_string())
}
}
@ -209,36 +210,36 @@ impl Symbol {
self.kind == SymbolType::End
}
pub fn non_terminal(index: usize) -> Self {
Symbol {
pub const fn non_terminal(index: usize) -> Self {
Self {
kind: SymbolType::NonTerminal,
index,
}
}
pub fn terminal(index: usize) -> Self {
Symbol {
pub const fn terminal(index: usize) -> Self {
Self {
kind: SymbolType::Terminal,
index,
}
}
pub fn external(index: usize) -> Self {
Symbol {
pub const fn external(index: usize) -> Self {
Self {
kind: SymbolType::External,
index,
}
}
pub fn end() -> Self {
Symbol {
pub const fn end() -> Self {
Self {
kind: SymbolType::End,
index: 0,
}
}
pub fn end_of_nonterminal_extra() -> Self {
Symbol {
pub const fn end_of_nonterminal_extra() -> Self {
Self {
kind: SymbolType::EndOfNonTerminalExtra,
index: 0,
}
@ -247,7 +248,7 @@ impl Symbol {
impl From<Symbol> for Rule {
fn from(symbol: Symbol) -> Self {
Rule::Symbol(symbol)
Self::Symbol(symbol)
}
}
@ -261,7 +262,7 @@ impl TokenSet {
}
}
pub fn iter<'a>(&'a self) -> impl Iterator<Item = Symbol> + 'a {
pub fn iter(&self) -> impl Iterator<Item = Symbol> + '_ {
self.terminal_bits
.iter()
.enumerate()
@ -292,7 +293,7 @@ impl TokenSet {
})
}
pub fn terminals<'a>(&'a self) -> impl Iterator<Item = Symbol> + 'a {
pub fn terminals(&self) -> impl Iterator<Item = Symbol> + '_ {
self.terminal_bits
.iter()
.enumerate()
@ -361,11 +362,9 @@ impl TokenSet {
};
}
};
if other.index < vec.len() {
if vec[other.index] {
vec.set(other.index, false);
return true;
}
if other.index < vec.len() && vec[other.index] {
vec.set(other.index, false);
return true;
}
false
}
@ -377,7 +376,7 @@ impl TokenSet {
&& !self.external_bits.iter().any(|a| a)
}
pub fn insert_all_terminals(&mut self, other: &TokenSet) -> bool {
pub fn insert_all_terminals(&mut self, other: &Self) -> bool {
let mut result = false;
if other.terminal_bits.len() > self.terminal_bits.len() {
self.terminal_bits.resize(other.terminal_bits.len(), false);
@ -391,7 +390,7 @@ impl TokenSet {
result
}
fn insert_all_externals(&mut self, other: &TokenSet) -> bool {
fn insert_all_externals(&mut self, other: &Self) -> bool {
let mut result = false;
if other.external_bits.len() > self.external_bits.len() {
self.external_bits.resize(other.external_bits.len(), false);
@ -405,7 +404,7 @@ impl TokenSet {
result
}
pub fn insert_all(&mut self, other: &TokenSet) -> bool {
pub fn insert_all(&mut self, other: &Self) -> bool {
let mut result = false;
if other.eof {
result |= !self.eof;
@ -466,15 +465,9 @@ fn choice_helper(result: &mut Vec<Rule>, rule: Rule) {
impl fmt::Display for Precedence {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Precedence::Integer(i) => write!(f, "{}", i),
Precedence::Name(s) => write!(f, "'{}'", s),
Precedence::None => write!(f, "none"),
Self::Integer(i) => write!(f, "{i}"),
Self::Name(s) => write!(f, "'{s}'"),
Self::None => write!(f, "none"),
}
}
}
impl Default for Precedence {
fn default() -> Self {
Precedence::None
}
}
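
Note: the manual `impl Default for Precedence` goes away in favor of `#[derive(Default)]` with a `#[default]` marker on the `None` variant, stable since Rust 1.62. A minimal reproduction:

#[derive(Debug, PartialEq, Default)]
enum Precedence {
    #[default] // the derive now generates what the hand-written impl said
    None,
    Integer(i32),
    Name(String),
}

fn main() {
    assert_eq!(Precedence::default(), Precedence::None);
}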


@ -1,9 +1,9 @@
use super::nfa::CharacterSet;
use super::rules::{Alias, Symbol, TokenSet};
use std::collections::BTreeMap;
pub(crate) type ProductionInfoId = usize;
pub(crate) type ParseStateId = usize;
pub(crate) type LexStateId = usize;
pub type ProductionInfoId = usize;
pub type ParseStateId = usize;
pub type LexStateId = usize;
use std::hash::BuildHasherDefault;
@ -11,7 +11,7 @@ use indexmap::IndexMap;
use rustc_hash::FxHasher;
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) enum ParseAction {
pub enum ParseAction {
Accept,
Shift {
state: ParseStateId,
@ -28,19 +28,19 @@ pub(crate) enum ParseAction {
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum GotoAction {
pub enum GotoAction {
Goto(ParseStateId),
ShiftExtra,
}
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub(crate) struct ParseTableEntry {
pub struct ParseTableEntry {
pub actions: Vec<ParseAction>,
pub reusable: bool,
}
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub(crate) struct ParseState {
pub struct ParseState {
pub id: ParseStateId,
pub terminal_entries: IndexMap<Symbol, ParseTableEntry, BuildHasherDefault<FxHasher>>,
pub nonterminal_entries: IndexMap<Symbol, GotoAction, BuildHasherDefault<FxHasher>>,
@ -50,19 +50,19 @@ pub(crate) struct ParseState {
}
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub(crate) struct FieldLocation {
pub struct FieldLocation {
pub index: usize,
pub inherited: bool,
}
#[derive(Debug, Default, PartialEq, Eq)]
pub(crate) struct ProductionInfo {
pub struct ProductionInfo {
pub alias_sequence: Vec<Option<Alias>>,
pub field_map: BTreeMap<String, Vec<FieldLocation>>,
}
#[derive(Debug, PartialEq, Eq)]
pub(crate) struct ParseTable {
pub struct ParseTable {
pub states: Vec<ParseState>,
pub symbols: Vec<Symbol>,
pub production_infos: Vec<ProductionInfo>,
@ -71,25 +71,25 @@ pub(crate) struct ParseTable {
}
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) struct AdvanceAction {
pub struct AdvanceAction {
pub state: LexStateId,
pub in_main_token: bool,
}
#[derive(Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) struct LexState {
pub struct LexState {
pub accept_action: Option<Symbol>,
pub eof_action: Option<AdvanceAction>,
pub advance_actions: Vec<(CharacterSet, AdvanceAction)>,
}
#[derive(Debug, PartialEq, Eq)]
pub(crate) struct LexTable {
#[derive(Debug, PartialEq, Eq, Default)]
pub struct LexTable {
pub states: Vec<LexState>,
}
impl ParseTableEntry {
pub fn new() -> Self {
pub const fn new() -> Self {
Self {
reusable: true,
actions: Vec::new(),
@ -97,19 +97,13 @@ impl ParseTableEntry {
}
}
impl Default for LexTable {
fn default() -> Self {
LexTable { states: Vec::new() }
}
}
impl ParseState {
pub fn is_end_of_non_terminal_extra(&self) -> bool {
self.terminal_entries
.contains_key(&Symbol::end_of_nonterminal_extra())
}
pub fn referenced_states<'a>(&'a self) -> impl Iterator<Item = ParseStateId> + 'a {
pub fn referenced_states(&self) -> impl Iterator<Item = ParseStateId> + '_ {
self.terminal_entries
.iter()
.flat_map(|(_, entry)| {
@ -129,7 +123,7 @@ impl ParseState {
pub fn update_referenced_states<F>(&mut self, mut f: F)
where
F: FnMut(usize, &ParseState) -> usize,
F: FnMut(usize, &Self) -> usize,
{
let mut updates = Vec::new();
for (symbol, entry) in &self.terminal_entries {
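
Note: same theme for `LexTable`: when every field's `Default` matches what the hand-written impl produced (here an empty `Vec`), deriving is equivalent, and a constructor doing no runtime work can become a `const fn`. A sketch with stand-in field types:

#[derive(Debug, Default, PartialEq)]
struct LexTable {
    states: Vec<u32>, // stand-in element type; the real field holds LexState
}

struct ParseTableEntry {
    reusable: bool,
    actions: Vec<u8>, // stand-in for Vec<ParseAction>
}

impl ParseTableEntry {
    // Vec::new() is const, so the whole constructor can be const fn.
    pub const fn new() -> Self {
        Self { reusable: true, actions: Vec::new() }
    }
}

fn main() {
    assert_eq!(LexTable::default(), LexTable { states: Vec::new() });
    let entry = ParseTableEntry::new();
    assert!(entry.reusable && entry.actions.is_empty());
}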


@ -12,7 +12,7 @@ use std::{fs, io, path, str, usize};
use tree_sitter_highlight::{HighlightConfiguration, HighlightEvent, Highlighter, HtmlRenderer};
use tree_sitter_loader::Loader;
pub const HTML_HEADER: &'static str = "
pub const HTML_HEADER: &str = "
<!doctype HTML>
<head>
<title>Tree-sitter Highlighting</title>
@ -34,7 +34,7 @@ pub const HTML_HEADER: &'static str = "
<body>
";
pub const HTML_FOOTER: &'static str = "
pub const HTML_FOOTER: &str = "
</body>
";
@ -67,13 +67,14 @@ impl Theme {
Ok(serde_json::from_str(&json).unwrap_or_default())
}
#[must_use]
pub fn default_style(&self) -> Style {
Style::default()
}
}
impl<'de> Deserialize<'de> for Theme {
fn deserialize<D>(deserializer: D) -> std::result::Result<Theme, D::Error>
fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
where
D: Deserializer<'de>,
{
@ -181,17 +182,17 @@ fn parse_style(style: &mut Style, json: Value) {
match property_name.as_str() {
"bold" => {
if value == Value::Bool(true) {
style.ansi = style.ansi.bold()
style.ansi = style.ansi.bold();
}
}
"italic" => {
if value == Value::Bool(true) {
style.ansi = style.ansi.italic()
style.ansi = style.ansi.italic();
}
}
"underline" => {
if value == Value::Bool(true) {
style.ansi = style.ansi.underline()
style.ansi = style.ansi.underline();
}
}
"color" => {
@ -219,10 +220,7 @@ fn parse_style(style: &mut Style, json: Value) {
fn parse_color(json: Value) -> Option<Color> {
match json {
Value::Number(n) => match n.as_u64() {
Some(n) => Some(Color::Fixed(n as u8)),
_ => None,
},
Value::Number(n) => n.as_u64().map(|n| Color::Fixed(n as u8)),
Value::String(s) => match s.to_lowercase().as_str() {
"black" => Some(Color::Black),
"blue" => Some(Color::Blue),
@ -233,7 +231,7 @@ fn parse_color(json: Value) -> Option<Color> {
"white" => Some(Color::White),
"yellow" => Some(Color::Yellow),
s => {
if let Some((red, green, blue)) = hex_string_to_rgb(&s) {
if let Some((red, green, blue)) = hex_string_to_rgb(s) {
Some(Color::RGB(red, green, blue))
} else {
None
@ -245,7 +243,7 @@ fn parse_color(json: Value) -> Option<Color> {
}
fn hex_string_to_rgb(s: &str) -> Option<(u8, u8, u8)> {
if s.starts_with("#") && s.len() >= 7 {
if s.starts_with('#') && s.len() >= 7 {
if let (Ok(red), Ok(green), Ok(blue)) = (
u8::from_str_radix(&s[1..3], 16),
u8::from_str_radix(&s[3..5], 16),
@ -280,7 +278,7 @@ fn style_to_css(style: ansi_term::Style) -> String {
fn write_color(buffer: &mut String, color: Color) {
if let Color::RGB(r, g, b) = &color {
write!(buffer, "color: #{r:02x}{g:02x}{b:02x}").unwrap()
write!(buffer, "color: #{r:02x}{g:02x}{b:02x}").unwrap();
} else {
write!(
buffer,
@ -298,18 +296,14 @@ fn write_color(buffer: &mut String, color: Color) {
Color::RGB(_, _, _) => unreachable!(),
}
)
.unwrap()
.unwrap();
}
}
fn terminal_supports_truecolor() -> bool {
use std::env;
if let Ok(truecolor) = env::var("COLORTERM") {
std::env::var("COLORTERM").map_or(false, |truecolor| {
truecolor == "truecolor" || truecolor == "24bit"
} else {
false
}
})
}
fn closest_xterm_color(red: u8, green: u8, blue: u8) -> Color {
@ -399,25 +393,23 @@ pub fn html(
let mut renderer = HtmlRenderer::new();
renderer.render(events, source, &move |highlight| {
if let Some(css_style) = &theme.styles[highlight.0].css {
css_style.as_bytes()
} else {
"".as_bytes()
}
theme.styles[highlight.0]
.css
.as_ref()
.map_or_else(|| "".as_bytes(), |css_style| css_style.as_bytes())
})?;
if !quiet {
write!(&mut stdout, "<table>\n")?;
writeln!(&mut stdout, "<table>")?;
for (i, line) in renderer.lines().enumerate() {
write!(
writeln!(
&mut stdout,
"<tr><td class=line-number>{}</td><td class=line>{}</td></tr>\n",
"<tr><td class=line-number>{}</td><td class=line>{line}</td></tr>",
i + 1,
line
)?;
}
write!(&mut stdout, "</table>\n")?;
writeln!(&mut stdout, "</table>")?;
}
if print_time {
@ -432,8 +424,8 @@ mod tests {
use super::*;
use std::env;
const JUNGLE_GREEN: &'static str = "#26A69A";
const DARK_CYAN: &'static str = "#00AF87";
const JUNGLE_GREEN: &str = "#26A69A";
const DARK_CYAN: &str = "#00AF87";
#[test]
fn test_parse_style() {
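
Note: `env::var` returns a `Result`, so the unset-variable case folds into `map_or(false, ..)` instead of an `if let / else false` ladder. A self-contained version of the rewritten function:

fn terminal_supports_truecolor() -> bool {
    // Err (variable unset or not unicode) maps to false; Ok applies the test.
    std::env::var("COLORTERM")
        .map_or(false, |truecolor| truecolor == "truecolor" || truecolor == "24bit")
}

fn main() {
    println!("truecolor: {}", terminal_supports_truecolor());
}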


@ -15,7 +15,7 @@ use tree_sitter_highlight::Highlighter;
use tree_sitter_loader as loader;
use tree_sitter_tags::TagsContext;
const BUILD_VERSION: &'static str = env!("CARGO_PKG_VERSION");
const BUILD_VERSION: &str = env!("CARGO_PKG_VERSION");
const BUILD_SHA: Option<&'static str> = option_env!("BUILD_SHA");
const DEFAULT_GENERATE_ABI_VERSION: usize = 14;
@ -29,18 +29,17 @@ fn main() {
}
}
if !err.to_string().is_empty() {
eprintln!("{:?}", err);
eprintln!("{err:?}");
}
std::process::exit(1);
}
}
fn run() -> Result<()> {
let version = if let Some(build_sha) = BUILD_SHA {
format!("{} ({})", BUILD_VERSION, build_sha)
} else {
BUILD_VERSION.to_string()
};
let version = BUILD_SHA.map_or_else(
|| BUILD_VERSION.to_string(),
|build_sha| format!("{BUILD_VERSION} ({build_sha})"),
);
let debug_arg = Arg::with_name("debug")
.help("Show parsing debug log")
@ -414,7 +413,7 @@ fn run() -> Result<()> {
let language = languages
.first()
.ok_or_else(|| anyhow!("No language found"))?;
parser.set_language(&language)?;
parser.set_language(language)?;
let test_dir = current_dir.join("test");
@ -435,7 +434,7 @@ fn run() -> Result<()> {
}
// Check that all of the queries are valid.
test::check_queries_at_path(language.clone(), &current_dir.join("queries"))?;
test::check_queries_at_path(language, &current_dir.join("queries"))?;
// Run the syntax highlighting tests.
let test_highlight_dir = test_dir.join("highlight");
@ -487,7 +486,7 @@ fn run() -> Result<()> {
let time = matches.is_present("time");
let edits = matches
.values_of("edits")
.map_or(Vec::new(), |e| e.collect());
.map_or(Vec::new(), std::iter::Iterator::collect);
let cancellation_flag = util::cancel_on_signal();
let mut parser = Parser::new();
@ -509,7 +508,7 @@ fn run() -> Result<()> {
let timeout = matches
.value_of("timeout")
.map_or(0, |t| u64::from_str_radix(t, 10).unwrap());
.map_or(0, |t| t.parse::<u64>().unwrap());
let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?;
@ -544,7 +543,7 @@ fn run() -> Result<()> {
encoding,
};
let this_file_errored = parse::parse_file_at_path(&mut parser, opts)?;
let this_file_errored = parse::parse_file_at_path(&mut parser, &opts)?;
if should_track_stats {
stats.total_parses += 1;
@ -557,7 +556,7 @@ fn run() -> Result<()> {
}
if should_track_stats {
println!("{}", stats)
println!("{stats}");
}
if has_error {
@ -579,20 +578,20 @@ fn run() -> Result<()> {
)?;
let query_path = Path::new(matches.value_of("query-path").unwrap());
let byte_range = matches.value_of("byte-range").and_then(|arg| {
let mut parts = arg.split(":");
let mut parts = arg.split(':');
let start = parts.next()?.parse().ok()?;
let end = parts.next().unwrap().parse().ok()?;
Some(start..end)
});
let point_range = matches.value_of("row-range").and_then(|arg| {
let mut parts = arg.split(":");
let mut parts = arg.split(':');
let start = parts.next()?.parse().ok()?;
let end = parts.next().unwrap().parse().ok()?;
Some(Point::new(start, 0)..Point::new(end, 0))
});
let should_test = matches.is_present("test");
query::query_files_at_paths(
language,
&language,
paths,
query_path,
ordered_captures,
@ -640,30 +639,29 @@ fn run() -> Result<()> {
if let Some(scope) = matches.value_of("scope") {
language = loader.language_configuration_for_scope(scope)?;
if language.is_none() {
return Err(anyhow!("Unknown scope '{}'", scope));
return Err(anyhow!("Unknown scope '{scope}'"));
}
}
let query_paths = matches.values_of("query-paths").map_or(None, |e| {
Some(
e.collect::<Vec<_>>()
.into_iter()
.map(|s| s.to_string())
.collect::<Vec<_>>(),
)
let query_paths = matches.values_of("query-paths").map(|e| {
e.collect::<Vec<_>>()
.into_iter()
.map(std::string::ToString::to_string)
.collect::<Vec<_>>()
});
for path in paths {
let path = Path::new(&path);
let (language, language_config) = match language.clone() {
Some(v) => v,
None => match loader.language_configuration_for_file_name(path)? {
Some(v) => v,
None => {
eprintln!("No language found for path {:?}", path);
None => {
if let Some(v) = loader.language_configuration_for_file_name(path)? {
v
} else {
eprintln!("No language found for path {path:?}");
continue;
}
},
}
};
if let Some(highlight_config) = language_config.highlight_config(
@ -700,7 +698,7 @@ fn run() -> Result<()> {
}
);
for name in names {
eprintln!("* {}", name);
eprintln!("* {name}");
}
}
}
@ -727,7 +725,7 @@ fn run() -> Result<()> {
)?;
}
} else {
eprintln!("No syntax highlighting config found for path {:?}", path);
eprintln!("No syntax highlighting config found for path {path:?}");
}
}
@ -786,7 +784,7 @@ fn collect_paths<'a>(
) -> Result<Vec<String>> {
if let Some(paths_file) = paths_file {
return Ok(fs::read_to_string(paths_file)
.with_context(|| format!("Failed to read paths file {}", paths_file))?
.with_context(|| format!("Failed to read paths file {paths_file}"))?
.trim()
.lines()
.map(String::from)
@ -799,25 +797,22 @@ fn collect_paths<'a>(
let mut incorporate_path = |path: &str, positive| {
if positive {
result.push(path.to_string());
} else {
if let Some(index) = result.iter().position(|p| p == path) {
result.remove(index);
}
} else if let Some(index) = result.iter().position(|p| p == path) {
result.remove(index);
}
};
for mut path in paths {
let mut positive = true;
if path.starts_with("!") {
if path.starts_with('!') {
positive = false;
path = path.trim_start_matches("!");
path = path.trim_start_matches('!');
}
if Path::new(path).exists() {
incorporate_path(path, positive);
} else {
let paths =
glob(path).with_context(|| format!("Invalid glob pattern {:?}", path))?;
let paths = glob(path).with_context(|| format!("Invalid glob pattern {path:?}"))?;
for path in paths {
if let Some(path) = path?.to_str() {
incorporate_path(path, positive);
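
Note: `u64::from_str_radix(t, 10)` is plain decimal parsing, which `str::parse` already does; clippy flags this as `from_str_radix_10`. A small sketch:

fn main() {
    let t = "500";
    // Before: u64::from_str_radix(t, 10).unwrap()
    // After: the turbofish names the target type; radix 10 is the default.
    let timeout = t.parse::<u64>().unwrap();
    assert_eq!(timeout, 500);

    // from_str_radix still earns its keep for non-decimal radixes:
    assert_eq!(u8::from_str_radix("ff", 16).unwrap(), 255);
}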


@ -22,11 +22,11 @@ pub struct Stats {
impl fmt::Display for Stats {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
return writeln!(f, "Total parses: {}; successful parses: {}; failed parses: {}; success percentage: {:.2}%",
writeln!(f, "Total parses: {}; successful parses: {}; failed parses: {}; success percentage: {:.2}%",
self.total_parses,
self.successful_parses,
self.total_parses - self.successful_parses,
(self.successful_parses as f64) / (self.total_parses as f64) * 100.0);
(self.successful_parses as f64) / (self.total_parses as f64) * 100.0)
}
}
@ -52,7 +52,7 @@ pub struct ParseFileOptions<'a> {
pub encoding: Option<u32>,
}
pub fn parse_file_at_path(parser: &mut Parser, opts: ParseFileOptions) -> Result<bool> {
pub fn parse_file_at_path(parser: &mut Parser, opts: &ParseFileOptions) -> Result<bool> {
let mut _log_session = None;
parser.set_language(&opts.language)?;
let mut source_code = fs::read(opts.path)
@ -73,9 +73,9 @@ pub fn parse_file_at_path(parser: &mut Parser, opts: ParseFileOptions) -> Result
else if opts.debug {
parser.set_logger(Some(Box::new(|log_type, message| {
if log_type == LogType::Lex {
io::stderr().write(b" ").unwrap();
io::stderr().write_all(b" ").unwrap();
}
write!(&mut io::stderr(), "{}\n", message).unwrap();
writeln!(&mut io::stderr(), "{message}").unwrap();
})));
}
@ -120,12 +120,13 @@ pub fn parse_file_at_path(parser: &mut Parser, opts: ParseFileOptions) -> Result
tree = parser.parse(&source_code, Some(&tree)).unwrap();
if opts.debug_graph {
println!("AFTER {}:\n{}", i, String::from_utf8_lossy(&source_code));
println!("AFTER {i}:\n{}", String::from_utf8_lossy(&source_code));
}
}
let duration = time.elapsed();
let duration_ms = duration.as_secs() * 1000 + duration.subsec_nanos() as u64 / 1000000;
let duration_ms =
duration.as_secs() * 1000 + u64::from(duration.subsec_nanos()) / 1_000_000;
let mut cursor = tree.walk();
if matches!(opts.output, ParseOutput::Normal) {
@ -137,7 +138,7 @@ pub fn parse_file_at_path(parser: &mut Parser, opts: ParseFileOptions) -> Result
let is_named = node.is_named();
if did_visit_children {
if is_named {
stdout.write(b")")?;
stdout.write_all(b")")?;
needs_newline = true;
}
if cursor.goto_next_sibling() {
@ -151,15 +152,15 @@ pub fn parse_file_at_path(parser: &mut Parser, opts: ParseFileOptions) -> Result
} else {
if is_named {
if needs_newline {
stdout.write(b"\n")?;
stdout.write_all(b"\n")?;
}
for _ in 0..indent_level {
stdout.write(b" ")?;
stdout.write_all(b" ")?;
}
let start = node.start_position();
let end = node.end_position();
if let Some(field_name) = cursor.field_name() {
write!(&mut stdout, "{}: ", field_name)?;
write!(&mut stdout, "{field_name}: ")?;
}
write!(
&mut stdout,
@ -181,7 +182,7 @@ pub fn parse_file_at_path(parser: &mut Parser, opts: ParseFileOptions) -> Result
}
}
cursor.reset(tree.root_node());
println!("");
println!();
}
if matches!(opts.output, ParseOutput::Xml) {
@ -195,7 +196,7 @@ pub fn parse_file_at_path(parser: &mut Parser, opts: ParseFileOptions) -> Result
if did_visit_children {
if is_named {
let tag = tags.pop();
write!(&mut stdout, "</{}>\n", tag.expect("there is a tag"))?;
writeln!(&mut stdout, "</{}>", tag.expect("there is a tag"))?;
needs_newline = true;
}
if cursor.goto_next_sibling() {
@ -209,14 +210,14 @@ pub fn parse_file_at_path(parser: &mut Parser, opts: ParseFileOptions) -> Result
} else {
if is_named {
if needs_newline {
stdout.write(b"\n")?;
stdout.write_all(b"\n")?;
}
for _ in 0..indent_level {
stdout.write(b" ")?;
stdout.write_all(b" ")?;
}
write!(&mut stdout, "<{}", node.kind())?;
if let Some(field_name) = cursor.field_name() {
write!(&mut stdout, " type=\"{}\"", field_name)?;
write!(&mut stdout, " type=\"{field_name}\"")?;
}
write!(&mut stdout, ">")?;
tags.push(node.kind());
@ -236,7 +237,7 @@ pub fn parse_file_at_path(parser: &mut Parser, opts: ParseFileOptions) -> Result
}
}
cursor.reset(tree.root_node());
println!("");
println!();
}
if matches!(opts.output, ParseOutput::Dot) {
@ -250,10 +251,8 @@ pub fn parse_file_at_path(parser: &mut Parser, opts: ParseFileOptions) -> Result
if node.is_error() || node.is_missing() {
first_error = Some(node);
break;
} else {
if !cursor.goto_first_child() {
break;
}
} else if !cursor.goto_first_child() {
break;
}
} else if !cursor.goto_next_sibling() {
break;
@ -263,9 +262,8 @@ pub fn parse_file_at_path(parser: &mut Parser, opts: ParseFileOptions) -> Result
if first_error.is_some() || opts.print_time {
write!(
&mut stdout,
"{:width$}\t{} ms",
"{:width$}\t{duration_ms} ms",
opts.path.to_str().unwrap(),
duration_ms,
width = opts.max_path_length
)?;
if let Some(node) = first_error {
@ -279,7 +277,7 @@ pub fn parse_file_at_path(parser: &mut Parser, opts: ParseFileOptions) -> Result
write!(
&mut stdout,
"MISSING \"{}\"",
node.kind().replace("\n", "\\n")
node.kind().replace('\n', "\\n")
)?;
}
} else {
@ -291,18 +289,18 @@ pub fn parse_file_at_path(parser: &mut Parser, opts: ParseFileOptions) -> Result
start.row, start.column, end.row, end.column
)?;
}
write!(&mut stdout, "\n")?;
writeln!(&mut stdout)?;
}
return Ok(first_error.is_some());
} else if opts.print_time {
let duration = time.elapsed();
let duration_ms = duration.as_secs() * 1000 + duration.subsec_nanos() as u64 / 1000000;
let duration_ms =
duration.as_secs() * 1000 + u64::from(duration.subsec_nanos()) / 1_000_000;
writeln!(
&mut stdout,
"{:width$}\t{} ms (timed out)",
"{:width$}\t{duration_ms} ms (timed out)",
opts.path.to_str().unwrap(),
duration_ms,
width = opts.max_path_length
)?;
}
@ -316,7 +314,7 @@ pub fn perform_edit(tree: &mut Tree, input: &mut Vec<u8>, edit: &Edit) -> Result
let new_end_byte = edit.position + edit.inserted_text.len();
let start_position = position_for_offset(input, start_byte)?;
let old_end_position = position_for_offset(input, old_end_byte)?;
input.splice(start_byte..old_end_byte, edit.inserted_text.iter().cloned());
input.splice(start_byte..old_end_byte, edit.inserted_text.iter().copied());
let new_end_position = position_for_offset(input, new_end_byte)?;
let edit = InputEdit {
start_byte,
@ -330,7 +328,7 @@ pub fn perform_edit(tree: &mut Tree, input: &mut Vec<u8>, edit: &Edit) -> Result
Ok(edit)
}
fn parse_edit_flag(source_code: &Vec<u8>, flag: &str) -> Result<Edit> {
fn parse_edit_flag(source_code: &[u8], flag: &str) -> Result<Edit> {
let error = || {
anyhow!(concat!(
"Invalid edit string '{}'. ",
@ -342,7 +340,7 @@ fn parse_edit_flag(source_code: &Vec<u8>, flag: &str) -> Result<Edit> {
// * edit position
// * deleted length
// * inserted text
let mut parts = flag.split(" ");
let mut parts = flag.split(' ');
let position = parts.next().ok_or_else(error)?;
let deleted_length = parts.next().ok_or_else(error)?;
let inserted_text = parts.collect::<Vec<_>>().join(" ").into_bytes();
@ -350,19 +348,19 @@ fn parse_edit_flag(source_code: &Vec<u8>, flag: &str) -> Result<Edit> {
// Position can either be a byte_offset or row,column pair, separated by a comma
let position = if position == "$" {
source_code.len()
} else if position.contains(",") {
let mut parts = position.split(",");
} else if position.contains(',') {
let mut parts = position.split(',');
let row = parts.next().ok_or_else(error)?;
let row = usize::from_str_radix(row, 10).map_err(|_| error())?;
let row = row.parse::<usize>().map_err(|_| error())?;
let column = parts.next().ok_or_else(error)?;
let column = usize::from_str_radix(column, 10).map_err(|_| error())?;
let column = column.parse::<usize>().map_err(|_| error())?;
offset_for_position(source_code, Point { row, column })?
} else {
usize::from_str_radix(position, 10).map_err(|_| error())?
position.parse::<usize>().map_err(|_| error())?
};
// Deleted length must be a byte count.
let deleted_length = usize::from_str_radix(deleted_length, 10).map_err(|_| error())?;
let deleted_length = deleted_length.parse::<usize>().map_err(|_| error())?;
Ok(Edit {
position,
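
Note: two I/O-flavored fixes recur in this file: `Write::write` may write fewer bytes than requested and returns the count, so `write_all` is the honest call when the whole buffer must land (clippy's `unused_io_amount`), and `writeln!` replaces `write!` with a trailing `\n`. A minimal sketch:

use std::io::Write;

fn main() -> std::io::Result<()> {
    let mut stdout = std::io::stdout();
    // write() could legally stop short; write_all loops until done or errors.
    stdout.write_all(b"  ")?;
    // writeln! appends the newline itself.
    writeln!(stdout, "done")?;
    Ok(())
}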


@ -36,22 +36,21 @@ optional_resource!(get_lib_js, "lib/binding_web/tree-sitter.js");
optional_resource!(get_lib_wasm, "lib/binding_web/tree-sitter.wasm");
fn get_main_html(tree_sitter_dir: Option<&PathBuf>) -> Cow<'static, [u8]> {
if let Some(tree_sitter_dir) = tree_sitter_dir {
Cow::Owned(fs::read(tree_sitter_dir.join("cli/src/playground.html")).unwrap())
} else {
Cow::Borrowed(include_bytes!("playground.html"))
}
tree_sitter_dir.map_or(
Cow::Borrowed(include_bytes!("playground.html")),
|tree_sitter_dir| {
Cow::Owned(fs::read(tree_sitter_dir.join("cli/src/playground.html")).unwrap())
},
)
}
pub fn serve(grammar_path: &Path, open_in_browser: bool) -> Result<()> {
let server = get_server()?;
let (grammar_name, language_wasm) = wasm::load_language_wasm_file(&grammar_path).unwrap();
let (grammar_name, language_wasm) = wasm::load_language_wasm_file(grammar_path).unwrap();
let url = format!("http://{}", server.server_addr());
println!("Started playground on: {}", url);
if open_in_browser {
if let Err(_) = webbrowser::open(&url) {
eprintln!("Failed to open '{}' in a web browser", url);
}
println!("Started playground on: {url}");
if open_in_browser && webbrowser::open(&url).is_err() {
eprintln!("Failed to open '{url}' in a web browser");
}
let tree_sitter_dir = env::var("TREE_SITTER_BASE_DIR").map(PathBuf::from).ok();
@ -102,7 +101,7 @@ pub fn serve(grammar_path: &Path, open_in_browser: bool) -> Result<()> {
Ok(())
}
fn redirect<'a>(url: &'a str) -> Response<&'a [u8]> {
fn redirect(url: &str) -> Response<&[u8]> {
Response::empty(302)
.with_data("".as_bytes(), Some(0))
.with_header(Header::from_bytes("Location", url.as_bytes()).unwrap())
@ -115,7 +114,7 @@ fn response<'a>(data: &'a [u8], header: &Header) -> Response<&'a [u8]> {
}
fn get_server() -> Result<Server> {
let addr = env::var("TREE_SITTER_PLAYGROUND_ADDR").unwrap_or("127.0.0.1".to_owned());
let addr = env::var("TREE_SITTER_PLAYGROUND_ADDR").unwrap_or_else(|_| "127.0.0.1".to_owned());
let port = env::var("TREE_SITTER_PLAYGROUND_PORT")
.map(|v| {
v.parse::<u16>()
@ -124,9 +123,9 @@ fn get_server() -> Result<Server> {
.ok();
let listener = match port {
Some(port) => {
bind_to(&*addr, port?).with_context(|| "Failed to bind to the specified port")?
bind_to(&addr, port?).with_context(|| "Failed to bind to the specified port")?
}
None => get_listener_on_available_port(&*addr)
None => get_listener_on_available_port(&addr)
.with_context(|| "Failed to find a free port to bind to it")?,
};
let server =
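
Note: `unwrap_or("127.0.0.1".to_owned())` builds the default `String` even on the `Ok` path; `unwrap_or_else` defers that allocation behind a closure (clippy's `or_fun_call`). Stand-alone sketch:

fn main() {
    // The closure only runs, and only allocates, if the variable is unset.
    let addr = std::env::var("TREE_SITTER_PLAYGROUND_ADDR")
        .unwrap_or_else(|_| "127.0.0.1".to_owned());
    println!("binding to {addr}");
}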


@ -9,8 +9,9 @@ use std::{
};
use tree_sitter::{Language, Parser, Point, Query, QueryCursor};
#[allow(clippy::too_many_arguments)]
pub fn query_files_at_paths(
language: Language,
language: &Language,
paths: Vec<String>,
query_path: &Path,
ordered_captures: bool,
@ -24,8 +25,8 @@ pub fn query_files_at_paths(
let mut stdout = stdout.lock();
let query_source = fs::read_to_string(query_path)
.with_context(|| format!("Error reading query file {:?}", query_path))?;
let query = Query::new(&language, &query_source).with_context(|| "Query compilation failed")?;
.with_context(|| format!("Error reading query file {query_path:?}"))?;
let query = Query::new(language, &query_source).with_context(|| "Query compilation failed")?;
let mut query_cursor = QueryCursor::new();
if let Some(range) = byte_range {
@ -36,15 +37,15 @@ pub fn query_files_at_paths(
}
let mut parser = Parser::new();
parser.set_language(&language)?;
parser.set_language(language)?;
for path in paths {
let mut results = Vec::new();
writeln!(&mut stdout, "{}", path)?;
writeln!(&mut stdout, "{path}")?;
let source_code =
fs::read(&path).with_context(|| format!("Error reading source file {:?}", path))?;
fs::read(&path).with_context(|| format!("Error reading source file {path:?}"))?;
let tree = parser.parse(&source_code, None).unwrap();
let start = Instant::now();
@ -57,17 +58,16 @@ pub fn query_files_at_paths(
if !quiet {
writeln!(
&mut stdout,
" pattern: {:>2}, capture: {} - {}, start: {}, end: {}, text: `{}`",
" pattern: {:>2}, capture: {} - {capture_name}, start: {}, end: {}, text: `{}`",
mat.pattern_index,
capture.index,
capture_name,
capture.node.start_position(),
capture.node.end_position(),
capture.node.utf8_text(&source_code).unwrap_or("")
)?;
}
results.push(query_testing::CaptureInfo {
name: capture_name.to_string(),
name: (*capture_name).to_string(),
start: capture.node.start_position(),
end: capture.node.end_position(),
});
@ -85,23 +85,19 @@ pub fn query_files_at_paths(
if end.row == start.row {
writeln!(
&mut stdout,
" capture: {} - {}, start: {}, end: {}, text: `{}`",
" capture: {} - {capture_name}, start: {start}, end: {end}, text: `{}`",
capture.index,
capture_name,
start,
end,
capture.node.utf8_text(&source_code).unwrap_or("")
)?;
} else {
writeln!(
&mut stdout,
" capture: {}, start: {}, end: {}",
capture_name, start, end,
" capture: {capture_name}, start: {start}, end: {end}",
)?;
}
}
results.push(query_testing::CaptureInfo {
name: capture_name.to_string(),
name: (*capture_name).to_string(),
start: capture.node.start_position(),
end: capture.node.end_position(),
});
@ -115,7 +111,7 @@ pub fn query_files_at_paths(
)?;
}
if should_test {
query_testing::assert_expected_captures(results, path, &mut parser, language.clone())?
query_testing::assert_expected_captures(&results, path, &mut parser, language)?;
}
if print_time {
writeln!(&mut stdout, "{:?}", start.elapsed())?;


@ -23,6 +23,7 @@ pub struct Assertion {
}
impl Assertion {
#[must_use]
pub fn new(row: usize, col: usize, negative: bool, expected_capture_name: String) -> Self {
Self {
position: Point::new(row, col),
@ -37,7 +38,7 @@ impl Assertion {
/// pairs.
pub fn parse_position_comments(
parser: &mut Parser,
language: Language,
language: &Language,
source: &[u8],
) -> Result<Vec<Assertion>> {
let mut result = Vec::new();
@ -45,7 +46,7 @@ pub fn parse_position_comments(
// Parse the code.
parser.set_included_ranges(&[]).unwrap();
parser.set_language(&language).unwrap();
parser.set_language(language).unwrap();
let tree = parser.parse(source, None).unwrap();
// Walk the tree, finding comment nodes that contain assertions.
@ -125,7 +126,7 @@ pub fn parse_position_comments(
// code *above* the assertion. There can be multiple lines of assertion comments,
// so the positions may have to be decremented by more than one row.
let mut i = 0;
for assertion in result.iter_mut() {
for assertion in &mut result {
loop {
let on_assertion_line = assertion_ranges[i..]
.iter()
@ -150,14 +151,14 @@ pub fn parse_position_comments(
}
pub fn assert_expected_captures(
infos: Vec<CaptureInfo>,
infos: &[CaptureInfo],
path: String,
parser: &mut Parser,
language: Language,
language: &Language,
) -> Result<()> {
let contents = fs::read_to_string(path)?;
let pairs = parse_position_comments(parser, language, contents.as_bytes())?;
for info in &infos {
for info in infos {
if let Some(found) = pairs.iter().find(|p| {
p.position.row == info.start.row && p.position >= info.start && p.position < info.end
}) {
@ -167,7 +168,7 @@ pub fn assert_expected_captures(
info.start,
found.expected_capture_name,
info.name
))?
))?;
}
}
}
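The signature changes above follow `clippy::needless_pass_by_value` and `clippy::ptr_arg`: `Language` becomes `&Language` and `Vec<CaptureInfo>` becomes `&[CaptureInfo]`, so callers no longer clone or surrender ownership. A hedged sketch of the pattern with a stand-in type (`Config` is hypothetical, not from this crate):

```rust
#[derive(Clone)]
struct Config {
    name: String,
}

// Before: forces every caller to clone or give up ownership.
fn describe_owned(config: Config, items: Vec<String>) -> String {
    format!("{} ({} items)", config.name, items.len())
}

// After: borrows suffice because the function only reads its arguments;
// `&[String]` also accepts both `&Vec<String>` and plain slices.
fn describe(config: &Config, items: &[String]) -> String {
    format!("{} ({} items)", config.name, items.len())
}

fn main() {
    let config = Config { name: "demo".into() };
    let items = vec!["a".into(), "b".into()];
    // The owning version needs clones here; the borrowing one does not.
    println!("{}", describe_owned(config.clone(), items.clone()));
    println!("{}", describe(&config, &items));
}
```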

View file

@ -18,7 +18,7 @@ pub fn generate_tags(
if let Some(scope) = scope {
lang = loader.language_configuration_for_scope(scope)?;
if lang.is_none() {
return Err(anyhow!("Unknown scope '{}'", scope));
return Err(anyhow!("Unknown scope '{scope}'"));
}
}
@ -31,24 +31,24 @@ pub fn generate_tags(
let path = Path::new(&path);
let (language, language_config) = match lang.clone() {
Some(v) => v,
None => match loader.language_configuration_for_file_name(path)? {
Some(v) => v,
None => {
eprintln!("No language found for path {:?}", path);
None => {
if let Some(v) = loader.language_configuration_for_file_name(path)? {
v
} else {
eprintln!("No language found for path {path:?}");
continue;
}
},
}
};
if let Some(tags_config) = language_config.tags_config(language)? {
let indent;
if paths.len() > 1 {
let indent = if paths.len() > 1 {
if !quiet {
writeln!(&mut stdout, "{}", path.to_string_lossy())?;
}
indent = "\t"
"\t"
} else {
indent = "";
""
};
let source = fs::read(path)?;
@ -61,8 +61,7 @@ pub fn generate_tags(
if !quiet {
write!(
&mut stdout,
"{}{:<10}\t | {:<8}\t{} {} - {} `{}`",
indent,
"{indent}{:<10}\t | {:<8}\t{} {} - {} `{}`",
str::from_utf8(&source[tag.name_range]).unwrap_or(""),
&tags_config.syntax_type_name(tag.syntax_type_id),
if tag.is_definition { "def" } else { "ref" },
@ -77,20 +76,15 @@ pub fn generate_tags(
write!(&mut stdout, "\t{:?}", &docs)?;
}
}
writeln!(&mut stdout, "")?;
writeln!(&mut stdout)?;
}
}
if time {
writeln!(
&mut stdout,
"{}time: {}ms",
indent,
t0.elapsed().as_millis(),
)?;
writeln!(&mut stdout, "{indent}time: {}ms", t0.elapsed().as_millis(),)?;
}
} else {
eprintln!("No tags config found for path {:?}", path);
eprintln!("No tags config found for path {path:?}");
}
}
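The `indent` rewrite is the `clippy::useless_let_if_seq` pattern: a deferred `let` plus an `if/else` that assigns it collapses into one `let` bound to the `if/else` expression. Sketch:

```rust
fn indent_for(paths: usize) -> &'static str {
    // Before:
    //     let indent;
    //     if paths > 1 { indent = "\t"; } else { indent = ""; }
    // After: `if/else` is an expression, so bind (or return) it directly.
    if paths > 1 {
        "\t"
    } else {
        ""
    }
}

fn main() {
    assert_eq!(indent_for(1), "");
    assert_eq!(indent_for(3), "\t");
}
```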

View file

@ -48,7 +48,7 @@ pub enum TestEntry {
impl Default for TestEntry {
fn default() -> Self {
TestEntry::Group {
Self::Group {
name: String::new(),
children: Vec::new(),
file_path: None,
@ -72,9 +72,9 @@ pub fn run_tests_at_path(
} else if debug {
parser.set_logger(Some(Box::new(|log_type, message| {
if log_type == LogType::Lex {
io::stderr().write(b" ").unwrap();
io::stderr().write_all(b" ").unwrap();
}
write!(&mut io::stderr(), "{}\n", message).unwrap();
writeln!(&mut io::stderr(), "{message}").unwrap();
})));
}
@ -92,32 +92,32 @@ pub fn run_tests_at_path(
parser.stop_printing_dot_graphs();
if failures.len() > 0 {
println!("");
if !failures.is_empty() {
println!();
if update {
if failures.len() == 1 {
println!("1 update:\n")
println!("1 update:\n");
} else {
println!("{} updates:\n", failures.len())
println!("{} updates:\n", failures.len());
}
for (i, (name, ..)) in failures.iter().enumerate() {
println!(" {}. {}", i + 1, name);
println!(" {}. {name}", i + 1);
}
Ok(())
} else {
if failures.len() == 1 {
println!("1 failure:")
println!("1 failure:");
} else {
println!("{} failures:", failures.len())
println!("{} failures:", failures.len());
}
print_diff_key();
for (i, (name, actual, expected)) in failures.iter().enumerate() {
println!("\n {}. {}:", i + 1, name);
let actual = format_sexp_indented(&actual, 2);
let expected = format_sexp_indented(&expected, 2);
println!("\n {}. {name}:", i + 1);
let actual = format_sexp_indented(actual, 2);
let expected = format_sexp_indented(expected, 2);
print_diff(&actual, &expected);
}
Err(anyhow!(""))
@ -127,11 +127,11 @@ pub fn run_tests_at_path(
}
}
pub fn check_queries_at_path(language: Language, path: &Path) -> Result<()> {
pub fn check_queries_at_path(language: &Language, path: &Path) -> Result<()> {
if path.exists() {
for entry in WalkDir::new(path)
.into_iter()
.filter_map(|e| e.ok())
.filter_map(std::result::Result::ok)
.filter(|e| {
e.file_type().is_file()
&& e.path().extension().and_then(OsStr::to_str) == Some("scm")
@ -140,9 +140,9 @@ pub fn check_queries_at_path(language: Language, path: &Path) -> Result<()> {
{
let filepath = entry.file_name().to_str().unwrap_or("");
let content = fs::read_to_string(entry.path())
.with_context(|| format!("Error reading query file {:?}", filepath))?;
Query::new(&language, &content)
.with_context(|| format!("Error in query file {:?}", filepath))?;
.with_context(|| format!("Error reading query file {filepath:?}"))?;
Query::new(language, &content)
.with_context(|| format!("Error in query file {filepath:?}"))?;
}
}
Ok(())
@ -156,12 +156,12 @@ pub fn print_diff_key() {
);
}
pub fn print_diff(actual: &String, expected: &String) {
pub fn print_diff(actual: &str, expected: &str) {
let changeset = Changeset::new(actual, expected, "\n");
for diff in &changeset.diffs {
match diff {
Difference::Same(part) => {
print!("{}{}", part, changeset.split);
print!("{part}{}", changeset.split);
}
Difference::Add(part) => {
print!("{}{}", Colour::Green.paint(part), changeset.split);
@ -171,7 +171,7 @@ pub fn print_diff(actual: &String, expected: &String) {
}
}
}
println!("");
println!();
}
fn run_tests(
@ -211,7 +211,7 @@ fn run_tests(
let tree = parser.parse(&input, None).unwrap();
let mut actual = tree.root_node().to_sexp();
if !has_fields {
actual = strip_sexp_fields(actual);
actual = strip_sexp_fields(&actual);
}
print!("{}", " ".repeat(indent_level as usize));
if actual == output {
@ -252,7 +252,7 @@ fn run_tests(
} => {
if indent_level > 0 {
print!("{}", " ".repeat(indent_level as usize));
println!("{}:", name);
println!("{name}:");
}
let failure_count = failures.len();
@ -281,11 +281,11 @@ fn run_tests(
Ok(())
}
fn format_sexp(sexp: &String) -> String {
fn format_sexp(sexp: &str) -> String {
format_sexp_indented(sexp, 0)
}
fn format_sexp_indented(sexp: &String, initial_indent_level: u32) -> String {
fn format_sexp_indented(sexp: &str, initial_indent_level: u32) -> String {
let mut formatted = String::new();
let mut indent_level = initial_indent_level;
@ -301,7 +301,7 @@ fn format_sexp_indented(sexp: &String, initial_indent_level: u32) -> String {
has_field = false;
} else {
if indent_level > 0 {
writeln!(formatted, "").unwrap();
writeln!(formatted).unwrap();
for _ in 0..indent_level {
write!(formatted, " ").unwrap();
}
@ -310,20 +310,20 @@ fn format_sexp_indented(sexp: &String, initial_indent_level: u32) -> String {
}
// "(node_name"
write!(formatted, "{}", s).unwrap();
write!(formatted, "{s}").unwrap();
// "(MISSING node_name" or "(UNEXPECTED 'x'"
if s.starts_with("(MISSING") || s.starts_with("(UNEXPECTED") {
let s = s_iter.next().unwrap();
write!(formatted, " {}", s).unwrap();
write!(formatted, " {s}").unwrap();
}
} else if s.ends_with(':') {
// "field:"
writeln!(formatted, "").unwrap();
writeln!(formatted).unwrap();
for _ in 0..indent_level {
write!(formatted, " ").unwrap();
}
write!(formatted, "{} ", s).unwrap();
write!(formatted, "{s} ").unwrap();
has_field = true;
indent_level += 1;
}
@ -334,7 +334,7 @@ fn format_sexp_indented(sexp: &String, initial_indent_level: u32) -> String {
fn write_tests(
file_path: &Path,
corrected_entries: &Vec<(String, String, String, usize, usize)>,
corrected_entries: &[(String, String, String, usize, usize)],
) -> Result<()> {
let mut buffer = fs::File::create(file_path)?;
write_tests_to_buffer(&mut buffer, corrected_entries)
@ -342,21 +342,19 @@ fn write_tests(
fn write_tests_to_buffer(
buffer: &mut impl Write,
corrected_entries: &Vec<(String, String, String, usize, usize)>,
corrected_entries: &[(String, String, String, usize, usize)],
) -> Result<()> {
for (i, (name, input, output, header_delim_len, divider_delim_len)) in
corrected_entries.iter().enumerate()
{
if i > 0 {
write!(buffer, "\n")?;
writeln!(buffer)?;
}
write!(
buffer,
"{}\n{}\n{}\n{}\n{}\n\n{}\n",
"{}\n{name}\n{}\n{input}\n{}\n\n{}\n",
"=".repeat(*header_delim_len),
name,
"=".repeat(*header_delim_len),
input,
"-".repeat(*divider_delim_len),
output.trim()
)?;
@ -374,7 +372,7 @@ pub fn parse_tests(path: &Path) -> io::Result<TestEntry> {
let mut children = Vec::new();
for entry in fs::read_dir(path)? {
let entry = entry?;
let hidden = entry.file_name().to_str().unwrap_or("").starts_with(".");
let hidden = entry.file_name().to_str().unwrap_or("").starts_with('.');
if !hidden {
children.push(entry.path());
}
@ -382,7 +380,7 @@ pub fn parse_tests(path: &Path) -> io::Result<TestEntry> {
children.sort_by(|a, b| {
a.file_name()
.unwrap_or_default()
.cmp(&b.file_name().unwrap_or_default())
.cmp(b.file_name().unwrap_or_default())
});
let children = children
.iter()
@ -395,15 +393,16 @@ pub fn parse_tests(path: &Path) -> io::Result<TestEntry> {
})
} else {
let content = fs::read_to_string(path)?;
Ok(parse_test_content(name, content, Some(path.to_path_buf())))
Ok(parse_test_content(name, &content, Some(path.to_path_buf())))
}
}
pub fn strip_sexp_fields(sexp: String) -> String {
SEXP_FIELD_REGEX.replace_all(&sexp, " (").to_string()
#[must_use]
pub fn strip_sexp_fields(sexp: &str) -> String {
SEXP_FIELD_REGEX.replace_all(sexp, " (").to_string()
}
fn parse_test_content(name: String, content: String, file_path: Option<PathBuf>) -> TestEntry {
fn parse_test_content(name: String, content: &str, file_path: Option<PathBuf>) -> TestEntry {
let mut children = Vec::new();
let bytes = content.as_bytes();
let mut prev_name = String::new();
@ -420,8 +419,8 @@ fn parse_test_content(name: String, content: String, file_path: Option<PathBuf>)
// Find all of the `===` test headers, which contain the test names.
// Ignore any matches whose suffix does not match the first header
// suffix in the file.
let header_matches = HEADER_REGEX.captures_iter(&bytes).filter_map(|c| {
let header_delim_len = c.name("equals").map(|n| n.as_bytes().len()).unwrap_or(80);
let header_matches = HEADER_REGEX.captures_iter(bytes).filter_map(|c| {
let header_delim_len = c.name("equals").map_or(80, |m| m.as_bytes().len());
let suffix1 = c
.name("suffix1")
.map(|m| String::from_utf8_lossy(m.as_bytes()));
@ -433,8 +432,7 @@ fn parse_test_content(name: String, content: String, file_path: Option<PathBuf>)
let test_name = c
.name("test_name")
.map(|c| String::from_utf8_lossy(c.as_bytes()).trim_end().to_string());
let res = Some((header_delim_len, header_range, test_name));
res
Some((header_delim_len, header_range, test_name))
} else {
None
}
@ -451,18 +449,16 @@ fn parse_test_content(name: String, content: String, file_path: Option<PathBuf>)
let divider_range = DIVIDER_REGEX
.captures_iter(&bytes[prev_header_end..header_range.start])
.filter_map(|m| {
let divider_delim_len =
m.name("hyphens").map(|m| m.as_bytes().len()).unwrap_or(80);
let divider_delim_len = m.name("hyphens").map_or(80, |m| m.as_bytes().len());
let suffix = m
.name("suffix")
.map(|m| String::from_utf8_lossy(m.as_bytes()));
if suffix == first_suffix {
let range = m.get(0).unwrap().range();
let res = Some((
Some((
divider_delim_len,
(prev_header_end + range.start)..(prev_header_end + range.end),
));
res
))
} else {
None
}
@ -539,8 +535,7 @@ d
---
(d)
"#
.trim()
.to_string(),
.trim(),
None,
);
@ -597,8 +592,7 @@ abc
(c (d))
"#
.trim()
.to_string(),
.trim(),
None,
);
@ -735,8 +729,7 @@ code
(MISSING ";")
"#
.trim()
.to_string(),
.trim(),
None,
);
@ -819,8 +812,7 @@ NOT A TEST HEADER
(a)
"#
.trim()
.to_string(),
.trim(),
None,
);
@ -885,8 +877,7 @@ name with === signs
code with ----
---
(d)
"#
.to_string(),
"#,
None,
);
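Three small lints recur throughout this test runner: `clippy::len_zero` (`len() > 0` becomes `!is_empty()`), empty-string printing (`println!("")` becomes `println!()`), and `clippy::single_char_pattern` (`starts_with(".")` becomes `starts_with('.')`). A combined sketch:

```rust
fn main() {
    let failures = vec!["test_a"];
    let file_name = ".hidden";

    // `!failures.is_empty()` replaces `failures.len() > 0`.
    if !failures.is_empty() {
        // `println!()` replaces `println!("")` for a bare newline.
        println!();
        println!("{} failure(s)", failures.len());
    }

    // A `char` pattern is cheaper and clearer than a one-byte `&str`.
    let hidden = file_name.starts_with('.');
    assert!(hidden);
}
```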

View file

@ -31,7 +31,7 @@ impl std::fmt::Display for Failure {
if i > 0 {
write!(f, ", ")?;
}
write!(f, "'{}'", actual_highlight)?;
write!(f, "'{actual_highlight}'")?;
}
}
Ok(())
@ -66,35 +66,36 @@ fn test_highlights_indented(
indent = "",
indent_level = indent_level * 2
);
if test_file_path.is_dir() && !test_file_path.read_dir()?.next().is_none() {
if test_file_path.is_dir() && test_file_path.read_dir()?.next().is_some() {
println!("{}:", test_file_name.into_string().unwrap());
if let Err(_) = test_highlights_indented(
if test_highlights_indented(
loader,
highlighter,
&test_file_path,
apply_all_captures,
indent_level + 1,
) {
)
.is_err()
{
failed = true;
}
} else {
let (language, language_config) = loader
.language_configuration_for_file_name(&test_file_path)?
.ok_or_else(|| anyhow!("No language found for path {:?}", test_file_path))?;
.ok_or_else(|| anyhow!("No language found for path {test_file_path:?}"))?;
let highlight_config = language_config
.highlight_config(language, apply_all_captures, None)?
.ok_or_else(|| anyhow!("No highlighting config found for {:?}", test_file_path))?;
.ok_or_else(|| anyhow!("No highlighting config found for {test_file_path:?}"))?;
match test_highlight(
&loader,
loader,
highlighter,
highlight_config,
fs::read(&test_file_path)?.as_slice(),
) {
Ok(assertion_count) => {
println!(
"✓ {} ({} assertions)",
"✓ {} ({assertion_count} assertions)",
Colour::Green.paint(test_file_name.to_string_lossy().as_ref()),
assertion_count
);
}
Err(e) => {
@ -120,9 +121,9 @@ fn test_highlights_indented(
}
}
pub fn iterate_assertions(
assertions: &Vec<Assertion>,
highlights: &Vec<(Point, Point, Highlight)>,
highlight_names: &Vec<String>,
assertions: &[Assertion],
highlights: &[(Point, Point, Highlight)],
highlight_names: &[String],
) -> Result<usize> {
// Iterate through all of the highlighting assertions, checking each one against the
// actual highlights.
@ -137,40 +138,36 @@ pub fn iterate_assertions(
let mut passed = false;
actual_highlights.clear();
'highlight_loop: loop {
// The assertions are ordered by position, so skip past all of the highlights that
// end at or before this assertion's position.
if let Some(highlight) = highlights.get(i) {
if highlight.1 <= *position {
i += 1;
continue;
// The assertions are ordered by position, so skip past all of the highlights that
// end at or before this assertion's position.
'highlight_loop: while let Some(highlight) = highlights.get(i) {
if highlight.1 <= *position {
i += 1;
continue;
}
// Iterate through all of the highlights that start at or before this assertion's
// position, looking for one that matches the assertion.
let mut j = i;
while let (false, Some(highlight)) = (passed, highlights.get(j)) {
if highlight.0 > *position {
break 'highlight_loop;
}
// Iterate through all of the highlights that start at or before this assertion's
// position, looking for one that matches the assertion.
let mut j = i;
while let (false, Some(highlight)) = (passed, highlights.get(j)) {
if highlight.0 > *position {
break 'highlight_loop;
}
// If the highlight matches the assertion, or if the highlight doesn't
// match the assertion but it's negative, this test passes. Otherwise,
// add this highlight to the list of actual highlights that span the
// assertion's position, in order to generate an error message in the event
// of a failure.
let highlight_name = &highlight_names[(highlight.2).0];
if (*highlight_name == *expected_highlight) == !negative {
passed = true;
break 'highlight_loop;
} else {
actual_highlights.push(highlight_name);
}
j += 1;
// If the highlight matches the assertion, or if the highlight doesn't
// match the assertion but it's negative, this test passes. Otherwise,
// add this highlight to the list of actual highlights that span the
// assertion's position, in order to generate an error message in the event
// of a failure.
let highlight_name = &highlight_names[(highlight.2).0];
if (*highlight_name == *expected_highlight) == *negative {
actual_highlights.push(highlight_name);
} else {
passed = true;
break 'highlight_loop;
}
} else {
break;
j += 1;
}
}
@ -197,11 +194,8 @@ pub fn test_highlight(
// Highlight the file, and parse out all of the highlighting assertions.
let highlight_names = loader.highlight_names();
let highlights = get_highlight_positions(loader, highlighter, highlight_config, source)?;
let assertions = parse_position_comments(
highlighter.parser(),
highlight_config.language.clone(),
source,
)?;
let assertions =
parse_position_comments(highlighter.parser(), &highlight_config.language, source)?;
iterate_assertions(&assertions, &highlights, &highlight_names)
}
@ -248,7 +242,7 @@ pub fn get_highlight_positions(
}
}
if let Some(highlight) = highlight_stack.last() {
result.push((start_position, Point::new(row, column), *highlight))
result.push((start_position, Point::new(row, column), *highlight));
}
}
}
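The restructuring in `iterate_assertions` is `clippy::while_let_loop`: a `loop` whose body is `if let Some(x) = ... { ... } else { break }` collapses into `while let Some(x) = ...`. A small sketch with a plain array standing in for the highlight list:

```rust
fn main() {
    let items = [1, 2, 3, 4];
    let mut i = 0;

    // Before:
    // loop {
    //     if let Some(item) = items.get(i) { /* ... */ i += 1; } else { break; }
    // }

    // After: the `else { break }` arm is implied by `while let`.
    while let Some(item) = items.get(i) {
        println!("item {item}");
        i += 1;
    }
}
```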

View file

@ -31,7 +31,7 @@ impl std::fmt::Display for Failure {
if i > 0 {
write!(f, ", ")?;
}
write!(f, "'{}'", actual_tag)?;
write!(f, "'{actual_tag}'")?;
}
}
Ok(())
@ -59,9 +59,8 @@ pub fn test_tags(loader: &Loader, tags_context: &mut TagsContext, directory: &Pa
) {
Ok(assertion_count) => {
println!(
" ✓ {} ({} assertions)",
" ✓ {} ({assertion_count} assertions)",
Colour::Green.paint(test_file_name.to_string_lossy().as_ref()),
assertion_count
);
}
Err(e) => {
@ -69,7 +68,7 @@ pub fn test_tags(loader: &Loader, tags_context: &mut TagsContext, directory: &Pa
" ✗ {}",
Colour::Red.paint(test_file_name.to_string_lossy().as_ref())
);
println!(" {}", e);
println!(" {e}");
failed = true;
}
}
@ -88,8 +87,7 @@ pub fn test_tag(
source: &[u8],
) -> Result<usize> {
let tags = get_tag_positions(tags_context, tags_config, source)?;
let assertions =
parse_position_comments(tags_context.parser(), tags_config.language.clone(), source)?;
let assertions = parse_position_comments(tags_context.parser(), &tags_config.language, source)?;
// Iterate through all of the assertions, checking against the actual tags.
let mut i = 0;
@ -102,36 +100,32 @@ pub fn test_tag(
{
let mut passed = false;
'tag_loop: loop {
if let Some(tag) = tags.get(i) {
if tag.1 <= *position {
i += 1;
continue;
'tag_loop: while let Some(tag) = tags.get(i) {
if tag.1 <= *position {
i += 1;
continue;
}
// Iterate through all of the tags that start at or before this assertion's
// position, looking for one that matches the assertion
let mut j = i;
while let (false, Some(tag)) = (passed, tags.get(j)) {
if tag.0 > *position {
break 'tag_loop;
}
// Iterate through all of the tags that start at or before this assertion's
// position, looking for one that matches the assertion
let mut j = i;
while let (false, Some(tag)) = (passed, tags.get(j)) {
if tag.0 > *position {
break 'tag_loop;
}
let tag_name = &tag.2;
if (*tag_name == *expected_tag) == !negative {
passed = true;
break 'tag_loop;
} else {
actual_tags.push(tag_name);
}
j += 1;
if tag == tags.last().unwrap() {
break 'tag_loop;
}
let tag_name = &tag.2;
if (*tag_name == *expected_tag) == *negative {
actual_tags.push(tag_name);
} else {
passed = true;
break 'tag_loop;
}
j += 1;
if tag == tags.last().unwrap() {
break 'tag_loop;
}
} else {
break;
}
}
@ -154,15 +148,15 @@ pub fn get_tag_positions(
tags_config: &TagsConfiguration,
source: &[u8],
) -> Result<Vec<(Point, Point, String)>> {
let (tags_iter, _has_error) = tags_context.generate_tags(&tags_config, &source, None)?;
let (tags_iter, _has_error) = tags_context.generate_tags(tags_config, source, None)?;
let tag_positions = tags_iter
.filter_map(|t| t.ok())
.filter_map(std::result::Result::ok)
.map(|tag| {
let tag_postfix = tags_config.syntax_type_name(tag.syntax_type_id).to_string();
let tag_name = if tag.is_definition {
format!("definition.{}", tag_postfix)
format!("definition.{tag_postfix}")
} else {
format!("reference.{}", tag_postfix)
format!("reference.{tag_postfix}")
};
(tag.span.start, tag.span.end, tag_name)
})

View file

@ -78,14 +78,14 @@ fn test_node_and_cursor_ref_in_fut() {
let fut_val = async {
yield_now().await;
root.to_sexp();
let _ = root.to_sexp();
};
yield_now().await;
let fut_ref = async {
yield_now().await;
root_ref.to_sexp();
let _ = root_ref.to_sexp();
cursor_ref.goto_first_child();
};
@ -117,14 +117,14 @@ fn test_node_and_cursor_ref_in_fut_with_fut_fabrics() {
let fut_val = || async {
yield_now().await;
root.to_sexp();
let _ = root.to_sexp();
};
yield_now().await;
let fut_ref = || async move {
yield_now().await;
root_ref.to_sexp();
let _ = root_ref.to_sexp();
cursor_ref.goto_first_child();
};
@ -157,7 +157,7 @@ fn test_node_and_cursor_ref_in_fut_with_inner_spawns() {
let mut cursor = tree.walk();
let cursor_ref = &mut cursor;
yield_now().await;
root.to_sexp();
let _ = root.to_sexp();
cursor_ref.goto_first_child();
}
};
@ -172,7 +172,7 @@ fn test_node_and_cursor_ref_in_fut_with_inner_spawns() {
let mut cursor = tree.walk();
let cursor_ref = &mut cursor;
yield_now().await;
root_ref.to_sexp();
let _ = root_ref.to_sexp();
cursor_ref.goto_first_child();
}
};
@ -228,7 +228,7 @@ async fn yield_now() {
type Output = ();
fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<()> {
cx.waker().clone().wake();
cx.waker().wake_by_ref();
if self.yielded {
return Poll::Ready(());
}
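Two fixes appear in these async tests: `let _ = root.to_sexp();` makes discarding a result explicit, and `cx.waker().wake_by_ref()` (`clippy::waker_clone_wake`) avoids cloning a `Waker` only to consume the clone immediately. A minimal future in the same shape as the test helper (an executor to drive it is omitted):

```rust
use std::future::Future;
use std::pin::Pin;
use std::task::{Context, Poll};

/// A future that yields once before completing.
struct YieldNow {
    yielded: bool,
}

impl Future for YieldNow {
    type Output = ();

    fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<()> {
        // Before: cx.waker().clone().wake();  // clones, then consumes the clone
        // After: wake through the reference directly.
        cx.waker().wake_by_ref();
        if self.yielded {
            return Poll::Ready(());
        }
        self.yielded = true;
        Poll::Pending
    }
}
```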

View file

@ -166,7 +166,7 @@ fn test_language_corpus(
let tree = parser.parse(&test.input, None).unwrap();
let mut actual_output = tree.root_node().to_sexp();
if !test.has_fields {
actual_output = strip_sexp_fields(actual_output);
actual_output = strip_sexp_fields(&actual_output);
}
if actual_output != test.output {
@ -253,7 +253,7 @@ fn test_language_corpus(
// Verify that the final tree matches the expectation from the corpus.
let mut actual_output = tree3.root_node().to_sexp();
if !test.has_fields {
actual_output = strip_sexp_fields(actual_output);
actual_output = strip_sexp_fields(&actual_output);
}
if actual_output != test.output {
@ -382,7 +382,7 @@ fn test_feature_corpus_files() {
let tree = parser.parse(&test.input, None).unwrap();
let mut actual_output = tree.root_node().to_sexp();
if !test.has_fields {
actual_output = strip_sexp_fields(actual_output);
actual_output = strip_sexp_fields(&actual_output);
}
if actual_output == test.output {
true

View file

@ -34,7 +34,7 @@ fn test_lookahead_iterator() {
lookahead.reset_state(next_state);
assert!(lookahead.iter_names().eq(expected_symbols));
lookahead.reset(language.clone(), next_state);
lookahead.reset(&language, next_state);
assert!(lookahead
.map(|s| language.node_kind_for_id(s).unwrap())
.eq(expected_symbols));

View file

@ -665,12 +665,8 @@ fn test_parsing_with_a_timeout() {
None,
);
assert!(tree.is_none());
#[cfg(not(target_arch = "sparc64"))]
assert!(start_time.elapsed().as_micros() < 2000);
#[cfg(target_arch = "sparc64")]
assert!(start_time.elapsed().as_micros() < 8000);
// Continue parsing, but pause after 1 ms of processing.
parser.set_timeout_micros(5000);
let start_time = time::Instant::now();

View file

@ -4188,7 +4188,7 @@ fn test_query_is_pattern_guaranteed_at_step() {
];
allocations::record(|| {
eprintln!("");
eprintln!();
for row in rows.iter() {
if let Some(filter) = EXAMPLE_FILTER.as_ref() {
@ -4283,7 +4283,7 @@ fn test_query_is_pattern_rooted() {
];
allocations::record(|| {
eprintln!("");
eprintln!();
let language = get_language("python");
for row in &rows {
@ -4381,7 +4381,7 @@ fn test_query_is_pattern_non_local() {
];
allocations::record(|| {
eprintln!("");
eprintln!();
for row in &rows {
if let Some(filter) = EXAMPLE_FILTER.as_ref() {
@ -4611,7 +4611,7 @@ fn test_capture_quantifiers() {
];
allocations::record(|| {
eprintln!("");
eprintln!();
for row in rows.iter() {
if let Some(filter) = EXAMPLE_FILTER.as_ref() {

View file

@ -29,7 +29,7 @@ fn test_highlight_test_with_basic_test() {
.join("\n");
let assertions =
parse_position_comments(&mut Parser::new(), language, source.as_bytes()).unwrap();
parse_position_comments(&mut Parser::new(), &language, source.as_bytes()).unwrap();
assert_eq!(
assertions,
&[

View file

@ -22,7 +22,7 @@ fn test_tags_test_with_basic_test() {
.join("\n");
let assertions =
parse_position_comments(&mut Parser::new(), language, source.as_bytes()).unwrap();
parse_position_comments(&mut Parser::new(), &language, source.as_bytes()).unwrap();
assert_eq!(
assertions,

View file

@ -363,7 +363,7 @@ fn test_tree_cursor() {
assert_eq!(cursor.node().start_position(), Point { row: 1, column: 29 });
let mut copy = tree.walk();
copy.reset_to(cursor);
copy.reset_to(&cursor);
assert_eq!(copy.node().kind(), "{");
assert_eq!(copy.node().is_named(), false);

View file

@ -20,6 +20,7 @@ svg { width: 100%; }
";
#[must_use]
pub fn cancel_on_signal() -> Arc<AtomicUsize> {
let result = Arc::new(AtomicUsize::new(0));
ctrlc::set_handler({
@ -72,7 +73,7 @@ impl LogSession {
use std::io::Write;
let mut dot_file = std::fs::File::create(path)?;
dot_file.write(HTML_HEADER)?;
dot_file.write_all(HTML_HEADER)?;
let mut dot_process = Command::new("dot")
.arg("-Tsvg")
.stdin(Stdio::piped())
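`dot_file.write(HTML_HEADER)?` becoming `write_all` is `clippy::unused_io_amount`: `Write::write` may perform a short write and returns the byte count, which the old call silently dropped. Sketch with an in-memory writer:

```rust
use std::io::Write;

fn main() -> std::io::Result<()> {
    let mut out: Vec<u8> = Vec::new();

    // Before: `write` is allowed to write fewer bytes than requested;
    // ignoring its return value can silently truncate the output.
    let _n = out.write(b"<html>")?;

    // After: `write_all` loops until the whole buffer is written, or errors.
    out.write_all(b"<html>")?;
    Ok(())
}
```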

View file

@ -4,15 +4,12 @@ use std::{fs, path::Path};
use tree_sitter_loader::Loader;
pub fn load_language_wasm_file(language_dir: &Path) -> Result<(String, Vec<u8>)> {
let grammar_name = get_grammar_name(&language_dir)
let grammar_name = get_grammar_name(language_dir)
.with_context(|| "Failed to get wasm filename")
.unwrap();
let wasm_filename = format!("tree-sitter-{}.wasm", grammar_name);
let wasm_filename = format!("tree-sitter-{grammar_name}.wasm");
let contents = fs::read(language_dir.join(&wasm_filename)).with_context(|| {
format!(
"Failed to read {}. Run `tree-sitter build-wasm` first.",
wasm_filename
)
format!("Failed to read {wasm_filename}. Run `tree-sitter build-wasm` first.",)
})?;
Ok((grammar_name, contents))
}
@ -21,9 +18,9 @@ pub fn get_grammar_name(language_dir: &Path) -> Result<String> {
let src_dir = language_dir.join("src");
let grammar_json_path = src_dir.join("grammar.json");
let grammar_json = fs::read_to_string(&grammar_json_path)
.with_context(|| format!("Failed to read grammar file {:?}", grammar_json_path))?;
.with_context(|| format!("Failed to read grammar file {grammar_json_path:?}"))?;
let grammar: GrammarJSON = serde_json::from_str(&grammar_json)
.with_context(|| format!("Failed to parse grammar file {:?}", grammar_json_path))?;
.with_context(|| format!("Failed to parse grammar file {grammar_json_path:?}"))?;
Ok(grammar.name)
}
@ -33,8 +30,8 @@ pub fn compile_language_to_wasm(
output_dir: &Path,
force_docker: bool,
) -> Result<()> {
let grammar_name = get_grammar_name(&language_dir)?;
let output_filename = output_dir.join(&format!("tree-sitter-{}.wasm", grammar_name));
let grammar_name = get_grammar_name(language_dir)?;
let output_filename = output_dir.join(format!("tree-sitter-{grammar_name}.wasm"));
let src_path = language_dir.join("src");
let scanner_path = loader.get_scanner_path(&src_path);
loader.compile_parser_to_wasm(
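`get_grammar_name(&language_dir)` losing its `&` is `clippy::needless_borrow`: `language_dir` is already a `&Path`, so the extra borrow produced a `&&Path` the compiler had to auto-deref; likewise `join(&format!(...))` drops the borrow because `join` accepts any `AsRef<Path>`, including an owned `String`. Sketch:

```rust
use std::path::Path;

fn grammar_name(dir: &Path) -> String {
    dir.file_name()
        .map(|n| n.to_string_lossy().into_owned())
        .unwrap_or_default()
}

fn main() {
    let dir: &Path = Path::new("tree-sitter-rust");
    // Before: grammar_name(&dir) passed a needless `&&Path`.
    let name = grammar_name(dir);
    // Before: dir.join(&format!(...)); `join` takes the `String` by value just as well.
    let wasm = dir.join(format!("tree-sitter-{name}.wasm"));
    println!("{}", wasm.display());
}
```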

View file

@ -5,9 +5,10 @@
[crates.io]: https://crates.io/crates/tree-sitter-highlight
[crates.io badge]: https://img.shields.io/crates/v/tree-sitter-highlight.svg?color=%23B48723
### Usage
## Usage
Add this crate, and the language-specific crates for whichever languages you want to parse, to your `Cargo.toml`:
Add this crate, and the language-specific crates for whichever languages you want
to parse, to your `Cargo.toml`:
```toml
[dependencies]
@ -40,7 +41,8 @@ let highlight_names = [
];
```
Create a highlighter. You need one of these for each thread that you're using for syntax highlighting:
Create a highlighter. You need one of these for each thread that you're using for
syntax highlighting:
```rust
use tree_sitter_highlight::Highlighter;
@ -57,9 +59,11 @@ let javascript_language = tree_sitter_javascript::language();
let mut javascript_config = HighlightConfiguration::new(
javascript_language,
"javascript",
tree_sitter_javascript::HIGHLIGHT_QUERY,
tree_sitter_javascript::INJECTION_QUERY,
tree_sitter_javascript::LOCALS_QUERY,
false,
).unwrap();
```
@ -96,4 +100,6 @@ for event in highlights {
}
```
The last parameter to `highlight` is a *language injection* callback. This allows other languages to be retrieved when Tree-sitter detects an embedded document (for example, a piece of JavaScript code inside of a `script` tag within HTML).
The last parameter to `highlight` is a _language injection_ callback. This allows
other languages to be retrieved when Tree-sitter detects an embedded document
(for example, a piece of JavaScript code inside a `script` tag within HTML).
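For reference, the injection callback is just a closure from a language name to an optional configuration. A hedged sketch of wiring one up, assuming an `html_config` and a `javascript_config` built as shown above (the function and variable names are illustrative):

```rust
use tree_sitter_highlight::{Error, HighlightConfiguration, Highlighter};

fn highlight_html<'a>(
    highlighter: &'a mut Highlighter,
    html_config: &'a HighlightConfiguration,
    javascript_config: &'a HighlightConfiguration,
    source: &'a [u8],
) -> Result<(), Error> {
    let highlights = highlighter.highlight(html_config, source, None, |language| {
        // Called whenever an injection query reports an embedded document;
        // return the configuration to highlight it with, or None to skip it.
        if language.eq_ignore_ascii_case("javascript") {
            Some(javascript_config)
        } else {
            None
        }
    })?;
    for event in highlights {
        let _ = event?; // Source / HighlightStart / HighlightEnd events
    }
    Ok(())
}
```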

View file

@ -105,20 +105,20 @@ pub unsafe extern "C" fn ts_highlighter_add_language(
};
let highlight_query =
slice::from_raw_parts(highlight_query as *const u8, highlight_query_len as usize);
slice::from_raw_parts(highlight_query.cast::<u8>(), highlight_query_len as usize);
let highlight_query = str::from_utf8(highlight_query).or(Err(ErrorCode::InvalidUtf8))?;
let injection_query = if injection_query_len > 0 {
let query =
slice::from_raw_parts(injection_query as *const u8, injection_query_len as usize);
slice::from_raw_parts(injection_query.cast::<u8>(), injection_query_len as usize);
str::from_utf8(query).or(Err(ErrorCode::InvalidUtf8))?
} else {
""
};
let locals_query = if locals_query_len > 0 {
let query = slice::from_raw_parts(locals_query as *const u8, locals_query_len as usize);
let query = slice::from_raw_parts(locals_query.cast::<u8>(), locals_query_len as usize);
str::from_utf8(query).or(Err(ErrorCode::InvalidUtf8))?
} else {
""
@ -167,7 +167,7 @@ pub extern "C" fn ts_highlight_buffer_new() -> *mut TSHighlightBuffer {
/// It cannot be used after this function is called.
#[no_mangle]
pub unsafe extern "C" fn ts_highlighter_delete(this: *mut TSHighlighter) {
drop(Box::from_raw(this))
drop(Box::from_raw(this));
}
/// Deletes a [`TSHighlightBuffer`] instance.
@ -180,7 +180,7 @@ pub unsafe extern "C" fn ts_highlighter_delete(this: *mut TSHighlighter) {
/// It cannot be used after this function is called.
#[no_mangle]
pub unsafe extern "C" fn ts_highlight_buffer_delete(this: *mut TSHighlightBuffer) {
drop(Box::from_raw(this))
drop(Box::from_raw(this));
}
/// Get the HTML content of a [`TSHighlightBuffer`] instance as a raw pointer.
@ -263,7 +263,7 @@ pub unsafe extern "C" fn ts_highlighter_highlight(
let this = unwrap_ptr(this);
let output = unwrap_mut_ptr(output);
let scope_name = unwrap(CStr::from_ptr(scope_name).to_str());
let source_code = slice::from_raw_parts(source_code as *const u8, source_code_len as usize);
let source_code = slice::from_raw_parts(source_code.cast::<u8>(), source_code_len as usize);
let cancellation_flag = cancellation_flag.as_ref();
this.highlight(source_code, scope_name, output, cancellation_flag)
}
@ -309,7 +309,7 @@ impl TSHighlighter {
.renderer
.render(highlights, source_code, &|s| self.attribute_strings[s.0]);
match result {
Err(Error::Cancelled) | Err(Error::Unknown) => ErrorCode::Timeout,
Err(Error::Cancelled | Error::Unknown) => ErrorCode::Timeout,
Err(Error::InvalidLanguage) => ErrorCode::InvalidLanguage,
Ok(()) => ErrorCode::Ok,
}
@ -335,7 +335,7 @@ unsafe fn unwrap_mut_ptr<'a, T>(result: *mut T) -> &'a mut T {
fn unwrap<T, E: fmt::Display>(result: Result<T, E>) -> T {
result.unwrap_or_else(|error| {
eprintln!("tree-sitter highlight error: {}", error);
eprintln!("tree-sitter highlight error: {error}");
abort();
})
}
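Two lints drive the C-binding changes here: `clippy::ptr_as_ptr` prefers `ptr.cast::<T>()` over `ptr as *const T`, since `cast` can only change the pointee type and never mutability by accident, and `clippy::unnested_or_patterns` folds `Err(A) | Err(B)` into `Err(A | B)`. Sketch:

```rust
enum Error {
    Cancelled,
    Unknown,
    InvalidLanguage,
}

fn classify(result: Result<(), Error>) -> &'static str {
    match result {
        // Before: Err(Error::Cancelled) | Err(Error::Unknown) => ...
        Err(Error::Cancelled | Error::Unknown) => "timeout",
        Err(Error::InvalidLanguage) => "invalid language",
        Ok(()) => "ok",
    }
}

fn main() {
    for err in [Error::Cancelled, Error::Unknown, Error::InvalidLanguage] {
        println!("{}", classify(Err(err)));
    }

    let buf = [0u8; 4];
    let raw: *const u8 = buf.as_ptr();
    // Before: `raw as *const i8`; `as` could also change mutability unnoticed.
    let signed: *const i8 = raw.cast::<i8>();
    println!("{signed:?}");
}
```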

View file

@ -1,7 +1,6 @@
#![doc = include_str!("../README.md")]
pub mod c_lib;
pub mod util;
pub use c_lib as c;
use lazy_static::lazy_static;
@ -180,9 +179,16 @@ struct HighlightIterLayer<'a> {
depth: usize,
}
impl Default for Highlighter {
fn default() -> Self {
Self::new()
}
}
impl Highlighter {
#[must_use]
pub fn new() -> Self {
Highlighter {
Self {
parser: Parser::new(),
cursors: Vec::new(),
}
@ -333,7 +339,7 @@ impl HighlightConfiguration {
}
let highlight_indices = vec![None; query.capture_names().len()];
Ok(HighlightConfiguration {
Ok(Self {
language,
language_name: name.into(),
query,
@ -353,7 +359,8 @@ impl HighlightConfiguration {
}
/// Get a slice containing all of the highlight names used in the configuration.
pub fn names(&self) -> &[&str] {
#[must_use]
pub const fn names(&self) -> &[&str] {
self.query.capture_names()
}
@ -377,7 +384,7 @@ impl HighlightConfiguration {
let mut best_index = None;
let mut best_match_len = 0;
for (i, recognized_name) in recognized_names.into_iter().enumerate() {
for (i, recognized_name) in recognized_names.iter().enumerate() {
let mut len = 0;
let mut matches = true;
for part in recognized_name.as_ref().split('.') {
@ -399,16 +406,17 @@ impl HighlightConfiguration {
// Return the list of this configuration's capture names that are neither present in the
// list of predefined 'canonical' names nor start with an underscore (denoting 'private' captures
// used as part of capture internals).
#[must_use]
pub fn nonconformant_capture_names(&self, capture_names: &HashSet<&str>) -> Vec<&str> {
let capture_names = if capture_names.is_empty() {
&*STANDARD_CAPTURE_NAMES
} else {
&capture_names
capture_names
};
self.names()
.iter()
.filter(|&n| !(n.starts_with('_') || capture_names.contains(n)))
.map(|n| *n)
.copied()
.collect()
}
}
@ -419,6 +427,7 @@ impl<'a> HighlightIterLayer<'a> {
/// In the event that the new layer contains "combined injections" (injections where multiple
/// disjoint ranges are parsed as one syntax tree), these will be eagerly processed and
/// added to the returned vector.
#[allow(clippy::too_many_arguments)]
fn new<F: FnMut(&str) -> Option<&'a HighlightConfiguration> + 'a>(
source: &'a [u8],
parent_name: Option<&str>,
@ -444,7 +453,7 @@ impl<'a> HighlightIterLayer<'a> {
.parse(source, None)
.ok_or(Error::Cancelled)?;
unsafe { highlighter.parser.set_cancellation_flag(None) };
let mut cursor = highlighter.cursors.pop().unwrap_or(QueryCursor::new());
let mut cursor = highlighter.cursors.pop().unwrap_or_default();
// Process combined injections.
if let Some(combined_injections_query) = &config.combined_injections_query {
@ -514,12 +523,12 @@ impl<'a> HighlightIterLayer<'a> {
if queue.is_empty() {
break;
} else {
let (next_config, next_depth, next_ranges) = queue.remove(0);
config = next_config;
depth = next_depth;
ranges = next_ranges;
}
let (next_config, next_depth, next_ranges) = queue.remove(0);
config = next_config;
depth = next_depth;
ranges = next_ranges;
}
Ok(result)
@ -545,7 +554,7 @@ impl<'a> HighlightIterLayer<'a> {
let mut parent_range = parent_range_iter
.next()
.expect("Layers should only be constructed with non-empty ranges vectors");
for node in nodes.iter() {
for node in nodes {
let mut preceding_range = Range {
start_byte: 0,
start_point: Point::new(0, 0),
@ -568,7 +577,7 @@ impl<'a> HighlightIterLayer<'a> {
Some(child.range())
}
})
.chain([following_range].iter().cloned())
.chain(std::iter::once(following_range))
{
let mut range = Range {
start_byte: preceding_range.end_byte,
@ -628,7 +637,7 @@ impl<'a> HighlightIterLayer<'a> {
.captures
.peek()
.map(|(m, i)| m.captures[*i].node.start_byte());
let next_end = self.highlight_end_stack.last().cloned();
let next_end = self.highlight_end_stack.last().copied();
match (next_start, next_end) {
(Some(start), Some(end)) => {
if start < end {
@ -685,10 +694,9 @@ where
self.layers[0..(i + 1)].rotate_left(1);
}
break;
} else {
let layer = self.layers.remove(0);
self.highlighter.cursors.push(layer.cursor);
}
let layer = self.layers.remove(0);
self.highlighter.cursors.push(layer.cursor);
}
}
@ -760,7 +768,7 @@ where
// If any previous highlight ends before this node starts, then before
// processing this capture, emit the source code up until the end of the
// previous highlight, and an end event for that highlight.
if let Some(end_byte) = layer.highlight_end_stack.last().cloned() {
if let Some(end_byte) = layer.highlight_end_stack.last().copied() {
if end_byte <= range.start {
layer.highlight_end_stack.pop();
return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd));
@ -769,7 +777,7 @@ where
}
// If there are no more captures, then emit any remaining highlight end events.
// And if there are none of those, then just advance to the end of the document.
else if let Some(end_byte) = layer.highlight_end_stack.last().cloned() {
else if let Some(end_byte) = layer.highlight_end_stack.last().copied() {
layer.highlight_end_stack.pop();
return self.emit_event(end_byte, Some(HighlightEvent::HighlightEnd));
} else {
@ -848,12 +856,9 @@ where
local_defs: Vec::new(),
};
for prop in layer.config.query.property_settings(match_.pattern_index) {
match prop.key.as_ref() {
"local.scope-inherits" => {
scope.inherits =
prop.value.as_ref().map_or(true, |r| r.as_ref() == "true");
}
_ => {}
if prop.key.as_ref() == "local.scope-inherits" {
scope.inherits =
prop.value.as_ref().map_or(true, |r| r.as_ref() == "true");
}
}
layer.scope_stack.push(scope);
@ -884,26 +889,24 @@ where
}
// If the node represents a reference, then try to find the corresponding
// definition in the scope stack.
else if Some(capture.index) == layer.config.local_ref_capture_index {
if definition_highlight.is_none() {
definition_highlight = None;
if let Ok(name) = str::from_utf8(&self.source[range.clone()]) {
for scope in layer.scope_stack.iter().rev() {
if let Some(highlight) =
scope.local_defs.iter().rev().find_map(|def| {
if def.name == name && range.start >= def.value_range.end {
Some(def.highlight)
} else {
None
}
})
{
reference_highlight = highlight;
break;
}
if !scope.inherits {
break;
else if Some(capture.index) == layer.config.local_ref_capture_index
&& definition_highlight.is_none()
{
definition_highlight = None;
if let Ok(name) = str::from_utf8(&self.source[range.clone()]) {
for scope in layer.scope_stack.iter().rev() {
if let Some(highlight) = scope.local_defs.iter().rev().find_map(|def| {
if def.name == name && range.start >= def.value_range.end {
Some(def.highlight)
} else {
None
}
}) {
reference_highlight = highlight;
break;
}
if !scope.inherits {
break;
}
}
}
@ -993,9 +996,16 @@ where
}
}
impl Default for HtmlRenderer {
fn default() -> Self {
Self::new()
}
}
impl HtmlRenderer {
#[must_use]
pub fn new() -> Self {
let mut result = HtmlRenderer {
let mut result = Self {
html: Vec::with_capacity(BUFFER_HTML_RESERVE_CAPACITY),
line_offsets: Vec::with_capacity(BUFFER_LINES_RESERVE_CAPACITY),
carriage_return_highlight: None,
@ -1095,10 +1105,21 @@ impl HtmlRenderer {
self.html.extend(b"</span>");
}
fn add_text<'a, F>(&mut self, src: &[u8], highlights: &Vec<Highlight>, attribute_callback: &F)
fn add_text<'a, F>(&mut self, src: &[u8], highlights: &[Highlight], attribute_callback: &F)
where
F: Fn(Highlight) -> &'a [u8],
{
pub const fn html_escape(c: u8) -> Option<&'static [u8]> {
match c as char {
'>' => Some(b"&gt;"),
'<' => Some(b"&lt;"),
'&' => Some(b"&amp;"),
'\'' => Some(b"&#39;"),
'"' => Some(b"&quot;"),
_ => None,
}
}
let mut last_char_was_cr = false;
for c in LossyUtf8::new(src).flat_map(|p| p.bytes()) {
// Don't render carriage return characters, but allow lone carriage returns (not
@ -1122,7 +1143,7 @@ impl HtmlRenderer {
highlights
.iter()
.for_each(|scope| self.start_highlight(*scope, attribute_callback));
} else if let Some(escape) = util::html_escape(c) {
} else if let Some(escape) = html_escape(c) {
self.html.extend_from_slice(escape);
} else {
self.html.push(c);
@ -1161,7 +1182,7 @@ fn injection_for_match<'a>(
// that sets the injection.language key.
"injection.language" => {
if language_name.is_none() {
language_name = prop.value.as_ref().map(|s| s.as_ref());
language_name = prop.value.as_ref().map(std::convert::AsRef::as_ref);
}
}
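The new `impl Default` blocks satisfy `clippy::new_without_default` (a public zero-argument `new()` should also be reachable through `Default`), and constructors now say `Self` instead of repeating the type name (`clippy::use_self`). A reduced sketch (the `cursors` field is a stand-in for the real cursor pool):

```rust
pub struct Highlighter {
    cursors: Vec<usize>,
}

impl Default for Highlighter {
    fn default() -> Self {
        Self::new()
    }
}

impl Highlighter {
    #[must_use]
    pub fn new() -> Self {
        // Before: `Highlighter { ... }`; `Self` tracks renames automatically.
        Self {
            cursors: Vec::new(),
        }
    }
}

fn main() {
    // `Default` lets the type participate in `T: Default` generic code and
    // `..Default::default()` struct updates.
    let a = Highlighter::new();
    let b = Highlighter::default();
    assert!(a.cursors.is_empty() && b.cursors.is_empty());
}
```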

View file

@ -1,10 +0,0 @@
pub fn html_escape(c: u8) -> Option<&'static [u8]> {
match c as char {
'>' => Some(b"&gt;"),
'<' => Some(b"&lt;"),
'&' => Some(b"&amp;"),
'\'' => Some(b"&#39;"),
'"' => Some(b"&quot;"),
_ => None,
}
}

View file

@ -9,10 +9,7 @@ fn main() {
let scan_build_path = scan_build_path.to_str().unwrap();
env::set_var(
"CC",
&format!(
"{} -analyze-headers --use-analyzer={} cc",
scan_build_path, clang_path
),
format!("{scan_build_path} -analyze-headers --use-analyzer={clang_path} cc",),
);
}
}
@ -28,7 +25,7 @@ fn main() {
}
let src_path = Path::new("src");
for entry in fs::read_dir(&src_path).unwrap() {
for entry in fs::read_dir(src_path).unwrap() {
let entry = entry.unwrap();
let path = src_path.join(entry.file_name());
println!("cargo:rerun-if-changed={}", path.to_str().unwrap());
@ -80,9 +77,9 @@ fn generate_bindings() {
let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap());
let bindings_rs = out_dir.join("bindings.rs");
bindings.write_to_file(&bindings_rs).expect(&*format!(
"Failed to write bindings into path: {bindings_rs:?}"
));
bindings
.write_to_file(&bindings_rs)
.unwrap_or_else(|_| panic!("Failed to write bindings into path: {bindings_rs:?}"));
}
fn which(exe_name: impl AsRef<Path>) -> Option<PathBuf> {
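`expect(&*format!(...))` becoming `unwrap_or_else(|_| panic!(...))` is `clippy::expect_fun_call`: the argument to `expect` is built eagerly even on the success path, while the closure defers the formatting to the failure path. Sketch:

```rust
fn main() {
    let bindings_rs = "bindings.rs";
    let result: Result<(), std::io::Error> = Ok(());

    // Before (allocates the message even when `result` is `Ok`):
    // result.expect(&format!("Failed to write bindings into path: {bindings_rs:?}"));

    // After: the panic message is only formatted if the call actually fails.
    result.unwrap_or_else(|_| panic!("Failed to write bindings into path: {bindings_rs:?}"));
}
```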

View file

@ -23,11 +23,13 @@ impl Language {
/// # Safety
///
/// `ptr` must be non-null.
pub unsafe fn from_raw(ptr: *const TSLanguage) -> Language {
Language(ptr)
#[must_use]
pub const unsafe fn from_raw(ptr: *const TSLanguage) -> Self {
Self(ptr)
}
/// Consumes the [`Language`], returning a raw pointer to the underlying C structure.
#[must_use]
pub fn into_raw(self) -> *const TSLanguage {
ManuallyDrop::new(self).0
}
@ -39,8 +41,9 @@ impl Parser {
/// # Safety
///
/// `ptr` must be non-null.
pub unsafe fn from_raw(ptr: *mut TSParser) -> Parser {
Parser(NonNull::new_unchecked(ptr))
#[must_use]
pub const unsafe fn from_raw(ptr: *mut TSParser) -> Self {
Self(NonNull::new_unchecked(ptr))
}
/// Consumes the [`Parser`], returning a raw pointer to the underlying C structure.
@ -50,6 +53,7 @@ impl Parser {
/// It's a caller responsibility to adjust parser's state
/// like disable logging or dot graphs printing if this
/// may cause issues like use after free.
#[must_use]
pub fn into_raw(self) -> *mut TSParser {
ManuallyDrop::new(self).0.as_ptr()
}
@ -61,11 +65,13 @@ impl Tree {
/// # Safety
///
/// `ptr` must be non-null.
pub unsafe fn from_raw(ptr: *mut TSTree) -> Tree {
Tree(NonNull::new_unchecked(ptr))
#[must_use]
pub const unsafe fn from_raw(ptr: *mut TSTree) -> Self {
Self(NonNull::new_unchecked(ptr))
}
/// Consumes the [`Tree`], returning a raw pointer to the underlying C structure.
#[must_use]
pub fn into_raw(self) -> *mut TSTree {
ManuallyDrop::new(self).0.as_ptr()
}
@ -77,11 +83,13 @@ impl<'tree> Node<'tree> {
/// # Safety
///
/// `ptr` must be non-null.
pub unsafe fn from_raw(raw: TSNode) -> Node<'tree> {
Node(raw, PhantomData)
#[must_use]
pub const unsafe fn from_raw(raw: TSNode) -> Node<'tree> {
Self(raw, PhantomData)
}
/// Consumes the [`Node`], returning a raw pointer to the underlying C structure.
#[must_use]
pub fn into_raw(self) -> TSNode {
ManuallyDrop::new(self).0
}
@ -93,11 +101,13 @@ impl<'a> TreeCursor<'a> {
/// # Safety
///
/// `ptr` must be non-null.
pub unsafe fn from_raw(raw: TSTreeCursor) -> TreeCursor<'a> {
TreeCursor(raw, PhantomData)
#[must_use]
pub const unsafe fn from_raw(raw: TSTreeCursor) -> TreeCursor<'a> {
Self(raw, PhantomData)
}
/// Consumes the [`TreeCursor`], returning a raw pointer to the underlying C structure.
#[must_use]
pub fn into_raw(self) -> TSTreeCursor {
ManuallyDrop::new(self).0
}
@ -109,11 +119,12 @@ impl Query {
/// # Safety
///
/// `ptr` must be non-null.
pub unsafe fn from_raw(ptr: *mut TSQuery, source: &str) -> Result<Query, QueryError> {
Query::from_raw_parts(ptr, source)
pub unsafe fn from_raw(ptr: *mut TSQuery, source: &str) -> Result<Self, QueryError> {
Self::from_raw_parts(ptr, source)
}
/// Consumes the [`Query`], returning a raw pointer to the underlying C structure.
#[must_use]
pub fn into_raw(self) -> *mut TSQuery {
ManuallyDrop::new(self).ptr.as_ptr()
}
@ -125,13 +136,15 @@ impl QueryCursor {
/// # Safety
///
/// `ptr` must be non-null.
pub unsafe fn from_raw(ptr: *mut TSQueryCursor) -> QueryCursor {
QueryCursor {
#[must_use]
pub const unsafe fn from_raw(ptr: *mut TSQueryCursor) -> Self {
Self {
ptr: NonNull::new_unchecked(ptr),
}
}
/// Consumes the [`QueryCursor`], returning a raw pointer to the underlying C structure.
#[must_use]
pub fn into_raw(self) -> *mut TSQueryCursor {
ManuallyDrop::new(self).ptr.as_ptr()
}
@ -143,11 +156,13 @@ impl LookaheadIterator {
/// # Safety
///
/// `ptr` must be non-null.
pub unsafe fn from_raw(ptr: *mut TSLookaheadIterator) -> LookaheadIterator {
LookaheadIterator(NonNull::new_unchecked(ptr))
#[must_use]
pub const unsafe fn from_raw(ptr: *mut TSLookaheadIterator) -> Self {
Self(NonNull::new_unchecked(ptr))
}
/// Consumes the [`LookaheadIterator`], returning a raw pointer to the underlying C structure.
#[must_use]
pub fn into_raw(self) -> *mut TSLookaheadIterator {
ManuallyDrop::new(self).0.as_ptr()
}

File diff suppressed because it is too large

View file

@ -9,7 +9,7 @@ pub struct CBufferIter<T> {
}
impl<T> CBufferIter<T> {
pub unsafe fn new(ptr: *mut T, count: usize) -> Self {
pub const unsafe fn new(ptr: *mut T, count: usize) -> Self {
Self { ptr, count, i: 0 }
}
}
@ -23,7 +23,7 @@ impl<T: Copy> Iterator for CBufferIter<T> {
None
} else {
self.i += 1;
Some(unsafe { *self.ptr.offset(i as isize) })
Some(unsafe { *self.ptr.add(i) })
}
}
@ -38,7 +38,7 @@ impl<T: Copy> ExactSizeIterator for CBufferIter<T> {}
impl<T> Drop for CBufferIter<T> {
fn drop(&mut self) {
if !self.ptr.is_null() {
unsafe { (FREE_FN)(self.ptr as *mut c_void) };
unsafe { (FREE_FN)(self.ptr.cast::<c_void>()) };
}
}
}
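`ptr.offset(i as isize)` becoming `ptr.add(i)` is `clippy::ptr_offset_with_cast`: `add` takes a `usize` directly, so the cast disappears and the forward direction is explicit. Sketch:

```rust
fn main() {
    let data = [10u32, 20, 30];
    let ptr = data.as_ptr();
    let i: usize = 2;

    // Before: unsafe { *ptr.offset(i as isize) }
    // After: `add` is a forward offset taking `usize`, no cast needed.
    let value = unsafe { *ptr.add(i) };
    assert_eq!(value, 30);
}
```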

View file

@ -66,17 +66,17 @@ pub extern "C" fn ts_tagger_new() -> *mut TSTagger {
}))
}
/// Delete a TSTagger.
/// Delete a [`TSTagger`].
///
/// # Safety
///
/// `this` must be non-null and a valid pointer to a [`TSTagger`] instance.
#[no_mangle]
pub unsafe extern "C" fn ts_tagger_delete(this: *mut TSTagger) {
drop(Box::from_raw(this))
drop(Box::from_raw(this));
}
/// Add a language to a TSTagger.
/// Add a language to a [`TSTagger`].
///
/// Returns a [`TSTagsError`] indicating whether the operation was successful or not.
///
@ -105,13 +105,11 @@ pub unsafe extern "C" fn ts_tagger_add_language(
} else {
&[]
};
let tags_query = match str::from_utf8(tags_query) {
Ok(e) => e,
Err(_) => return TSTagsError::InvalidUtf8,
let Ok(tags_query) = str::from_utf8(tags_query) else {
return TSTagsError::InvalidUtf8;
};
let locals_query = match str::from_utf8(locals_query) {
Ok(e) => e,
Err(_) => return TSTagsError::InvalidUtf8,
let Ok(locals_query) = str::from_utf8(locals_query) else {
return TSTagsError::InvalidUtf8;
};
match TagsConfiguration::new(language, tags_query, locals_query) {
@ -169,16 +167,13 @@ pub unsafe extern "C" fn ts_tagger_tag(
Err(e) => {
return match e {
Error::InvalidLanguage => TSTagsError::InvalidLanguage,
Error::Cancelled => TSTagsError::Timeout,
_ => TSTagsError::Timeout,
}
}
};
for tag in tags {
let tag = if let Ok(tag) = tag {
tag
} else {
let Ok(tag) = tag else {
buffer.tags.clear();
buffer.docs.clear();
return TSTagsError::Timeout;
@ -228,7 +223,7 @@ pub extern "C" fn ts_tags_buffer_new() -> *mut TSTagsBuffer {
}))
}
/// Delete a TSTagsBuffer.
/// Delete a [`TSTagsBuffer`].
///
/// # Safety
///
@ -236,10 +231,10 @@ pub extern "C" fn ts_tags_buffer_new() -> *mut TSTagsBuffer {
/// [`ts_tags_buffer_new`].
#[no_mangle]
pub unsafe extern "C" fn ts_tags_buffer_delete(this: *mut TSTagsBuffer) {
drop(Box::from_raw(this))
drop(Box::from_raw(this));
}
/// Get the tags from a TSTagsBuffer.
/// Get the tags from a [`TSTagsBuffer`].
///
/// # Safety
///
@ -250,22 +245,20 @@ pub unsafe extern "C" fn ts_tags_buffer_delete(this: *mut TSTagsBuffer) {
/// is deleted with [`ts_tags_buffer_delete`], else the data will point to garbage.
#[no_mangle]
pub unsafe extern "C" fn ts_tags_buffer_tags(this: *const TSTagsBuffer) -> *const TSTag {
let buffer = unwrap_ptr(this);
buffer.tags.as_ptr()
unwrap_ptr(this).tags.as_ptr()
}
/// Get the number of tags in a TSTagsBuffer.
/// Get the number of tags in a [`TSTagsBuffer`].
///
/// # Safety
///
/// `this` must be non-null and a valid pointer to a [`TSTagsBuffer`] instance.
#[no_mangle]
pub unsafe extern "C" fn ts_tags_buffer_tags_len(this: *const TSTagsBuffer) -> u32 {
let buffer = unwrap_ptr(this);
buffer.tags.len() as u32
unwrap_ptr(this).tags.len() as u32
}
/// Get the documentation strings from a TSTagsBuffer.
/// Get the documentation strings from a [`TSTagsBuffer`].
///
/// # Safety
///
@ -279,11 +272,10 @@ pub unsafe extern "C" fn ts_tags_buffer_tags_len(this: *const TSTagsBuffer) -> u
/// To get the length of the string, use [`ts_tags_buffer_docs_len`].
#[no_mangle]
pub unsafe extern "C" fn ts_tags_buffer_docs(this: *const TSTagsBuffer) -> *const c_char {
let buffer = unwrap_ptr(this);
buffer.docs.as_ptr() as *const c_char
unwrap_ptr(this).docs.as_ptr().cast::<c_char>()
}
/// Get the length of the documentation strings in a TSTagsBuffer.
/// Get the length of the documentation strings in a [`TSTagsBuffer`].
///
/// # Safety
///
@ -291,11 +283,10 @@ pub unsafe extern "C" fn ts_tags_buffer_docs(this: *const TSTagsBuffer) -> *cons
/// [`ts_tags_buffer_new`].
#[no_mangle]
pub unsafe extern "C" fn ts_tags_buffer_docs_len(this: *const TSTagsBuffer) -> u32 {
let buffer = unwrap_ptr(this);
buffer.docs.len() as u32
unwrap_ptr(this).docs.len() as u32
}
/// Get whether or not a TSTagsBuffer contains any parse errors.
/// Get whether or not a [`TSTagsBuffer`] contains any parse errors.
///
/// # Safety
///
@ -303,8 +294,7 @@ pub unsafe extern "C" fn ts_tags_buffer_docs_len(this: *const TSTagsBuffer) -> u
/// [`ts_tags_buffer_new`].
#[no_mangle]
pub unsafe extern "C" fn ts_tags_buffer_found_parse_error(this: *const TSTagsBuffer) -> bool {
let buffer = unwrap_ptr(this);
buffer.errors_present
unwrap_ptr(this).errors_present
}
/// Get the syntax kinds for a given scope name.
@ -335,7 +325,7 @@ pub unsafe extern "C" fn ts_tagger_syntax_kinds_for_scope_name(
*len = 0;
if let Some(config) = tagger.languages.get(scope_name) {
*len = config.c_syntax_type_names.len() as u32;
return config.c_syntax_type_names.as_ptr() as *const *const c_char;
return config.c_syntax_type_names.as_ptr().cast::<*const c_char>();
}
std::ptr::null()
}
@ -356,7 +346,7 @@ unsafe fn unwrap_mut_ptr<'a, T>(result: *mut T) -> &'a mut T {
fn unwrap<T, E: fmt::Display>(result: Result<T, E>) -> T {
result.unwrap_or_else(|error| {
eprintln!("tree-sitter tag error: {}", error);
eprintln!("tree-sitter tag error: {error}");
abort();
})
}
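The `match str::from_utf8(...) { Ok(e) => e, Err(_) => return ... }` rewrites use `let`-`else` (`clippy::manual_let_else`), stabilized in Rust 1.65. Sketch:

```rust
fn first_word(bytes: &[u8]) -> Option<&str> {
    // Before:
    // let text = match std::str::from_utf8(bytes) {
    //     Ok(t) => t,
    //     Err(_) => return None,
    // };

    // After: the failure arm must diverge (return/break/continue/panic).
    let Ok(text) = std::str::from_utf8(bytes) else {
        return None;
    };
    text.split_whitespace().next()
}

fn main() {
    assert_eq!(first_word(b"hello world"), Some("hello"));
    assert_eq!(first_word(&[0xFF]), None);
}
```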

View file

@ -117,7 +117,7 @@ struct LineInfo {
impl TagsConfiguration {
pub fn new(language: Language, tags_query: &str, locals_query: &str) -> Result<Self, Error> {
let query = Query::new(&language, &format!("{}{}", locals_query, tags_query))?;
let query = Query::new(&language, &format!("{locals_query}{tags_query}"))?;
let tags_query_offset = locals_query.len();
let mut tags_pattern_index = 0;
@ -137,13 +137,12 @@ impl TagsConfiguration {
let mut local_definition_capture_index = None;
for (i, name) in query.capture_names().iter().enumerate() {
match *name {
"" => continue,
"name" => name_capture_index = Some(i as u32),
"ignore" => ignore_capture_index = Some(i as u32),
"doc" => doc_capture_index = Some(i as u32),
"local.scope" => local_scope_capture_index = Some(i as u32),
"local.definition" => local_definition_capture_index = Some(i as u32),
"local.reference" => continue,
"local.reference" | "" => continue,
_ => {
let mut is_definition = false;
@ -153,7 +152,7 @@ impl TagsConfiguration {
} else if name.starts_with("reference.") {
name.trim_start_matches("reference.")
} else {
return Err(Error::InvalidCapture(name.to_string()));
return Err(Error::InvalidCapture((*name).to_string()));
};
if let Ok(cstr) = CString::new(kind) {
@ -200,7 +199,7 @@ impl TagsConfiguration {
}
if let Some(doc_capture_index) = doc_capture_index {
for predicate in query.general_predicates(pattern_index) {
if predicate.args.get(0)
if predicate.args.first()
== Some(&QueryPredicateArg::Capture(doc_capture_index))
{
match (predicate.operator.as_ref(), predicate.args.get(1)) {
@ -216,11 +215,11 @@ impl TagsConfiguration {
}
}
}
return Ok(info);
Ok(info)
})
.collect::<Result<Vec<_>, Error>>()?;
Ok(TagsConfiguration {
Ok(Self {
language,
query,
syntax_type_names,
@ -229,26 +228,37 @@ impl TagsConfiguration {
doc_capture_index,
name_capture_index,
ignore_capture_index,
tags_pattern_index,
local_scope_capture_index,
local_definition_capture_index,
tags_pattern_index,
pattern_info,
})
}
#[must_use]
pub fn syntax_type_name(&self, id: u32) -> &str {
unsafe {
let cstr =
CStr::from_ptr(self.syntax_type_names[id as usize].as_ptr() as *const c_char)
.to_bytes();
let cstr = CStr::from_ptr(
self.syntax_type_names[id as usize]
.as_ptr()
.cast::<c_char>(),
)
.to_bytes();
str::from_utf8(cstr).expect("syntax type name was not valid utf-8")
}
}
}
impl Default for TagsContext {
fn default() -> Self {
Self::new()
}
}
impl TagsContext {
#[must_use]
pub fn new() -> Self {
TagsContext {
Self {
parser: Parser::new(),
cursor: QueryCursor::new(),
}
@ -327,9 +337,8 @@ where
let tag = self.tag_queue.remove(0).0;
if tag.is_ignored() {
continue;
} else {
return Some(Ok(tag));
}
return Some(Ok(tag));
}
}
@ -452,11 +461,10 @@ where
for doc_node in &doc_nodes[docs_start_index..] {
if let Ok(content) = str::from_utf8(&self.source[doc_node.byte_range()])
{
let content = if let Some(regex) = &pattern_info.doc_strip_regex {
regex.replace_all(content, "").to_string()
} else {
content.to_string()
};
let content = pattern_info.doc_strip_regex.as_ref().map_or_else(
|| content.to_string(),
|regex| regex.replace_all(content, "").to_string(),
);
match &mut docs {
None => docs = Some(content),
Some(d) => {
@ -511,11 +519,11 @@ where
line_range: line_range.clone(),
});
tag = Tag {
range,
name_range,
line_range,
span,
utf16_column_range,
range,
name_range,
docs,
is_definition,
syntax_type_id,
@ -554,8 +562,9 @@ where
}
impl Tag {
fn ignored(name_range: Range<usize>) -> Self {
Tag {
#[must_use]
const fn ignored(name_range: Range<usize>) -> Self {
Self {
name_range,
line_range: 0..0,
span: Point::new(0, 0)..Point::new(0, 0),
@ -567,7 +576,8 @@ impl Tag {
}
}
fn is_ignored(&self) -> bool {
#[must_use]
const fn is_ignored(&self) -> bool {
self.range.start == usize::MAX
}
}
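The doc-stripping change is `clippy::option_if_let_else`: an `if let Some(x) ... else ...` that produces a value becomes `Option::map_or_else(none_fn, some_fn)`. Sketch with a plain string transform standing in for the regex:

```rust
fn strip_prefix_or_copy(content: &str, strip: Option<&str>) -> String {
    // Before:
    // if let Some(prefix) = strip {
    //     content.trim_start_matches(prefix).to_string()
    // } else {
    //     content.to_string()
    // }

    // After: the first closure handles `None`, the second handles `Some`.
    strip.map_or_else(
        || content.to_string(),
        |prefix| content.trim_start_matches(prefix).to_string(),
    )
}

fn main() {
    assert_eq!(strip_prefix_or_copy("/// doc", Some("/// ")), "doc");
    assert_eq!(strip_prefix_or_copy("doc", None), "doc");
}
```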