fix: properly handle utf8 code points for highlight and tag assertions

This commit is contained in:
Amaan Qureshi 2024-09-08 14:26:34 -04:00
parent 755e49e212
commit 6f050f0da5
9 changed files with 131 additions and 53 deletions

57
Cargo.lock generated
View file

@ -99,7 +99,7 @@ dependencies = [
"bitflags",
"cexpr",
"clang-sys",
"itertools",
"itertools 0.13.0",
"log",
"prettyplease",
"proc-macro2",
@ -125,6 +125,17 @@ dependencies = [
"objc2",
]
[[package]]
name = "bstr"
version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "40723b8fb387abc38f4f4a37c09073622e41dd12327033091ef8950659e6dc0c"
dependencies = [
"memchr",
"regex-automata",
"serde",
]
[[package]]
name = "bumpalo"
version = "3.16.0"
@ -145,9 +156,9 @@ checksum = "8318a53db07bb3f8dca91a600466bdb3f2eaadeedfdbcf02e1accbad9271ba50"
[[package]]
name = "cc"
version = "1.1.19"
version = "1.1.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2d74707dde2ba56f86ae90effb3b43ddd369504387e718014de010cec7959800"
checksum = "45bcde016d64c21da4be18b655631e5ab6d3107607e71a73a9f53eb48aae23fb"
dependencies = [
"jobserver",
"libc",
@ -391,7 +402,7 @@ dependencies = [
"cranelift-codegen",
"cranelift-entity",
"cranelift-frontend",
"itertools",
"itertools 0.12.1",
"log",
"smallvec",
"wasmparser",
@ -677,6 +688,15 @@ dependencies = [
"either",
]
[[package]]
name = "itertools"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
dependencies = [
"either",
]
[[package]]
name = "itoa"
version = "1.0.11"
@ -914,9 +934,9 @@ dependencies = [
[[package]]
name = "object"
version = "0.36.3"
version = "0.36.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "27b64972346851a39438c60b341ebc01bba47464ae329e55cf343eb93964efd9"
checksum = "084f1a5821ac4c651660a94a7153d27ac9d8a53736203f58b31945ded098070a"
dependencies = [
"crc32fast",
"hashbrown 0.14.5",
@ -1036,9 +1056,9 @@ dependencies = [
[[package]]
name = "psm"
version = "0.1.21"
version = "0.1.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874"
checksum = "aa37f80ca58604976033fae9515a8a2989fc13797d953f7c04fb8fa36a11f205"
dependencies = [
"cc",
]
@ -1084,9 +1104,9 @@ dependencies = [
[[package]]
name = "redox_syscall"
version = "0.5.3"
version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2a908a6e00f1fdd0dfd9c0eb08ce85126f6d8bbda50017e74bc4a4b7d4a926a4"
checksum = "0884ad60e090bf1345b93da0a5de8923c93884cd03f40dfcfddd3b4bee661853"
dependencies = [
"bitflags",
]
@ -1152,9 +1172,9 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
[[package]]
name = "rustix"
version = "0.38.34"
version = "0.38.37"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f"
checksum = "8acb788b847c24f28525660c4d7758620a7210875711f79e7f663cc152726811"
dependencies = [
"bitflags",
"errno",
@ -1279,9 +1299,9 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
[[package]]
name = "syn"
version = "2.0.76"
version = "2.0.77"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "578e081a14e0cefc3279b0472138c513f37b41a08d5a3cca9b6e4e8ceb6cd525"
checksum = "9f35bcdf61fd8e7be6caf75f429fdca8beb3ed76584befb503b1569faee373ed"
dependencies = [
"proc-macro2",
"quote",
@ -1386,9 +1406,9 @@ dependencies = [
[[package]]
name = "toml_edit"
version = "0.22.20"
version = "0.22.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "583c44c02ad26b0c3f3066fe629275e50627026c51ac2e595cca4c230ce1ce1d"
checksum = "3b072cee73c449a636ffd6f32bd8de3a9f7119139aff882f44943ce2986dc5cf"
dependencies = [
"indexmap",
"serde",
@ -1446,6 +1466,7 @@ version = "0.23.0"
dependencies = [
"anstyle",
"anyhow",
"bstr",
"clap",
"ctor",
"ctrlc",
@ -1558,9 +1579,9 @@ checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75"
[[package]]
name = "unicode-ident"
version = "1.0.12"
version = "1.0.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe"
[[package]]
name = "unicode-normalization"

View file

@ -41,6 +41,7 @@ strip = false
[workspace.dependencies]
anstyle = "1.0.8"
anyhow = "1.0.89"
bstr = "1.10.0"
cc = "1.1.19"
clap = { version = "4.5.17", features = [
"cargo",

View file

@ -27,6 +27,7 @@ wasm = ["tree-sitter/wasm", "tree-sitter-loader/wasm"]
[dependencies]
anstyle.workspace = true
anyhow.workspace = true
bstr.workspace = true
clap.workspace = true
ctor.workspace = true
ctrlc.workspace = true

View file

@ -9,7 +9,7 @@ use std::{
use anyhow::{Context, Result};
use tree_sitter::{Language, Parser, Point, Query, QueryCursor};
use crate::query_testing;
use crate::query_testing::{self, to_utf8_point};
#[allow(clippy::too_many_arguments)]
pub fn query_files_at_paths(
@ -70,8 +70,8 @@ pub fn query_files_at_paths(
}
results.push(query_testing::CaptureInfo {
name: (*capture_name).to_string(),
start: capture.node.start_position(),
end: capture.node.end_position(),
start: to_utf8_point(capture.node.start_position(), source_code.as_slice()),
end: to_utf8_point(capture.node.end_position(), source_code.as_slice()),
});
}
} else {
@ -100,8 +100,8 @@ pub fn query_files_at_paths(
}
results.push(query_testing::CaptureInfo {
name: (*capture_name).to_string(),
start: capture.node.start_position(),
end: capture.node.end_position(),
start: to_utf8_point(capture.node.start_position(), source_code.as_slice()),
end: to_utf8_point(capture.node.end_position(), source_code.as_slice()),
});
}
}

View file

@ -1,6 +1,7 @@
use std::fs;
use anyhow::{anyhow, Result};
use bstr::{BStr, ByteSlice};
use lazy_static::lazy_static;
use regex::Regex;
use tree_sitter::{Language, Parser, Point};
@ -9,16 +10,56 @@ lazy_static! {
static ref CAPTURE_NAME_REGEX: Regex = Regex::new("[\\w_\\-.]+").unwrap();
}
/// A row/column position used when comparing captures against test assertions.
///
/// When a value is produced by [`to_utf8_point`], `column` counts grapheme
/// clusters on the row rather than bytes. The derived ordering compares `row`
/// first and `column` second, following the field declaration order.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Utf8Point {
    /// Index of the line this position is on.
    pub row: usize,
    /// Offset of this position within its line.
    pub column: usize,
}

impl Utf8Point {
    /// Builds a position from its `row` and `column`.
    ///
    /// This is a `const fn`, so it can be used to initialise constants.
    pub const fn new(row: usize, column: usize) -> Self {
        Utf8Point { row, column }
    }
}

/// Renders the position as `(row, column)`.
impl std::fmt::Display for Utf8Point {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { row, column } = *self;
        write!(f, "({row}, {column})")
    }
}
/// Converts a [`Point`] whose column is a byte offset into a [`Utf8Point`]
/// whose column counts grapheme clusters on the same row.
///
/// The row is carried over unchanged. A column of `0` is returned as-is
/// without looking at `source`. Otherwise the row's line is located in
/// `source` and the result is the number of grapheme clusters that begin
/// before the byte column; a byte column falling inside a cluster therefore
/// counts that cluster, and a byte column past the end of the line yields the
/// number of clusters on the line (terminator included).
///
/// # Panics
///
/// Panics if `point.column` is non-zero and `source` has no line at index
/// `point.row`.
pub fn to_utf8_point(point: Point, source: &[u8]) -> Utf8Point {
    // Column zero is the same in either unit, so no line lookup is needed.
    if point.column == 0 {
        return Utf8Point::new(point.row, 0);
    }

    let text = BStr::new(source);
    let row_bytes = text.lines_with_terminator().nth(point.row).unwrap();

    // Grapheme clusters are contiguous, so counting those that start before
    // the byte column is the same as counting up to and including the first
    // cluster that ends at or after it.
    let grapheme_column = row_bytes
        .grapheme_indices()
        .take_while(|(start, _, _)| *start < point.column)
        .count();

    Utf8Point::new(point.row, grapheme_column)
}
#[derive(Debug, Eq, PartialEq)]
pub struct CaptureInfo {
pub name: String,
pub start: Point,
pub end: Point,
pub start: Utf8Point,
pub end: Utf8Point,
}
#[derive(Debug, PartialEq, Eq)]
pub struct Assertion {
pub position: Point,
pub position: Utf8Point,
pub negative: bool,
pub expected_capture_name: String,
}
@ -32,7 +73,7 @@ impl Assertion {
expected_capture_name: String,
) -> Self {
Self {
position: Point::new(row, col),
position: Utf8Point::new(row, col),
negative,
expected_capture_name,
}
@ -108,7 +149,7 @@ pub fn parse_position_comments(
{
assertion_ranges.push((node.start_position(), node.end_position()));
result.push(Assertion {
position,
position: to_utf8_point(position, source),
negative,
expected_capture_name: mat.as_str().to_string(),
});

View file

@ -7,7 +7,7 @@ use tree_sitter_highlight::{Highlight, HighlightConfiguration, HighlightEvent, H
use tree_sitter_loader::{Config, Loader};
use super::{
query_testing::{parse_position_comments, Assertion},
query_testing::{parse_position_comments, to_utf8_point, Assertion, Utf8Point},
test::paint,
util,
};
@ -141,7 +141,7 @@ fn test_highlights_indented(
}
pub fn iterate_assertions(
assertions: &[Assertion],
highlights: &[(Point, Point, Highlight)],
highlights: &[(Utf8Point, Utf8Point, Highlight)],
highlight_names: &[String],
) -> Result<usize> {
// Iterate through all of the highlighting assertions, checking each one against the
@ -224,7 +224,7 @@ pub fn get_highlight_positions(
highlighter: &mut Highlighter,
highlight_config: &HighlightConfiguration,
source: &[u8],
) -> Result<Vec<(Point, Point, Highlight)>> {
) -> Result<Vec<(Utf8Point, Utf8Point, Highlight)>> {
let mut row = 0;
let mut column = 0;
let mut byte_offset = 0;
@ -261,7 +261,10 @@ pub fn get_highlight_positions(
}
}
if let Some(highlight) = highlight_stack.last() {
result.push((start_position, Point::new(row, column), *highlight));
let utf8_start_position = to_utf8_point(start_position, source.as_bytes());
let utf8_end_position =
to_utf8_point(Point::new(row, column), source.as_bytes());
result.push((utf8_start_position, utf8_end_position, *highlight));
}
}
}

View file

@ -2,12 +2,11 @@ use std::{fs, path::Path};
use anstyle::AnsiColor;
use anyhow::{anyhow, Result};
use tree_sitter::Point;
use tree_sitter_loader::{Config, Loader};
use tree_sitter_tags::{TagsConfiguration, TagsContext};
use super::{
query_testing::{parse_position_comments, Assertion},
query_testing::{parse_position_comments, to_utf8_point, Assertion, Utf8Point},
test::paint,
util,
};
@ -168,7 +167,7 @@ pub fn get_tag_positions(
tags_context: &mut TagsContext,
tags_config: &TagsConfiguration,
source: &[u8],
) -> Result<Vec<(Point, Point, String)>> {
) -> Result<Vec<(Utf8Point, Utf8Point, String)>> {
let (tags_iter, _has_error) = tags_context.generate_tags(tags_config, source, None)?;
let tag_positions = tags_iter
.filter_map(std::result::Result::ok)
@ -179,7 +178,11 @@ pub fn get_tag_positions(
} else {
format!("reference.{tag_postfix}")
};
(tag.span.start, tag.span.end, tag_name)
(
to_utf8_point(tag.span.start, source),
to_utf8_point(tag.span.end, source),
tag_name,
)
})
.collect();
Ok(tag_positions)

View file

@ -1,9 +1,9 @@
use tree_sitter::{Parser, Point};
use tree_sitter::Parser;
use tree_sitter_highlight::{Highlight, Highlighter};
use super::helpers::fixtures::{get_highlight_config, get_language, test_loader};
use crate::{
query_testing::{parse_position_comments, Assertion},
query_testing::{parse_position_comments, Assertion, Utf8Point},
test_highlight::get_highlight_positions,
};
@ -28,6 +28,9 @@ fn test_highlight_test_with_basic_test() {
" // ^ variable",
" // ^ !variable",
"};",
"var y̆y̆y̆y̆ = function() {}",
" // ^ function",
" // ^ keyword",
]
.join("\n");
@ -40,6 +43,8 @@ fn test_highlight_test_with_basic_test() {
Assertion::new(1, 11, false, String::from("keyword")),
Assertion::new(4, 9, false, String::from("variable")),
Assertion::new(4, 11, true, String::from("variable")),
Assertion::new(8, 5, false, String::from("function")),
Assertion::new(8, 11, false, String::from("keyword")),
]
);
@ -50,13 +55,16 @@ fn test_highlight_test_with_basic_test() {
assert_eq!(
highlight_positions,
&[
(Point::new(1, 0), Point::new(1, 3), Highlight(2)), // "var"
(Point::new(1, 4), Point::new(1, 7), Highlight(0)), // "abc"
(Point::new(1, 10), Point::new(1, 18), Highlight(2)), // "function"
(Point::new(1, 19), Point::new(1, 20), Highlight(1)), // "d"
(Point::new(4, 2), Point::new(4, 8), Highlight(2)), // "return"
(Point::new(4, 9), Point::new(4, 10), Highlight(1)), // "d"
(Point::new(4, 13), Point::new(4, 14), Highlight(1)), // "e"
(Utf8Point::new(1, 0), Utf8Point::new(1, 3), Highlight(2)), // "var"
(Utf8Point::new(1, 4), Utf8Point::new(1, 7), Highlight(0)), // "abc"
(Utf8Point::new(1, 10), Utf8Point::new(1, 18), Highlight(2)), // "function"
(Utf8Point::new(1, 19), Utf8Point::new(1, 20), Highlight(1)), // "d"
(Utf8Point::new(4, 2), Utf8Point::new(4, 8), Highlight(2)), // "return"
(Utf8Point::new(4, 9), Utf8Point::new(4, 10), Highlight(1)), // "d"
(Utf8Point::new(4, 13), Utf8Point::new(4, 14), Highlight(1)), // "e"
(Utf8Point::new(8, 0), Utf8Point::new(8, 3), Highlight(2)), // "var"
(Utf8Point::new(8, 4), Utf8Point::new(8, 8), Highlight(0)), // "y̆y̆y̆y̆"
(Utf8Point::new(8, 11), Utf8Point::new(8, 19), Highlight(2)), // "function"
]
);
}

View file

@ -1,9 +1,9 @@
use tree_sitter::{Parser, Point};
use tree_sitter::Parser;
use tree_sitter_tags::TagsContext;
use super::helpers::fixtures::{get_language, get_tags_config};
use crate::{
query_testing::{parse_position_comments, Assertion},
query_testing::{parse_position_comments, Assertion, Utf8Point},
test_tags::get_tag_positions,
};
@ -43,18 +43,18 @@ fn test_tags_test_with_basic_test() {
tag_positions,
&[
(
Point::new(1, 4),
Point::new(1, 7),
Utf8Point::new(1, 4),
Utf8Point::new(1, 7),
"definition.function".to_string()
),
(
Point::new(3, 8),
Point::new(3, 11),
Utf8Point::new(3, 8),
Utf8Point::new(3, 11),
"reference.call".to_string()
),
(
Point::new(5, 11),
Point::new(5, 12),
Utf8Point::new(5, 11),
Utf8Point::new(5, 12),
"reference.call".to_string()
),
]