fix: properly handle utf8 code points for highlight and tag assertions
(cherry picked from commit 6f050f0da5)
This commit is contained in:
parent
3ad82e6772
commit
8e1dbb4617
9 changed files with 131 additions and 53 deletions
57
Cargo.lock
generated
57
Cargo.lock
generated
|
|
@ -99,7 +99,7 @@ dependencies = [
|
|||
"bitflags",
|
||||
"cexpr",
|
||||
"clang-sys",
|
||||
"itertools",
|
||||
"itertools 0.13.0",
|
||||
"log",
|
||||
"prettyplease",
|
||||
"proc-macro2",
|
||||
|
|
@ -125,6 +125,17 @@ dependencies = [
|
|||
"objc2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bstr"
|
||||
version = "1.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "40723b8fb387abc38f4f4a37c09073622e41dd12327033091ef8950659e6dc0c"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
"regex-automata",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bumpalo"
|
||||
version = "3.16.0"
|
||||
|
|
@ -145,9 +156,9 @@ checksum = "8318a53db07bb3f8dca91a600466bdb3f2eaadeedfdbcf02e1accbad9271ba50"
|
|||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.1.19"
|
||||
version = "1.1.20"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2d74707dde2ba56f86ae90effb3b43ddd369504387e718014de010cec7959800"
|
||||
checksum = "45bcde016d64c21da4be18b655631e5ab6d3107607e71a73a9f53eb48aae23fb"
|
||||
dependencies = [
|
||||
"jobserver",
|
||||
"libc",
|
||||
|
|
@ -391,7 +402,7 @@ dependencies = [
|
|||
"cranelift-codegen",
|
||||
"cranelift-entity",
|
||||
"cranelift-frontend",
|
||||
"itertools",
|
||||
"itertools 0.12.1",
|
||||
"log",
|
||||
"smallvec",
|
||||
"wasmparser",
|
||||
|
|
@ -677,6 +688,15 @@ dependencies = [
|
|||
"either",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itertools"
|
||||
version = "0.13.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
|
||||
dependencies = [
|
||||
"either",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itoa"
|
||||
version = "1.0.11"
|
||||
|
|
@ -914,9 +934,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "object"
|
||||
version = "0.36.3"
|
||||
version = "0.36.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "27b64972346851a39438c60b341ebc01bba47464ae329e55cf343eb93964efd9"
|
||||
checksum = "084f1a5821ac4c651660a94a7153d27ac9d8a53736203f58b31945ded098070a"
|
||||
dependencies = [
|
||||
"crc32fast",
|
||||
"hashbrown 0.14.5",
|
||||
|
|
@ -1036,9 +1056,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "psm"
|
||||
version = "0.1.21"
|
||||
version = "0.1.23"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874"
|
||||
checksum = "aa37f80ca58604976033fae9515a8a2989fc13797d953f7c04fb8fa36a11f205"
|
||||
dependencies = [
|
||||
"cc",
|
||||
]
|
||||
|
|
@ -1084,9 +1104,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "redox_syscall"
|
||||
version = "0.5.3"
|
||||
version = "0.5.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2a908a6e00f1fdd0dfd9c0eb08ce85126f6d8bbda50017e74bc4a4b7d4a926a4"
|
||||
checksum = "0884ad60e090bf1345b93da0a5de8923c93884cd03f40dfcfddd3b4bee661853"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
]
|
||||
|
|
@ -1152,9 +1172,9 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
|
|||
|
||||
[[package]]
|
||||
name = "rustix"
|
||||
version = "0.38.34"
|
||||
version = "0.38.37"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f"
|
||||
checksum = "8acb788b847c24f28525660c4d7758620a7210875711f79e7f663cc152726811"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"errno",
|
||||
|
|
@ -1279,9 +1299,9 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
|
|||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.76"
|
||||
version = "2.0.77"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "578e081a14e0cefc3279b0472138c513f37b41a08d5a3cca9b6e4e8ceb6cd525"
|
||||
checksum = "9f35bcdf61fd8e7be6caf75f429fdca8beb3ed76584befb503b1569faee373ed"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
|
|
@ -1386,9 +1406,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "toml_edit"
|
||||
version = "0.22.20"
|
||||
version = "0.22.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "583c44c02ad26b0c3f3066fe629275e50627026c51ac2e595cca4c230ce1ce1d"
|
||||
checksum = "3b072cee73c449a636ffd6f32bd8de3a9f7119139aff882f44943ce2986dc5cf"
|
||||
dependencies = [
|
||||
"indexmap",
|
||||
"serde",
|
||||
|
|
@ -1446,6 +1466,7 @@ version = "0.23.0"
|
|||
dependencies = [
|
||||
"anstyle",
|
||||
"anyhow",
|
||||
"bstr",
|
||||
"clap",
|
||||
"ctor",
|
||||
"ctrlc",
|
||||
|
|
@ -1558,9 +1579,9 @@ checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75"
|
|||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.12"
|
||||
version = "1.0.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
|
||||
checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-normalization"
|
||||
|
|
|
|||
|
|
@ -41,6 +41,7 @@ strip = false
|
|||
[workspace.dependencies]
|
||||
anstyle = "1.0.8"
|
||||
anyhow = "1.0.89"
|
||||
bstr = "1.10.0"
|
||||
cc = "1.1.19"
|
||||
clap = { version = "4.5.17", features = [
|
||||
"cargo",
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ wasm = ["tree-sitter/wasm", "tree-sitter-loader/wasm"]
|
|||
[dependencies]
|
||||
anstyle.workspace = true
|
||||
anyhow.workspace = true
|
||||
bstr.workspace = true
|
||||
clap.workspace = true
|
||||
ctor.workspace = true
|
||||
ctrlc.workspace = true
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ use std::{
|
|||
use anyhow::{Context, Result};
|
||||
use tree_sitter::{Language, Parser, Point, Query, QueryCursor};
|
||||
|
||||
use crate::query_testing;
|
||||
use crate::query_testing::{self, to_utf8_point};
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn query_files_at_paths(
|
||||
|
|
@ -70,8 +70,8 @@ pub fn query_files_at_paths(
|
|||
}
|
||||
results.push(query_testing::CaptureInfo {
|
||||
name: (*capture_name).to_string(),
|
||||
start: capture.node.start_position(),
|
||||
end: capture.node.end_position(),
|
||||
start: to_utf8_point(capture.node.start_position(), source_code.as_slice()),
|
||||
end: to_utf8_point(capture.node.end_position(), source_code.as_slice()),
|
||||
});
|
||||
}
|
||||
} else {
|
||||
|
|
@ -100,8 +100,8 @@ pub fn query_files_at_paths(
|
|||
}
|
||||
results.push(query_testing::CaptureInfo {
|
||||
name: (*capture_name).to_string(),
|
||||
start: capture.node.start_position(),
|
||||
end: capture.node.end_position(),
|
||||
start: to_utf8_point(capture.node.start_position(), source_code.as_slice()),
|
||||
end: to_utf8_point(capture.node.end_position(), source_code.as_slice()),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
use std::fs;
|
||||
|
||||
use anyhow::{anyhow, Result};
|
||||
use bstr::{BStr, ByteSlice};
|
||||
use lazy_static::lazy_static;
|
||||
use regex::Regex;
|
||||
use tree_sitter::{Language, Parser, Point};
|
||||
|
|
@ -9,16 +10,56 @@ lazy_static! {
|
|||
static ref CAPTURE_NAME_REGEX: Regex = Regex::new("[\\w_\\-.]+").unwrap();
|
||||
}
|
||||
|
||||
/// A `(row, column)` position in a source file, used by the highlight, tag
/// and query assertion tests.
///
/// The field layout mirrors `tree_sitter::Point`, but the two must not be
/// mixed: positions produced by [`to_utf8_point`] carry a column counted in
/// grapheme clusters rather than in bytes, so that a `^` marker in a test
/// comment lines up with what a reader sees on screen.
///
/// The derived ordering compares `row` first and `column` second.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct Utf8Point {
    /// Line index of the position.
    pub row: usize,
    /// Column index of the position within `row`.
    pub column: usize,
}

impl std::fmt::Display for Utf8Point {
    /// Formats the point as `(row, column)`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "({}, {})", self.row, self.column)
    }
}

impl Utf8Point {
    /// Creates a point from an explicit row and column.
    #[must_use]
    pub const fn new(row: usize, column: usize) -> Self {
        Self { row, column }
    }
}
|
||||
|
||||
pub fn to_utf8_point(point: Point, source: &[u8]) -> Utf8Point {
|
||||
if point.column == 0 {
|
||||
return Utf8Point::new(point.row, 0);
|
||||
}
|
||||
|
||||
let bstr = BStr::new(source);
|
||||
let line = bstr.lines_with_terminator().nth(point.row).unwrap();
|
||||
let mut utf8_column = 0;
|
||||
|
||||
for (_, grapheme_end, _) in line.grapheme_indices() {
|
||||
utf8_column += 1;
|
||||
if grapheme_end >= point.column {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Utf8Point {
|
||||
row: point.row,
|
||||
column: utf8_column,
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Eq, PartialEq)]
|
||||
pub struct CaptureInfo {
|
||||
pub name: String,
|
||||
pub start: Point,
|
||||
pub end: Point,
|
||||
pub start: Utf8Point,
|
||||
pub end: Utf8Point,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub struct Assertion {
|
||||
pub position: Point,
|
||||
pub position: Utf8Point,
|
||||
pub negative: bool,
|
||||
pub expected_capture_name: String,
|
||||
}
|
||||
|
|
@ -27,7 +68,7 @@ impl Assertion {
|
|||
#[must_use]
|
||||
pub fn new(row: usize, col: usize, negative: bool, expected_capture_name: String) -> Self {
|
||||
Self {
|
||||
position: Point::new(row, col),
|
||||
position: Utf8Point::new(row, col),
|
||||
negative,
|
||||
expected_capture_name,
|
||||
}
|
||||
|
|
@ -103,7 +144,7 @@ pub fn parse_position_comments(
|
|||
{
|
||||
assertion_ranges.push((node.start_position(), node.end_position()));
|
||||
result.push(Assertion {
|
||||
position,
|
||||
position: to_utf8_point(position, source),
|
||||
negative,
|
||||
expected_capture_name: mat.as_str().to_string(),
|
||||
});
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ use tree_sitter_highlight::{Highlight, HighlightConfiguration, HighlightEvent, H
|
|||
use tree_sitter_loader::{Config, Loader};
|
||||
|
||||
use super::{
|
||||
query_testing::{parse_position_comments, Assertion},
|
||||
query_testing::{parse_position_comments, to_utf8_point, Assertion, Utf8Point},
|
||||
test::paint,
|
||||
util,
|
||||
};
|
||||
|
|
@ -141,7 +141,7 @@ fn test_highlights_indented(
|
|||
}
|
||||
pub fn iterate_assertions(
|
||||
assertions: &[Assertion],
|
||||
highlights: &[(Point, Point, Highlight)],
|
||||
highlights: &[(Utf8Point, Utf8Point, Highlight)],
|
||||
highlight_names: &[String],
|
||||
) -> Result<usize> {
|
||||
// Iterate through all of the highlighting assertions, checking each one against the
|
||||
|
|
@ -224,7 +224,7 @@ pub fn get_highlight_positions(
|
|||
highlighter: &mut Highlighter,
|
||||
highlight_config: &HighlightConfiguration,
|
||||
source: &[u8],
|
||||
) -> Result<Vec<(Point, Point, Highlight)>> {
|
||||
) -> Result<Vec<(Utf8Point, Utf8Point, Highlight)>> {
|
||||
let mut row = 0;
|
||||
let mut column = 0;
|
||||
let mut byte_offset = 0;
|
||||
|
|
@ -261,7 +261,10 @@ pub fn get_highlight_positions(
|
|||
}
|
||||
}
|
||||
if let Some(highlight) = highlight_stack.last() {
|
||||
result.push((start_position, Point::new(row, column), *highlight));
|
||||
let utf8_start_position = to_utf8_point(start_position, source.as_bytes());
|
||||
let utf8_end_position =
|
||||
to_utf8_point(Point::new(row, column), source.as_bytes());
|
||||
result.push((utf8_start_position, utf8_end_position, *highlight));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,12 +2,11 @@ use std::{fs, path::Path};
|
|||
|
||||
use anstyle::AnsiColor;
|
||||
use anyhow::{anyhow, Result};
|
||||
use tree_sitter::Point;
|
||||
use tree_sitter_loader::{Config, Loader};
|
||||
use tree_sitter_tags::{TagsConfiguration, TagsContext};
|
||||
|
||||
use super::{
|
||||
query_testing::{parse_position_comments, Assertion},
|
||||
query_testing::{parse_position_comments, to_utf8_point, Assertion, Utf8Point},
|
||||
test::paint,
|
||||
util,
|
||||
};
|
||||
|
|
@ -168,7 +167,7 @@ pub fn get_tag_positions(
|
|||
tags_context: &mut TagsContext,
|
||||
tags_config: &TagsConfiguration,
|
||||
source: &[u8],
|
||||
) -> Result<Vec<(Point, Point, String)>> {
|
||||
) -> Result<Vec<(Utf8Point, Utf8Point, String)>> {
|
||||
let (tags_iter, _has_error) = tags_context.generate_tags(tags_config, source, None)?;
|
||||
let tag_positions = tags_iter
|
||||
.filter_map(std::result::Result::ok)
|
||||
|
|
@ -179,7 +178,11 @@ pub fn get_tag_positions(
|
|||
} else {
|
||||
format!("reference.{tag_postfix}")
|
||||
};
|
||||
(tag.span.start, tag.span.end, tag_name)
|
||||
(
|
||||
to_utf8_point(tag.span.start, source),
|
||||
to_utf8_point(tag.span.end, source),
|
||||
tag_name,
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
Ok(tag_positions)
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
use tree_sitter::{Parser, Point};
|
||||
use tree_sitter::Parser;
|
||||
use tree_sitter_highlight::{Highlight, Highlighter};
|
||||
|
||||
use super::helpers::fixtures::{get_highlight_config, get_language, test_loader};
|
||||
use crate::{
|
||||
query_testing::{parse_position_comments, Assertion},
|
||||
query_testing::{parse_position_comments, Assertion, Utf8Point},
|
||||
test_highlight::get_highlight_positions,
|
||||
};
|
||||
|
||||
|
|
@ -28,6 +28,9 @@ fn test_highlight_test_with_basic_test() {
|
|||
" // ^ variable",
|
||||
" // ^ !variable",
|
||||
"};",
|
||||
"var y̆y̆y̆y̆ = function() {}",
|
||||
" // ^ function",
|
||||
" // ^ keyword",
|
||||
]
|
||||
.join("\n");
|
||||
|
||||
|
|
@ -40,6 +43,8 @@ fn test_highlight_test_with_basic_test() {
|
|||
Assertion::new(1, 11, false, String::from("keyword")),
|
||||
Assertion::new(4, 9, false, String::from("variable")),
|
||||
Assertion::new(4, 11, true, String::from("variable")),
|
||||
Assertion::new(8, 5, false, String::from("function")),
|
||||
Assertion::new(8, 11, false, String::from("keyword")),
|
||||
]
|
||||
);
|
||||
|
||||
|
|
@ -50,13 +55,16 @@ fn test_highlight_test_with_basic_test() {
|
|||
assert_eq!(
|
||||
highlight_positions,
|
||||
&[
|
||||
(Point::new(1, 0), Point::new(1, 3), Highlight(2)), // "var"
|
||||
(Point::new(1, 4), Point::new(1, 7), Highlight(0)), // "abc"
|
||||
(Point::new(1, 10), Point::new(1, 18), Highlight(2)), // "function"
|
||||
(Point::new(1, 19), Point::new(1, 20), Highlight(1)), // "d"
|
||||
(Point::new(4, 2), Point::new(4, 8), Highlight(2)), // "return"
|
||||
(Point::new(4, 9), Point::new(4, 10), Highlight(1)), // "d"
|
||||
(Point::new(4, 13), Point::new(4, 14), Highlight(1)), // "e"
|
||||
(Utf8Point::new(1, 0), Utf8Point::new(1, 3), Highlight(2)), // "var"
|
||||
(Utf8Point::new(1, 4), Utf8Point::new(1, 7), Highlight(0)), // "abc"
|
||||
(Utf8Point::new(1, 10), Utf8Point::new(1, 18), Highlight(2)), // "function"
|
||||
(Utf8Point::new(1, 19), Utf8Point::new(1, 20), Highlight(1)), // "d"
|
||||
(Utf8Point::new(4, 2), Utf8Point::new(4, 8), Highlight(2)), // "return"
|
||||
(Utf8Point::new(4, 9), Utf8Point::new(4, 10), Highlight(1)), // "d"
|
||||
(Utf8Point::new(4, 13), Utf8Point::new(4, 14), Highlight(1)), // "e"
|
||||
(Utf8Point::new(8, 0), Utf8Point::new(8, 3), Highlight(2)), // "var"
|
||||
(Utf8Point::new(8, 4), Utf8Point::new(8, 8), Highlight(0)), // "y̆y̆y̆y̆"
|
||||
(Utf8Point::new(8, 11), Utf8Point::new(8, 19), Highlight(2)), // "function"
|
||||
]
|
||||
);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
use tree_sitter::{Parser, Point};
|
||||
use tree_sitter::Parser;
|
||||
use tree_sitter_tags::TagsContext;
|
||||
|
||||
use super::helpers::fixtures::{get_language, get_tags_config};
|
||||
use crate::{
|
||||
query_testing::{parse_position_comments, Assertion},
|
||||
query_testing::{parse_position_comments, Assertion, Utf8Point},
|
||||
test_tags::get_tag_positions,
|
||||
};
|
||||
|
||||
|
|
@ -43,18 +43,18 @@ fn test_tags_test_with_basic_test() {
|
|||
tag_positions,
|
||||
&[
|
||||
(
|
||||
Point::new(1, 4),
|
||||
Point::new(1, 7),
|
||||
Utf8Point::new(1, 4),
|
||||
Utf8Point::new(1, 7),
|
||||
"definition.function".to_string()
|
||||
),
|
||||
(
|
||||
Point::new(3, 8),
|
||||
Point::new(3, 11),
|
||||
Utf8Point::new(3, 8),
|
||||
Utf8Point::new(3, 11),
|
||||
"reference.call".to_string()
|
||||
),
|
||||
(
|
||||
Point::new(5, 11),
|
||||
Point::new(5, 12),
|
||||
Utf8Point::new(5, 11),
|
||||
Utf8Point::new(5, 12),
|
||||
"reference.call".to_string()
|
||||
),
|
||||
]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue