Merge pull request #2687 from ahlinc/offset-pos-opt

chore(rust): improve perf for position funcs
This commit is contained in:
Andrew Hlynskyi 2023-10-05 20:46:04 +03:00 committed by GitHub
commit 20924fa4cd
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 62 additions and 40 deletions

5
Cargo.lock generated
View file

@ -469,9 +469,9 @@ dependencies = [
[[package]]
name = "memchr"
version = "2.5.0"
version = "2.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167"
[[package]]
name = "minimal-lexical"
@ -941,6 +941,7 @@ dependencies = [
"indoc",
"lazy_static",
"log",
"memchr",
"path-slash",
"pretty_assertions",
"rand",

View file

@ -32,6 +32,7 @@ glob = "0.3.1"
html-escape = "0.2.13"
indexmap = "2.0.0"
lazy_static = "1.4.0"
memchr = "2.6.3"
path-slash = "0.2.1"
regex = "1.9.1"
regex-syntax = "0.7.4"

View file

@ -115,7 +115,7 @@ pub fn parse_file_at_path(opts: ParseFileOptions) -> Result<bool> {
for (i, edit) in opts.edits.iter().enumerate() {
let edit = parse_edit_flag(&source_code, edit)?;
perform_edit(&mut tree, &mut source_code, &edit);
perform_edit(&mut tree, &mut source_code, &edit)?;
tree = parser.parse(&source_code, Some(&tree)).unwrap();
if opts.debug_graph {
@ -309,14 +309,14 @@ pub fn parse_file_at_path(opts: ParseFileOptions) -> Result<bool> {
Ok(false)
}
pub fn perform_edit(tree: &mut Tree, input: &mut Vec<u8>, edit: &Edit) -> InputEdit {
pub fn perform_edit(tree: &mut Tree, input: &mut Vec<u8>, edit: &Edit) -> Result<InputEdit> {
let start_byte = edit.position;
let old_end_byte = edit.position + edit.deleted_length;
let new_end_byte = edit.position + edit.inserted_text.len();
let start_position = position_for_offset(input, start_byte);
let old_end_position = position_for_offset(input, old_end_byte);
let start_position = position_for_offset(input, start_byte)?;
let old_end_position = position_for_offset(input, old_end_byte)?;
input.splice(start_byte..old_end_byte, edit.inserted_text.iter().cloned());
let new_end_position = position_for_offset(input, new_end_byte);
let new_end_position = position_for_offset(input, new_end_byte)?;
let edit = InputEdit {
start_byte,
old_end_byte,
@ -326,7 +326,7 @@ pub fn perform_edit(tree: &mut Tree, input: &mut Vec<u8>, edit: &Edit) -> InputE
new_end_position,
};
tree.edit(&edit);
edit
Ok(edit)
}
fn parse_edit_flag(source_code: &Vec<u8>, flag: &str) -> Result<Edit> {
@ -355,7 +355,7 @@ fn parse_edit_flag(source_code: &Vec<u8>, flag: &str) -> Result<Edit> {
let row = usize::from_str_radix(row, 10).map_err(|_| error())?;
let column = parts.next().ok_or_else(error)?;
let column = usize::from_str_radix(column, 10).map_err(|_| error())?;
offset_for_position(source_code, Point { row, column })
offset_for_position(source_code, Point { row, column })?
} else {
usize::from_str_radix(position, 10).map_err(|_| error())?
};
@ -370,31 +370,48 @@ fn parse_edit_flag(source_code: &Vec<u8>, flag: &str) -> Result<Edit> {
})
}
fn offset_for_position(input: &Vec<u8>, position: Point) -> usize {
let mut current_position = Point { row: 0, column: 0 };
for (i, c) in input.iter().enumerate() {
if *c as char == '\n' {
current_position.row += 1;
current_position.column = 0;
} else {
current_position.column += 1;
}
if current_position > position {
return i;
/// Converts a row/column `position` into a byte offset into `input`.
///
/// Rows are delimited by `b'\n'` bytes, which are located with
/// `memchr::memchr_iter`. The returned value is the computed start offset of
/// the requested row plus `position.column`.
///
/// # Errors
///
/// * "Failed to address a row" when `input` contains fewer newlines than
///   `position.row`.
/// * "Failed to address a column" / "Failed to address a column over the end"
///   when the column checks below reject `position.column`.
pub fn offset_for_position(input: &[u8], position: Point) -> Result<usize> {
let mut row = 0;
let mut offset = 0;
let mut iter = memchr::memchr_iter(b'\n', input);
// Walk newline positions until `row` reaches `position.row` or the newlines
// run out. Each accepted newline records its index in `offset`.
loop {
if let Some(pos) = iter.next() {
if row < position.row {
row += 1;
offset = pos;
continue;
}
}
// Step past the recorded newline so `offset` is the first byte of the row.
// NOTE(review): this also runs when `position.row == 0`, where no newline
// was recorded, so row 0 / column 0 appears to produce offset 1 — confirm.
offset += 1;
break;
}
// NOTE(review): the `return` below yields a bare `usize` while this function
// returns `Result<usize>`; it looks like a line of the previous
// implementation carried along by the diff view — confirm before relying on it.
return input.len();
// `row` never exceeds `position.row`, so a non-zero difference means the
// loop ran out of newlines before reaching the requested row.
if position.row - row > 0 {
return Err(anyhow!("Failed to address a row: {}", position.row));
}
// NOTE(review): when the loop stopped with `row == position.row`, its last
// `iter.next()` already consumed one newline, so `pos` here seems to be the
// newline after that one — confirm the column bound is the intended one.
if let Some(pos) = iter.next() {
// Reject a column past `pos`, or any non-zero column when the byte at
// `offset` is itself a newline.
if (pos - offset < position.column) || (input[offset] == b'\n' && position.column > 0) {
return Err(anyhow!("Failed to address a column: {}", position.column));
};
// NOTE(review): `offset` is at least 1 here, so an empty `input` would make
// this subtraction underflow — confirm callers never pass empty input.
} else if input.len() - offset < position.column {
return Err(anyhow!("Failed to address a column over the end"));
}
Ok(offset + position.column)
}
fn position_for_offset(input: &Vec<u8>, offset: usize) -> Point {
let mut result = Point { row: 0, column: 0 };
for c in &input[0..offset] {
if *c as char == '\n' {
result.row += 1;
result.column = 0;
} else {
result.column += 1;
}
/// Converts a byte `offset` into `input` to a row/column `Point`.
///
/// The row is the number of `b'\n'` bytes found in `input[..offset]`; the
/// column is the distance from the byte after the last such newline (or from
/// the start of `input` when there is none) to `offset`.
///
/// # Errors
///
/// Returns "Failed to address an offset" when `offset` is greater than
/// `input.len()`.
pub fn position_for_offset(input: &[u8], offset: usize) -> Result<Point> {
// Guard the slice `&input[..offset]` below against an out-of-range offset.
if offset > input.len() {
return Err(anyhow!("Failed to address an offset: {offset}"));
}
// NOTE(review): the bare `result` line below precedes the `let mut result`
// declaration; it looks like a line of the previous implementation carried
// along by the diff view — confirm before relying on it.
result
let mut result = Point { row: 0, column: 0 };
// Index of the most recent newline seen before `offset`; only meaningful
// once at least one newline was found (`result.row > 0`).
let mut last = 0;
for pos in memchr::memchr_iter(b'\n', &input[..offset]) {
result.row += 1;
last = pos;
}
// On the first row the column equals the offset itself; otherwise it is
// counted from the byte following the last newline.
result.column = if result.row > 0 {
offset - last - 1
} else {
offset
};
Ok(result)
}

View file

@ -187,7 +187,7 @@ fn test_language_corpus(language_name: &str, start_seed: usize, skipped: Option<
for _ in 0..1 + rand.unsigned(*EDIT_COUNT) {
let edit = get_random_edit(&mut rand, &input);
undo_stack.push(invert_edit(&input, &edit));
perform_edit(&mut tree, &mut input, &edit);
perform_edit(&mut tree, &mut input, &edit).unwrap();
}
if log_seed {
@ -219,7 +219,7 @@ fn test_language_corpus(language_name: &str, start_seed: usize, skipped: Option<
// Undo all of the edits and re-parse again.
while let Some(edit) = undo_stack.pop() {
perform_edit(&mut tree2, &mut input, &edit);
perform_edit(&mut tree2, &mut input, &edit).unwrap();
}
if *LOG_GRAPH_ENABLED {
eprintln!("{}\n", String::from_utf8_lossy(&input));

View file

@ -552,7 +552,7 @@ fn test_node_edit() {
let edit = get_random_edit(&mut rand, &mut code);
let mut tree2 = tree.clone();
let edit = perform_edit(&mut tree2, &mut code, &edit);
let edit = perform_edit(&mut tree2, &mut code, &edit).unwrap();
for node in nodes_before.iter_mut() {
node.edit(&edit);
}

View file

@ -342,7 +342,8 @@ fn test_parsing_after_editing_beginning_of_code() {
deleted_length: 0,
inserted_text: b" || 5".to_vec(),
},
);
)
.unwrap();
let mut recorder = ReadRecorder::new(&code);
let tree = parser
@ -389,7 +390,8 @@ fn test_parsing_after_editing_end_of_code() {
deleted_length: 0,
inserted_text: b".d".to_vec(),
},
);
)
.unwrap();
let mut recorder = ReadRecorder::new(&code);
let tree = parser
@ -464,7 +466,8 @@ h + i
deleted_length: 0,
inserted_text: b"1234".to_vec(),
},
);
)
.unwrap();
assert_eq!(
code,
@ -528,12 +531,12 @@ fn test_parsing_after_detecting_error_in_the_middle_of_a_string_token() {
let undo = invert_edit(&source, &edit);
let mut tree2 = tree.clone();
perform_edit(&mut tree2, &mut source, &edit);
perform_edit(&mut tree2, &mut source, &edit).unwrap();
tree2 = parser.parse(&source, Some(&tree2)).unwrap();
assert!(tree2.root_node().has_error());
let mut tree3 = tree2.clone();
perform_edit(&mut tree3, &mut source, &undo);
perform_edit(&mut tree3, &mut source, &undo).unwrap();
tree3 = parser.parse(&source, Some(&tree3)).unwrap();
assert_eq!(tree3.root_node().to_sexp(), tree.root_node().to_sexp(),);
}

View file

@ -663,7 +663,7 @@ fn get_changed_ranges(
source_code: &mut Vec<u8>,
edit: Edit,
) -> Vec<Range> {
perform_edit(tree, source_code, &edit);
perform_edit(tree, source_code, &edit).unwrap();
let new_tree = parser.parse(&source_code, Some(tree)).unwrap();
let result = tree.changed_ranges(&new_tree).collect();
*tree = new_tree;