Merge pull request #2687 from ahlinc/offset-pos-opt
chore(rust): improve perf for position funcs
This commit is contained in:
commit
20924fa4cd
7 changed files with 62 additions and 40 deletions
5
Cargo.lock
generated
5
Cargo.lock
generated
|
|
@ -469,9 +469,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.5.0"
|
||||
version = "2.6.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
|
||||
checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167"
|
||||
|
||||
[[package]]
|
||||
name = "minimal-lexical"
|
||||
|
|
@ -941,6 +941,7 @@ dependencies = [
|
|||
"indoc",
|
||||
"lazy_static",
|
||||
"log",
|
||||
"memchr",
|
||||
"path-slash",
|
||||
"pretty_assertions",
|
||||
"rand",
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@ glob = "0.3.1"
|
|||
html-escape = "0.2.13"
|
||||
indexmap = "2.0.0"
|
||||
lazy_static = "1.4.0"
|
||||
memchr = "2.6.3"
|
||||
path-slash = "0.2.1"
|
||||
regex = "1.9.1"
|
||||
regex-syntax = "0.7.4"
|
||||
|
|
|
|||
|
|
@ -115,7 +115,7 @@ pub fn parse_file_at_path(opts: ParseFileOptions) -> Result<bool> {
|
|||
|
||||
for (i, edit) in opts.edits.iter().enumerate() {
|
||||
let edit = parse_edit_flag(&source_code, edit)?;
|
||||
perform_edit(&mut tree, &mut source_code, &edit);
|
||||
perform_edit(&mut tree, &mut source_code, &edit)?;
|
||||
tree = parser.parse(&source_code, Some(&tree)).unwrap();
|
||||
|
||||
if opts.debug_graph {
|
||||
|
|
@ -309,14 +309,14 @@ pub fn parse_file_at_path(opts: ParseFileOptions) -> Result<bool> {
|
|||
Ok(false)
|
||||
}
|
||||
|
||||
pub fn perform_edit(tree: &mut Tree, input: &mut Vec<u8>, edit: &Edit) -> InputEdit {
|
||||
pub fn perform_edit(tree: &mut Tree, input: &mut Vec<u8>, edit: &Edit) -> Result<InputEdit> {
|
||||
let start_byte = edit.position;
|
||||
let old_end_byte = edit.position + edit.deleted_length;
|
||||
let new_end_byte = edit.position + edit.inserted_text.len();
|
||||
let start_position = position_for_offset(input, start_byte);
|
||||
let old_end_position = position_for_offset(input, old_end_byte);
|
||||
let start_position = position_for_offset(input, start_byte)?;
|
||||
let old_end_position = position_for_offset(input, old_end_byte)?;
|
||||
input.splice(start_byte..old_end_byte, edit.inserted_text.iter().cloned());
|
||||
let new_end_position = position_for_offset(input, new_end_byte);
|
||||
let new_end_position = position_for_offset(input, new_end_byte)?;
|
||||
let edit = InputEdit {
|
||||
start_byte,
|
||||
old_end_byte,
|
||||
|
|
@ -326,7 +326,7 @@ pub fn perform_edit(tree: &mut Tree, input: &mut Vec<u8>, edit: &Edit) -> InputE
|
|||
new_end_position,
|
||||
};
|
||||
tree.edit(&edit);
|
||||
edit
|
||||
Ok(edit)
|
||||
}
|
||||
|
||||
fn parse_edit_flag(source_code: &Vec<u8>, flag: &str) -> Result<Edit> {
|
||||
|
|
@ -355,7 +355,7 @@ fn parse_edit_flag(source_code: &Vec<u8>, flag: &str) -> Result<Edit> {
|
|||
let row = usize::from_str_radix(row, 10).map_err(|_| error())?;
|
||||
let column = parts.next().ok_or_else(error)?;
|
||||
let column = usize::from_str_radix(column, 10).map_err(|_| error())?;
|
||||
offset_for_position(source_code, Point { row, column })
|
||||
offset_for_position(source_code, Point { row, column })?
|
||||
} else {
|
||||
usize::from_str_radix(position, 10).map_err(|_| error())?
|
||||
};
|
||||
|
|
@ -370,31 +370,48 @@ fn parse_edit_flag(source_code: &Vec<u8>, flag: &str) -> Result<Edit> {
|
|||
})
|
||||
}
|
||||
|
||||
fn offset_for_position(input: &Vec<u8>, position: Point) -> usize {
|
||||
let mut current_position = Point { row: 0, column: 0 };
|
||||
for (i, c) in input.iter().enumerate() {
|
||||
if *c as char == '\n' {
|
||||
current_position.row += 1;
|
||||
current_position.column = 0;
|
||||
} else {
|
||||
current_position.column += 1;
|
||||
}
|
||||
if current_position > position {
|
||||
return i;
|
||||
pub fn offset_for_position(input: &[u8], position: Point) -> Result<usize> {
|
||||
let mut row = 0;
|
||||
let mut offset = 0;
|
||||
let mut iter = memchr::memchr_iter(b'\n', input);
|
||||
loop {
|
||||
if let Some(pos) = iter.next() {
|
||||
if row < position.row {
|
||||
row += 1;
|
||||
offset = pos;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
offset += 1;
|
||||
break;
|
||||
}
|
||||
return input.len();
|
||||
if position.row - row > 0 {
|
||||
return Err(anyhow!("Failed to address a row: {}", position.row));
|
||||
}
|
||||
if let Some(pos) = iter.next() {
|
||||
if (pos - offset < position.column) || (input[offset] == b'\n' && position.column > 0) {
|
||||
return Err(anyhow!("Failed to address a column: {}", position.column));
|
||||
};
|
||||
} else if input.len() - offset < position.column {
|
||||
return Err(anyhow!("Failed to address a column over the end"));
|
||||
}
|
||||
Ok(offset + position.column)
|
||||
}
|
||||
|
||||
fn position_for_offset(input: &Vec<u8>, offset: usize) -> Point {
|
||||
let mut result = Point { row: 0, column: 0 };
|
||||
for c in &input[0..offset] {
|
||||
if *c as char == '\n' {
|
||||
result.row += 1;
|
||||
result.column = 0;
|
||||
} else {
|
||||
result.column += 1;
|
||||
}
|
||||
pub fn position_for_offset(input: &[u8], offset: usize) -> Result<Point> {
|
||||
if offset > input.len() {
|
||||
return Err(anyhow!("Failed to address an offset: {offset}"));
|
||||
}
|
||||
result
|
||||
let mut result = Point { row: 0, column: 0 };
|
||||
let mut last = 0;
|
||||
for pos in memchr::memchr_iter(b'\n', &input[..offset]) {
|
||||
result.row += 1;
|
||||
last = pos;
|
||||
}
|
||||
result.column = if result.row > 0 {
|
||||
offset - last - 1
|
||||
} else {
|
||||
offset
|
||||
};
|
||||
Ok(result)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -187,7 +187,7 @@ fn test_language_corpus(language_name: &str, start_seed: usize, skipped: Option<
|
|||
for _ in 0..1 + rand.unsigned(*EDIT_COUNT) {
|
||||
let edit = get_random_edit(&mut rand, &input);
|
||||
undo_stack.push(invert_edit(&input, &edit));
|
||||
perform_edit(&mut tree, &mut input, &edit);
|
||||
perform_edit(&mut tree, &mut input, &edit).unwrap();
|
||||
}
|
||||
|
||||
if log_seed {
|
||||
|
|
@ -219,7 +219,7 @@ fn test_language_corpus(language_name: &str, start_seed: usize, skipped: Option<
|
|||
|
||||
// Undo all of the edits and re-parse again.
|
||||
while let Some(edit) = undo_stack.pop() {
|
||||
perform_edit(&mut tree2, &mut input, &edit);
|
||||
perform_edit(&mut tree2, &mut input, &edit).unwrap();
|
||||
}
|
||||
if *LOG_GRAPH_ENABLED {
|
||||
eprintln!("{}\n", String::from_utf8_lossy(&input));
|
||||
|
|
|
|||
|
|
@ -552,7 +552,7 @@ fn test_node_edit() {
|
|||
|
||||
let edit = get_random_edit(&mut rand, &mut code);
|
||||
let mut tree2 = tree.clone();
|
||||
let edit = perform_edit(&mut tree2, &mut code, &edit);
|
||||
let edit = perform_edit(&mut tree2, &mut code, &edit).unwrap();
|
||||
for node in nodes_before.iter_mut() {
|
||||
node.edit(&edit);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -342,7 +342,8 @@ fn test_parsing_after_editing_beginning_of_code() {
|
|||
deleted_length: 0,
|
||||
inserted_text: b" || 5".to_vec(),
|
||||
},
|
||||
);
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let mut recorder = ReadRecorder::new(&code);
|
||||
let tree = parser
|
||||
|
|
@ -389,7 +390,8 @@ fn test_parsing_after_editing_end_of_code() {
|
|||
deleted_length: 0,
|
||||
inserted_text: b".d".to_vec(),
|
||||
},
|
||||
);
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let mut recorder = ReadRecorder::new(&code);
|
||||
let tree = parser
|
||||
|
|
@ -464,7 +466,8 @@ h + i
|
|||
deleted_length: 0,
|
||||
inserted_text: b"1234".to_vec(),
|
||||
},
|
||||
);
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
code,
|
||||
|
|
@ -528,12 +531,12 @@ fn test_parsing_after_detecting_error_in_the_middle_of_a_string_token() {
|
|||
let undo = invert_edit(&source, &edit);
|
||||
|
||||
let mut tree2 = tree.clone();
|
||||
perform_edit(&mut tree2, &mut source, &edit);
|
||||
perform_edit(&mut tree2, &mut source, &edit).unwrap();
|
||||
tree2 = parser.parse(&source, Some(&tree2)).unwrap();
|
||||
assert!(tree2.root_node().has_error());
|
||||
|
||||
let mut tree3 = tree2.clone();
|
||||
perform_edit(&mut tree3, &mut source, &undo);
|
||||
perform_edit(&mut tree3, &mut source, &undo).unwrap();
|
||||
tree3 = parser.parse(&source, Some(&tree3)).unwrap();
|
||||
assert_eq!(tree3.root_node().to_sexp(), tree.root_node().to_sexp(),);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -663,7 +663,7 @@ fn get_changed_ranges(
|
|||
source_code: &mut Vec<u8>,
|
||||
edit: Edit,
|
||||
) -> Vec<Range> {
|
||||
perform_edit(tree, source_code, &edit);
|
||||
perform_edit(tree, source_code, &edit).unwrap();
|
||||
let new_tree = parser.parse(&source_code, Some(tree)).unwrap();
|
||||
let result = tree.changed_ranges(&new_tree).collect();
|
||||
*tree = new_tree;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue