diff --git a/Cargo.lock b/Cargo.lock index 80a4e28d..be8829ac 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -469,9 +469,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.5.0" +version = "2.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167" [[package]] name = "minimal-lexical" @@ -941,6 +941,7 @@ dependencies = [ "indoc", "lazy_static", "log", + "memchr", "path-slash", "pretty_assertions", "rand", diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 0edd3c3d..e62c443a 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -32,6 +32,7 @@ glob = "0.3.1" html-escape = "0.2.13" indexmap = "2.0.0" lazy_static = "1.4.0" +memchr = "2.6.3" path-slash = "0.2.1" regex = "1.9.1" regex-syntax = "0.7.4" diff --git a/cli/src/parse.rs b/cli/src/parse.rs index 5b1a4b31..68279361 100644 --- a/cli/src/parse.rs +++ b/cli/src/parse.rs @@ -115,7 +115,7 @@ pub fn parse_file_at_path(opts: ParseFileOptions) -> Result { for (i, edit) in opts.edits.iter().enumerate() { let edit = parse_edit_flag(&source_code, edit)?; - perform_edit(&mut tree, &mut source_code, &edit); + perform_edit(&mut tree, &mut source_code, &edit)?; tree = parser.parse(&source_code, Some(&tree)).unwrap(); if opts.debug_graph { @@ -309,14 +309,14 @@ pub fn parse_file_at_path(opts: ParseFileOptions) -> Result { Ok(false) } -pub fn perform_edit(tree: &mut Tree, input: &mut Vec, edit: &Edit) -> InputEdit { +pub fn perform_edit(tree: &mut Tree, input: &mut Vec, edit: &Edit) -> Result { let start_byte = edit.position; let old_end_byte = edit.position + edit.deleted_length; let new_end_byte = edit.position + edit.inserted_text.len(); - let start_position = position_for_offset(input, start_byte); - let old_end_position = position_for_offset(input, old_end_byte); + let start_position = position_for_offset(input, start_byte)?; + let old_end_position = position_for_offset(input, old_end_byte)?; input.splice(start_byte..old_end_byte, edit.inserted_text.iter().cloned()); - let new_end_position = position_for_offset(input, new_end_byte); + let new_end_position = position_for_offset(input, new_end_byte)?; let edit = InputEdit { start_byte, old_end_byte, @@ -326,7 +326,7 @@ pub fn perform_edit(tree: &mut Tree, input: &mut Vec, edit: &Edit) -> InputE new_end_position, }; tree.edit(&edit); - edit + Ok(edit) } fn parse_edit_flag(source_code: &Vec, flag: &str) -> Result { @@ -355,7 +355,7 @@ fn parse_edit_flag(source_code: &Vec, flag: &str) -> Result { let row = usize::from_str_radix(row, 10).map_err(|_| error())?; let column = parts.next().ok_or_else(error)?; let column = usize::from_str_radix(column, 10).map_err(|_| error())?; - offset_for_position(source_code, Point { row, column }) + offset_for_position(source_code, Point { row, column })? } else { usize::from_str_radix(position, 10).map_err(|_| error())? }; @@ -370,31 +370,48 @@ fn parse_edit_flag(source_code: &Vec, flag: &str) -> Result { }) } -fn offset_for_position(input: &Vec, position: Point) -> usize { - let mut current_position = Point { row: 0, column: 0 }; - for (i, c) in input.iter().enumerate() { - if *c as char == '\n' { - current_position.row += 1; - current_position.column = 0; - } else { - current_position.column += 1; - } - if current_position > position { - return i; +pub fn offset_for_position(input: &[u8], position: Point) -> Result { + let mut row = 0; + let mut offset = 0; + let mut iter = memchr::memchr_iter(b'\n', input); + loop { + if let Some(pos) = iter.next() { + if row < position.row { + row += 1; + offset = pos; + continue; + } } + offset += 1; + break; } - return input.len(); + if position.row - row > 0 { + return Err(anyhow!("Failed to address a row: {}", position.row)); + } + if let Some(pos) = iter.next() { + if (pos - offset < position.column) || (input[offset] == b'\n' && position.column > 0) { + return Err(anyhow!("Failed to address a column: {}", position.column)); + }; + } else if input.len() - offset < position.column { + return Err(anyhow!("Failed to address a column over the end")); + } + Ok(offset + position.column) } -fn position_for_offset(input: &Vec, offset: usize) -> Point { - let mut result = Point { row: 0, column: 0 }; - for c in &input[0..offset] { - if *c as char == '\n' { - result.row += 1; - result.column = 0; - } else { - result.column += 1; - } +pub fn position_for_offset(input: &[u8], offset: usize) -> Result { + if offset > input.len() { + return Err(anyhow!("Failed to address an offset: {offset}")); } - result + let mut result = Point { row: 0, column: 0 }; + let mut last = 0; + for pos in memchr::memchr_iter(b'\n', &input[..offset]) { + result.row += 1; + last = pos; + } + result.column = if result.row > 0 { + offset - last - 1 + } else { + offset + }; + Ok(result) } diff --git a/cli/src/tests/corpus_test.rs b/cli/src/tests/corpus_test.rs index 8a4c54cc..589b1839 100644 --- a/cli/src/tests/corpus_test.rs +++ b/cli/src/tests/corpus_test.rs @@ -187,7 +187,7 @@ fn test_language_corpus(language_name: &str, start_seed: usize, skipped: Option< for _ in 0..1 + rand.unsigned(*EDIT_COUNT) { let edit = get_random_edit(&mut rand, &input); undo_stack.push(invert_edit(&input, &edit)); - perform_edit(&mut tree, &mut input, &edit); + perform_edit(&mut tree, &mut input, &edit).unwrap(); } if log_seed { @@ -219,7 +219,7 @@ fn test_language_corpus(language_name: &str, start_seed: usize, skipped: Option< // Undo all of the edits and re-parse again. while let Some(edit) = undo_stack.pop() { - perform_edit(&mut tree2, &mut input, &edit); + perform_edit(&mut tree2, &mut input, &edit).unwrap(); } if *LOG_GRAPH_ENABLED { eprintln!("{}\n", String::from_utf8_lossy(&input)); diff --git a/cli/src/tests/node_test.rs b/cli/src/tests/node_test.rs index 43b3d66b..c4548d3e 100644 --- a/cli/src/tests/node_test.rs +++ b/cli/src/tests/node_test.rs @@ -552,7 +552,7 @@ fn test_node_edit() { let edit = get_random_edit(&mut rand, &mut code); let mut tree2 = tree.clone(); - let edit = perform_edit(&mut tree2, &mut code, &edit); + let edit = perform_edit(&mut tree2, &mut code, &edit).unwrap(); for node in nodes_before.iter_mut() { node.edit(&edit); } diff --git a/cli/src/tests/parser_test.rs b/cli/src/tests/parser_test.rs index 99616f56..434a81f9 100644 --- a/cli/src/tests/parser_test.rs +++ b/cli/src/tests/parser_test.rs @@ -342,7 +342,8 @@ fn test_parsing_after_editing_beginning_of_code() { deleted_length: 0, inserted_text: b" || 5".to_vec(), }, - ); + ) + .unwrap(); let mut recorder = ReadRecorder::new(&code); let tree = parser @@ -389,7 +390,8 @@ fn test_parsing_after_editing_end_of_code() { deleted_length: 0, inserted_text: b".d".to_vec(), }, - ); + ) + .unwrap(); let mut recorder = ReadRecorder::new(&code); let tree = parser @@ -464,7 +466,8 @@ h + i deleted_length: 0, inserted_text: b"1234".to_vec(), }, - ); + ) + .unwrap(); assert_eq!( code, @@ -528,12 +531,12 @@ fn test_parsing_after_detecting_error_in_the_middle_of_a_string_token() { let undo = invert_edit(&source, &edit); let mut tree2 = tree.clone(); - perform_edit(&mut tree2, &mut source, &edit); + perform_edit(&mut tree2, &mut source, &edit).unwrap(); tree2 = parser.parse(&source, Some(&tree2)).unwrap(); assert!(tree2.root_node().has_error()); let mut tree3 = tree2.clone(); - perform_edit(&mut tree3, &mut source, &undo); + perform_edit(&mut tree3, &mut source, &undo).unwrap(); tree3 = parser.parse(&source, Some(&tree3)).unwrap(); assert_eq!(tree3.root_node().to_sexp(), tree.root_node().to_sexp(),); } diff --git a/cli/src/tests/tree_test.rs b/cli/src/tests/tree_test.rs index 7d091c3f..c63b588b 100644 --- a/cli/src/tests/tree_test.rs +++ b/cli/src/tests/tree_test.rs @@ -663,7 +663,7 @@ fn get_changed_ranges( source_code: &mut Vec, edit: Edit, ) -> Vec { - perform_edit(tree, source_code, &edit); + perform_edit(tree, source_code, &edit).unwrap(); let new_tree = parser.parse(&source_code, Some(tree)).unwrap(); let result = tree.changed_ranges(&new_tree).collect(); *tree = new_tree;