diff --git a/cli/src/tests/corpus_test.rs b/cli/src/tests/corpus_test.rs index 1ee3ddc1..f1990963 100644 --- a/cli/src/tests/corpus_test.rs +++ b/cli/src/tests/corpus_test.rs @@ -1,4 +1,5 @@ use super::helpers::allocations; +use super::helpers::edits::{get_random_edit, invert_edit, perform_edit}; use super::helpers::fixtures::{fixtures_dir, get_language, get_test_language}; use super::helpers::random::Rand; use super::helpers::scope_sequence::ScopeSequence; @@ -7,7 +8,7 @@ use crate::test::{parse_tests, print_diff, print_diff_key, TestEntry}; use crate::util; use lazy_static::lazy_static; use std::{env, fs, time, usize}; -use tree_sitter::{InputEdit, LogType, Node, Parser, Point, Tree}; +use tree_sitter::{LogType, Node, Parser, Tree}; const EDIT_COUNT: usize = 3; const TRIAL_COUNT: usize = 10; @@ -187,12 +188,6 @@ fn test_real_language_corpus_files() { } } -struct Edit { - position: usize, - deleted_length: usize, - inserted_text: Vec, -} - #[test] fn test_feature_corpus_files() { let test_grammars_dir = fixtures_dir().join("test_grammars"); @@ -279,92 +274,6 @@ fn test_feature_corpus_files() { } } -fn get_random_edit(rand: &mut Rand, input: &Vec) -> Edit { - let choice = rand.unsigned(10); - if choice < 2 { - // Insert text at end - let inserted_text = rand.words(3); - Edit { - position: input.len(), - deleted_length: 0, - inserted_text, - } - } else if choice < 5 { - // Delete text from the end - let mut deleted_length = rand.unsigned(10); - if deleted_length > input.len() { - deleted_length = input.len(); - } - Edit { - position: input.len() - deleted_length, - deleted_length, - inserted_text: vec![], - } - } else if choice < 8 { - // Insert at a random position - let position = rand.unsigned(input.len()); - let word_count = 1 + rand.unsigned(3); - let inserted_text = rand.words(word_count); - Edit { - position, - deleted_length: 0, - inserted_text, - } - } else { - // Replace at random position - let position = rand.unsigned(input.len()); - let deleted_length = rand.unsigned(input.len() - position); - let word_count = 1 + rand.unsigned(3); - let inserted_text = rand.words(word_count); - Edit { - position, - deleted_length, - inserted_text, - } - } -} - -fn invert_edit(input: &Vec, edit: &Edit) -> Edit { - let position = edit.position; - let removed_content = &input[position..(position + edit.deleted_length)]; - Edit { - position, - deleted_length: edit.inserted_text.len(), - inserted_text: removed_content.to_vec(), - } -} - -fn perform_edit(tree: &mut Tree, input: &mut Vec, edit: &Edit) { - let start_byte = edit.position; - let old_end_byte = edit.position + edit.deleted_length; - let new_end_byte = edit.position + edit.inserted_text.len(); - let start_position = position_for_offset(input, start_byte); - let old_end_position = position_for_offset(input, old_end_byte); - input.splice(start_byte..old_end_byte, edit.inserted_text.iter().cloned()); - let new_end_position = position_for_offset(input, new_end_byte); - tree.edit(&InputEdit { - start_byte, - old_end_byte, - new_end_byte, - start_position, - old_end_position, - new_end_position, - }); -} - -fn position_for_offset(input: &Vec, offset: usize) -> Point { - let mut result = Point { row: 0, column: 0 }; - for c in &input[0..offset] { - if *c as char == '\n' { - result.row += 1; - result.column = 0; - } else { - result.column += 1; - } - } - result -} - fn check_consistent_sizes(tree: &Tree, input: &Vec) { fn check(node: Node, line_offsets: &Vec) { let start_byte = node.start_byte(); diff --git a/cli/src/tests/helpers/edits.rs b/cli/src/tests/helpers/edits.rs new file mode 100644 index 00000000..4e4d0c25 --- /dev/null +++ b/cli/src/tests/helpers/edits.rs @@ -0,0 +1,94 @@ +use super::random::Rand; +use tree_sitter::{InputEdit, Point, Tree}; + +pub struct Edit { + pub position: usize, + pub deleted_length: usize, + pub inserted_text: Vec, +} + +pub fn perform_edit(tree: &mut Tree, input: &mut Vec, edit: &Edit) { + let start_byte = edit.position; + let old_end_byte = edit.position + edit.deleted_length; + let new_end_byte = edit.position + edit.inserted_text.len(); + let start_position = position_for_offset(input, start_byte); + let old_end_position = position_for_offset(input, old_end_byte); + input.splice(start_byte..old_end_byte, edit.inserted_text.iter().cloned()); + let new_end_position = position_for_offset(input, new_end_byte); + tree.edit(&InputEdit { + start_byte, + old_end_byte, + new_end_byte, + start_position, + old_end_position, + new_end_position, + }); +} + +pub fn invert_edit(input: &Vec, edit: &Edit) -> Edit { + let position = edit.position; + let removed_content = &input[position..(position + edit.deleted_length)]; + Edit { + position, + deleted_length: edit.inserted_text.len(), + inserted_text: removed_content.to_vec(), + } +} + +pub fn get_random_edit(rand: &mut Rand, input: &Vec) -> Edit { + let choice = rand.unsigned(10); + if choice < 2 { + // Insert text at end + let inserted_text = rand.words(3); + Edit { + position: input.len(), + deleted_length: 0, + inserted_text, + } + } else if choice < 5 { + // Delete text from the end + let mut deleted_length = rand.unsigned(10); + if deleted_length > input.len() { + deleted_length = input.len(); + } + Edit { + position: input.len() - deleted_length, + deleted_length, + inserted_text: vec![], + } + } else if choice < 8 { + // Insert at a random position + let position = rand.unsigned(input.len()); + let word_count = 1 + rand.unsigned(3); + let inserted_text = rand.words(word_count); + Edit { + position, + deleted_length: 0, + inserted_text, + } + } else { + // Replace at random position + let position = rand.unsigned(input.len()); + let deleted_length = rand.unsigned(input.len() - position); + let word_count = 1 + rand.unsigned(3); + let inserted_text = rand.words(word_count); + Edit { + position, + deleted_length, + inserted_text, + } + } +} + +fn position_for_offset(input: &Vec, offset: usize) -> Point { + let mut result = Point { row: 0, column: 0 }; + for c in &input[0..offset] { + if *c as char == '\n' { + result.row += 1; + result.column = 0; + } else { + result.column += 1; + } + } + result +} diff --git a/cli/src/tests/helpers/mod.rs b/cli/src/tests/helpers/mod.rs index bd5c6517..2d1ce574 100644 --- a/cli/src/tests/helpers/mod.rs +++ b/cli/src/tests/helpers/mod.rs @@ -2,3 +2,4 @@ pub(super) mod allocations; pub(super) mod fixtures; pub(super) mod random; pub(super) mod scope_sequence; +pub(super) mod edits; diff --git a/cli/src/tests/parser_test.rs b/cli/src/tests/parser_test.rs index a061d8c6..43fbbc1b 100644 --- a/cli/src/tests/parser_test.rs +++ b/cli/src/tests/parser_test.rs @@ -30,7 +30,7 @@ fn test_basic_parsing() { } #[test] -fn test_logging() { +fn test_parsing_with_logging() { let mut parser = Parser::new(); parser.set_language(rust()).unwrap(); @@ -57,43 +57,7 @@ fn test_logging() { } #[test] -fn test_tree_cursor() { - let mut parser = Parser::new(); - parser.set_language(rust()).unwrap(); - - let tree = parser - .parse_str( - " - struct Stuff { - a: A; - b: Option, - } - ", - None, - ) - .unwrap(); - - let mut cursor = tree.walk(); - assert_eq!(cursor.node().kind(), "source_file"); - - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "struct_item"); - - assert!(cursor.goto_first_child()); - assert_eq!(cursor.node().kind(), "struct"); - assert_eq!(cursor.node().is_named(), false); - - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "type_identifier"); - assert_eq!(cursor.node().is_named(), true); - - assert!(cursor.goto_next_sibling()); - assert_eq!(cursor.node().kind(), "field_declaration_list"); - assert_eq!(cursor.node().is_named(), true); -} - -#[test] -fn test_custom_utf8_input() { +fn test_parsing_with_custom_utf8_input() { let mut parser = Parser::new(); parser.set_language(rust()).unwrap(); @@ -126,7 +90,7 @@ fn test_custom_utf8_input() { } #[test] -fn test_custom_utf16_input() { +fn test_parsing_with_custom_utf16_input() { let mut parser = Parser::new(); parser.set_language(rust()).unwrap(); @@ -162,19 +126,7 @@ fn test_custom_utf16_input() { } #[test] -fn test_node_equality() { - let mut parser = Parser::new(); - parser.set_language(rust()).unwrap(); - let tree = parser.parse_str("struct A {}", None).unwrap(); - let node1 = tree.root_node(); - let node2 = tree.root_node(); - assert_eq!(node1, node2); - assert_eq!(node1.child(0).unwrap(), node2.child(0).unwrap()); - assert_ne!(node1.child(0).unwrap(), node2); -} - -#[test] -fn test_editing() { +fn test_parsing_after_editing() { let mut parser = Parser::new(); parser.set_language(rust()).unwrap(); @@ -256,7 +208,7 @@ fn test_editing() { } #[test] -fn test_parallel_parsing() { +fn test_parsing_on_multiple_threads() { // Parse this source file so that each thread has a non-trivial amount of // work to do. let this_file_source = include_str!("parser_test.rs"); diff --git a/cli/src/tests/tree_test.rs b/cli/src/tests/tree_test.rs index 401ff03a..d3a16cba 100644 --- a/cli/src/tests/tree_test.rs +++ b/cli/src/tests/tree_test.rs @@ -1,10 +1,12 @@ +use super::helpers::edits::{invert_edit, perform_edit, Edit}; use super::helpers::fixtures::get_language; -use tree_sitter::{InputEdit, Language, Parser, Point}; +use std::str; +use tree_sitter::{InputEdit, Parser, Point, Range, Tree}; #[test] -fn test_edit() { +fn test_tree_edit() { let mut parser = Parser::new(); - parser.set_language(javascript()).unwrap(); + parser.set_language(get_language("javascript")).unwrap(); let tree = parser.parse_str(" abc !== def", None).unwrap(); assert_eq!( @@ -186,6 +188,192 @@ fn test_edit() { } } -fn javascript() -> Language { - get_language("javascript") +#[test] +fn test_tree_walk() { + let mut parser = Parser::new(); + parser.set_language(get_language("rust")).unwrap(); + + let tree = parser + .parse_str( + " + struct Stuff { + a: A; + b: Option, + } + ", + None, + ) + .unwrap(); + + let mut cursor = tree.walk(); + assert_eq!(cursor.node().kind(), "source_file"); + + assert!(cursor.goto_first_child()); + assert_eq!(cursor.node().kind(), "struct_item"); + + assert!(cursor.goto_first_child()); + assert_eq!(cursor.node().kind(), "struct"); + assert_eq!(cursor.node().is_named(), false); + + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), "type_identifier"); + assert_eq!(cursor.node().is_named(), true); + + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), "field_declaration_list"); + assert_eq!(cursor.node().is_named(), true); +} + +#[test] +fn test_tree_node_equality() { + let mut parser = Parser::new(); + parser.set_language(get_language("rust")).unwrap(); + let tree = parser.parse_str("struct A {}", None).unwrap(); + let node1 = tree.root_node(); + let node2 = tree.root_node(); + assert_eq!(node1, node2); + assert_eq!(node1.child(0).unwrap(), node2.child(0).unwrap()); + assert_ne!(node1.child(0).unwrap(), node2); +} + +#[test] +fn test_get_changed_ranges() { + let source_code = b"{a: null};\n".to_vec(); + + let mut parser = Parser::new(); + parser.set_language(get_language("javascript")).unwrap(); + let tree = parser + .parse_utf8(&mut |i, _| &source_code[i..], None) + .unwrap(); + + assert_eq!( + tree.root_node().to_sexp(), + "(program (expression_statement (object (pair (property_identifier) (null)))))" + ); + + // Updating one token + { + let mut tree = tree.clone(); + let mut source_code = source_code.clone(); + + // Replace `null` with `nothing` - that token has changed syntax + let edit = Edit { + position: index_of(&source_code, "ull"), + deleted_length: 3, + inserted_text: b"othing".to_vec(), + }; + let inverse_edit = invert_edit(&source_code, &edit); + let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, edit); + assert_eq!(ranges, vec![range_of(&source_code, "nothing")]); + + // Replace `nothing` with `null` - that token has changed syntax + let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, inverse_edit); + assert_eq!(ranges, vec![range_of(&source_code, "null")]); + } + + // Changing only leading whitespace + { + let mut tree = tree.clone(); + let mut source_code = source_code.clone(); + + // Insert leading newline - no changed ranges + let edit = Edit { + position: 0, + deleted_length: 0, + inserted_text: b"\n".to_vec(), + }; + let inverse_edit = invert_edit(&source_code, &edit); + let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, edit); + assert_eq!(ranges, vec![]); + + // Remove leading newline - no changed ranges + let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, inverse_edit); + assert_eq!(ranges, vec![]); + } + + // Inserting elements + { + let mut tree = tree.clone(); + let mut source_code = source_code.clone(); + + // Insert a key-value pair before the `}` - those tokens are changed + let edit1 = Edit { + position: index_of(&source_code, "}"), + deleted_length: 0, + inserted_text: b", b: false".to_vec(), + }; + let inverse_edit1 = invert_edit(&source_code, &edit1); + let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, edit1); + assert_eq!(ranges, vec![range_of(&source_code, ", b: false")]); + + let edit2 = Edit { + position: index_of(&source_code, ", b"), + deleted_length: 0, + inserted_text: b", c: 1".to_vec(), + }; + let inverse_edit2 = invert_edit(&source_code, &edit2); + let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, edit2); + assert_eq!(ranges, vec![range_of(&source_code, ", c: 1")]); + + // Remove the middle pair + let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, inverse_edit2); + assert_eq!(ranges, vec![]); + + // Remove the second pair + let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, inverse_edit1); + assert_eq!(ranges, vec![]); + } + + // Wrapping elements in larger expressions + { + let mut tree = tree.clone(); + let mut source_code = source_code.clone(); + + // Replace `null` with the binary expression `b === null` + let edit1 = Edit { + position: index_of(&source_code, "null"), + deleted_length: 0, + inserted_text: b"b === ".to_vec(), + }; + let inverse_edit1 = invert_edit(&source_code, &edit1); + let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, edit1); + assert_eq!(ranges, vec![range_of(&source_code, "b === null")]); + + // Undo + let ranges = get_changed_ranges(&mut parser, &mut tree, &mut source_code, inverse_edit1); + assert_eq!(ranges, vec![range_of(&source_code, "null")]); + } +} + +fn index_of(text: &Vec, substring: &str) -> usize { + str::from_utf8(text.as_slice()) + .unwrap() + .find(substring) + .unwrap() +} + +fn range_of(text: &Vec, substring: &str) -> Range { + let start_byte = index_of(text, substring); + let end_byte = start_byte + substring.as_bytes().len(); + Range { + start_byte, + end_byte, + start_point: Point::new(0, start_byte), + end_point: Point::new(0, end_byte), + } +} + +fn get_changed_ranges( + parser: &mut Parser, + tree: &mut Tree, + source_code: &mut Vec, + edit: Edit, +) -> Vec { + perform_edit(tree, source_code, &edit); + let new_tree = parser + .parse_utf8(&mut |i, _| &source_code[i..], Some(tree)) + .unwrap(); + let result = tree.changed_ranges(&new_tree); + *tree = new_tree; + result }