Add incremental parsing unit tests

This commit is contained in:
Max Brunsfeld 2019-02-04 16:43:21 -08:00
parent 4a98f0b87e
commit 9a8cf39277
2 changed files with 127 additions and 68 deletions

View file

@ -1,4 +1,6 @@
use super::random::Rand;
use std::ops::Range;
use std::str;
use tree_sitter::{InputEdit, Point, Tree};
pub struct Edit {
@ -7,6 +9,53 @@ pub struct Edit {
pub inserted_text: Vec<u8>,
}
/// Serves single-byte reads from a borrowed buffer while remembering which
/// offsets were requested, so a test can later inspect which regions of the
/// buffer were touched.
#[derive(Debug)]
pub struct ReadRecorder<'a> {
    /// Buffer that reads are served from.
    content: &'a Vec<u8>,
    /// In-bounds offsets passed to `read`, kept sorted and without duplicates.
    indices_read: Vec<usize>,
}

impl<'a> ReadRecorder<'a> {
    /// Creates a recorder over `content` with no reads recorded yet.
    pub fn new(content: &'a Vec<u8>) -> Self {
        let indices_read = Vec::new();
        Self {
            content,
            indices_read,
        }
    }

    /// Returns the one-byte slice at `offset` and records the offset.
    ///
    /// An `offset` at or past the end of the buffer yields an empty slice and
    /// is not recorded.
    pub fn read(&mut self, offset: usize) -> &'a [u8] {
        let content: &'a Vec<u8> = self.content;
        if offset >= content.len() {
            return &[];
        }
        // `binary_search` reports the insertion point for a missing offset,
        // which keeps `indices_read` sorted; an already-present offset is
        // left alone so each offset is stored at most once.
        if let Err(slot) = self.indices_read.binary_search(&offset) {
            self.indices_read.insert(slot, offset);
        }
        &content[offset..offset + 1]
    }

    /// Groups the recorded offsets into runs of consecutive bytes and returns
    /// each run as a string slice of the underlying buffer, in ascending order.
    ///
    /// # Panics
    ///
    /// Panics if the bytes of a run are not valid UTF-8.
    pub fn strings_read(&self) -> Vec<&'a str> {
        let mut runs = Vec::new();
        let mut current: Option<Range<usize>> = None;

        for &offset in &self.indices_read {
            match current.take() {
                // No run in progress: start one at this offset.
                None => current = Some(offset..offset + 1),
                Some(run) => {
                    if run.end == offset {
                        // Offset is adjacent to the run: grow it by one byte.
                        current = Some(run.start..offset + 1);
                    } else {
                        // Gap: emit the finished run and leave no run open.
                        // NOTE(review): the offset that revealed the gap is
                        // not used to start the next run, so the first byte
                        // after every gap is absent from the output. Existing
                        // expectations may rely on this; confirm before
                        // changing it.
                        runs.push(str::from_utf8(&self.content[run]).unwrap());
                    }
                }
            }
        }

        if let Some(run) = current {
            runs.push(str::from_utf8(&self.content[run]).unwrap());
        }
        runs
    }
}
pub fn perform_edit(tree: &mut Tree, input: &mut Vec<u8>, edit: &Edit) {
let start_byte = edit.position;
let old_end_byte = edit.position + edit.deleted_length;

View file

@ -1,3 +1,4 @@
use super::helpers::edits::{perform_edit, Edit, ReadRecorder};
use super::helpers::fixtures::{get_language, get_test_language};
use crate::generate::generate_parser_for_grammar;
use std::{thread, usize};
@ -126,88 +127,97 @@ fn test_parsing_with_custom_utf16_input() {
assert_eq!(root.child(0).unwrap().kind(), "function_item");
}
// Incremental parsing
#[test]
fn test_parsing_after_editing() {
fn test_parsing_after_editing_beginning_of_code() {
let mut parser = Parser::new();
parser.set_language(get_language("rust")).unwrap();
parser.set_language(get_language("javascript")).unwrap();
let mut input_bytes = "fn test(a: A, c: C) {}".as_bytes();
let mut input_bytes_read = Vec::new();
let mut tree = parser
.parse_utf8(
&mut |offset, _| {
let offset = offset;
if offset < input_bytes.len() {
let result = &input_bytes[offset..offset + 1];
input_bytes_read.extend(result.iter());
result
} else {
&[]
}
},
None,
)
.unwrap();
let parameters_sexp = tree
.root_node()
.named_child(0)
.unwrap()
.named_child(1)
.unwrap()
.to_sexp();
let mut code = b"123 + 456 * (10 + x);".to_vec();
let mut tree = parser.parse_utf8(&mut |i, _| &code[i..], None).unwrap();
assert_eq!(
parameters_sexp,
"(parameters (parameter (identifier) (type_identifier)) (parameter (identifier) (type_identifier)))"
tree.root_node().to_sexp(),
concat!(
"(program (expression_statement (binary_expression ",
"(number) ",
"(binary_expression (number) (parenthesized_expression (binary_expression (number) (identifier)))))))",
)
);
input_bytes_read.clear();
input_bytes = "fn test(a: A, b: B, c: C) {}".as_bytes();
tree.edit(&InputEdit {
start_byte: 14,
old_end_byte: 14,
new_end_byte: 20,
start_position: Point::new(0, 14),
old_end_position: Point::new(0, 14),
new_end_position: Point::new(0, 20),
});
perform_edit(
&mut tree,
&mut code,
&Edit {
position: 3,
deleted_length: 0,
inserted_text: b" || 5".to_vec(),
},
);
let mut recorder = ReadRecorder::new(&code);
let tree = parser
.parse_utf8(
&mut |offset, _| {
let offset = offset;
if offset < input_bytes.len() {
let result = &input_bytes[offset..offset + 1];
input_bytes_read.extend(result.iter());
result
} else {
&[]
}
},
Some(&tree),
)
.parse_utf8(&mut |i, _| recorder.read(i), Some(&tree))
.unwrap();
let parameters_sexp = tree
.root_node()
.named_child(0)
.unwrap()
.named_child(1)
.unwrap()
.to_sexp();
assert_eq!(
parameters_sexp,
"(parameters (parameter (identifier) (type_identifier)) (parameter (identifier) (type_identifier)) (parameter (identifier) (type_identifier)))"
tree.root_node().to_sexp(),
concat!(
"(program (expression_statement (binary_expression ",
"(number) ",
"(binary_expression ",
"(number) ",
"(binary_expression (number) (parenthesized_expression (binary_expression (number) (identifier))))))))",
)
);
let retokenized_content = String::from_utf8(input_bytes_read).unwrap();
assert!(retokenized_content.contains("b: B"));
assert!(!retokenized_content.contains("a: A"));
assert!(!retokenized_content.contains("c: C"));
assert!(!retokenized_content.contains("{}"));
assert_eq!(recorder.strings_read(), vec!["123 || 5 "]);
}
// Inserts ".d" two bytes before the end of a JavaScript expression, reparses
// against the edited old tree, and checks both the resulting syntax tree and
// the exact stretches of source that were read during the reparse.
#[test]
fn test_parsing_after_editing_end_of_code() {
    let mut parser = Parser::new();
    parser.set_language(get_language("javascript")).unwrap();

    // Initial parse of the unedited source.
    let mut code = b"x * (100 + abc);".to_vec();
    let mut tree = parser.parse_utf8(&mut |i, _| &code[i..], None).unwrap();
    assert_eq!(
        tree.root_node().to_sexp(),
        concat!(
            "(program (expression_statement (binary_expression ",
            "(identifier) ",
            "(parenthesized_expression (binary_expression (number) (identifier))))))",
        )
    );

    // The insertion point is computed up front because `code` is mutably
    // borrowed by `perform_edit` below.
    let insertion = Edit {
        position: code.len() - 2,
        deleted_length: 0,
        inserted_text: b".d".to_vec(),
    };
    perform_edit(&mut tree, &mut code, &insertion);

    // Reparse through a recorder so the reads can be inspected afterwards.
    let mut recorder = ReadRecorder::new(&code);
    let reparsed = parser
        .parse_utf8(&mut |i, _| recorder.read(i), Some(&tree))
        .unwrap();
    assert_eq!(
        reparsed.root_node().to_sexp(),
        concat!(
            "(program (expression_statement (binary_expression ",
            "(identifier) ",
            "(parenthesized_expression (binary_expression (number) (member_expression (identifier) (property_identifier)))))))"
        )
    );

    let expected_reads = vec![" * ", "abc.d)"];
    assert_eq!(recorder.strings_read(), expected_reads);
}
// Thread safety
#[test]
fn test_parsing_on_multiple_threads() {
// Parse this source file so that each thread has a non-trivial amount of