Merge pull request #1952 from tree-sitter/tree-included-ranges
Allow retrieving a tree's list of included ranges, fix some included range bugs
This commit is contained in:
commit
36b5b6c89e
13 changed files with 369 additions and 104 deletions
|
|
@ -14,7 +14,7 @@ use crate::{
|
|||
util,
|
||||
};
|
||||
use std::fs;
|
||||
use tree_sitter::{LogType, Node, Parser, Tree};
|
||||
use tree_sitter::{LogType, Node, Parser, Point, Range, Tree};
|
||||
|
||||
#[test]
|
||||
fn test_bash_corpus() {
|
||||
|
|
@ -79,40 +79,49 @@ fn test_rust_corpus() {
|
|||
fn test_language_corpus(language_name: &str) {
|
||||
let grammars_dir = fixtures_dir().join("grammars");
|
||||
let error_corpus_dir = fixtures_dir().join("error_corpus");
|
||||
let template_corpus_dir = fixtures_dir().join("template_corpus");
|
||||
let mut corpus_dir = grammars_dir.join(language_name).join("corpus");
|
||||
if !corpus_dir.is_dir() {
|
||||
corpus_dir = grammars_dir.join(language_name).join("test").join("corpus");
|
||||
}
|
||||
|
||||
let error_corpus_file = error_corpus_dir.join(&format!("{}_errors.txt", language_name));
|
||||
let template_corpus_file =
|
||||
template_corpus_dir.join(&format!("{}_templates.txt", language_name));
|
||||
let main_tests = parse_tests(&corpus_dir).unwrap();
|
||||
let error_tests = parse_tests(&error_corpus_file).unwrap_or(TestEntry::default());
|
||||
let template_tests = parse_tests(&template_corpus_file).unwrap_or(TestEntry::default());
|
||||
let mut tests = flatten_tests(main_tests);
|
||||
tests.extend(flatten_tests(error_tests));
|
||||
tests.extend(flatten_tests(template_tests).into_iter().map(|mut t| {
|
||||
t.template_delimiters = Some(("<%", "%>"));
|
||||
t
|
||||
}));
|
||||
|
||||
let language = get_language(language_name);
|
||||
let mut failure_count = 0;
|
||||
for (example_name, input, expected_output, has_fields) in tests {
|
||||
println!(" {} example - {}", language_name, example_name);
|
||||
for test in tests {
|
||||
println!(" {} example - {}", language_name, test.name);
|
||||
|
||||
let passed = allocations::record(|| {
|
||||
let mut log_session = None;
|
||||
let mut parser = get_parser(&mut log_session, "log.html");
|
||||
parser.set_language(language).unwrap();
|
||||
set_included_ranges(&mut parser, &test.input, test.template_delimiters);
|
||||
|
||||
let tree = parser.parse(&input, None).unwrap();
|
||||
let tree = parser.parse(&test.input, None).unwrap();
|
||||
let mut actual_output = tree.root_node().to_sexp();
|
||||
if !has_fields {
|
||||
if !test.has_fields {
|
||||
actual_output = strip_sexp_fields(actual_output);
|
||||
}
|
||||
|
||||
if actual_output != expected_output {
|
||||
if actual_output != test.output {
|
||||
println!(
|
||||
"Incorrect initial parse for {} - {}",
|
||||
language_name, example_name,
|
||||
language_name, test.name,
|
||||
);
|
||||
print_diff_key();
|
||||
print_diff(&actual_output, &expected_output);
|
||||
print_diff(&actual_output, &test.output);
|
||||
println!("");
|
||||
return false;
|
||||
}
|
||||
|
|
@ -127,7 +136,7 @@ fn test_language_corpus(language_name: &str) {
|
|||
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(language).unwrap();
|
||||
let tree = parser.parse(&input, None).unwrap();
|
||||
let tree = parser.parse(&test.input, None).unwrap();
|
||||
drop(parser);
|
||||
|
||||
for trial in 0..*ITERATION_COUNT {
|
||||
|
|
@ -138,7 +147,7 @@ fn test_language_corpus(language_name: &str) {
|
|||
let mut parser = get_parser(&mut log_session, "log.html");
|
||||
parser.set_language(language).unwrap();
|
||||
let mut tree = tree.clone();
|
||||
let mut input = input.clone();
|
||||
let mut input = test.input.clone();
|
||||
|
||||
if *LOG_GRAPH_ENABLED {
|
||||
eprintln!("{}\n", String::from_utf8_lossy(&input));
|
||||
|
|
@ -158,6 +167,7 @@ fn test_language_corpus(language_name: &str) {
|
|||
eprintln!("{}\n", String::from_utf8_lossy(&input));
|
||||
}
|
||||
|
||||
set_included_ranges(&mut parser, &input, test.template_delimiters);
|
||||
let mut tree2 = parser.parse(&input, Some(&tree)).unwrap();
|
||||
|
||||
// Check that the new tree is consistent.
|
||||
|
|
@ -178,21 +188,22 @@ fn test_language_corpus(language_name: &str) {
|
|||
eprintln!("{}\n", String::from_utf8_lossy(&input));
|
||||
}
|
||||
|
||||
set_included_ranges(&mut parser, &test.input, test.template_delimiters);
|
||||
let tree3 = parser.parse(&input, Some(&tree2)).unwrap();
|
||||
|
||||
// Verify that the final tree matches the expectation from the corpus.
|
||||
let mut actual_output = tree3.root_node().to_sexp();
|
||||
if !has_fields {
|
||||
if !test.has_fields {
|
||||
actual_output = strip_sexp_fields(actual_output);
|
||||
}
|
||||
|
||||
if actual_output != expected_output {
|
||||
if actual_output != test.output {
|
||||
println!(
|
||||
"Incorrect parse for {} - {} - seed {}",
|
||||
language_name, example_name, seed
|
||||
language_name, test.name, seed
|
||||
);
|
||||
print_diff_key();
|
||||
print_diff(&actual_output, &expected_output);
|
||||
print_diff(&actual_output, &test.output);
|
||||
println!("");
|
||||
return false;
|
||||
}
|
||||
|
|
@ -293,23 +304,23 @@ fn test_feature_corpus_files() {
|
|||
eprintln!("test language: {:?}", language_name);
|
||||
}
|
||||
|
||||
for (name, input, expected_output, has_fields) in tests {
|
||||
eprintln!(" example: {:?}", name);
|
||||
for test in tests {
|
||||
eprintln!(" example: {:?}", test.name);
|
||||
|
||||
let passed = allocations::record(|| {
|
||||
let mut log_session = None;
|
||||
let mut parser = get_parser(&mut log_session, "log.html");
|
||||
parser.set_language(language).unwrap();
|
||||
let tree = parser.parse(&input, None).unwrap();
|
||||
let tree = parser.parse(&test.input, None).unwrap();
|
||||
let mut actual_output = tree.root_node().to_sexp();
|
||||
if !has_fields {
|
||||
if !test.has_fields {
|
||||
actual_output = strip_sexp_fields(actual_output);
|
||||
}
|
||||
if actual_output == expected_output {
|
||||
if actual_output == test.output {
|
||||
true
|
||||
} else {
|
||||
print_diff_key();
|
||||
print_diff(&actual_output, &expected_output);
|
||||
print_diff(&actual_output, &test.output);
|
||||
println!("");
|
||||
false
|
||||
}
|
||||
|
|
@ -390,6 +401,7 @@ fn check_changed_ranges(old_tree: &Tree, new_tree: &Tree, input: &Vec<u8>) -> Re
|
|||
|
||||
let old_range = old_tree.root_node().range();
|
||||
let new_range = new_tree.root_node().range();
|
||||
|
||||
let byte_range =
|
||||
old_range.start_byte.min(new_range.start_byte)..old_range.end_byte.max(new_range.end_byte);
|
||||
let point_range = old_range.start_point.min(new_range.start_point)
|
||||
|
|
@ -407,6 +419,45 @@ fn check_changed_ranges(old_tree: &Tree, new_tree: &Tree, input: &Vec<u8>) -> Re
|
|||
old_scope_sequence.check_changes(&new_scope_sequence, &input, &changed_ranges)
|
||||
}
|
||||
|
||||
fn set_included_ranges(parser: &mut Parser, input: &[u8], delimiters: Option<(&str, &str)>) {
|
||||
if let Some((start, end)) = delimiters {
|
||||
let mut ranges = Vec::new();
|
||||
let mut ix = 0;
|
||||
while ix < input.len() {
|
||||
let Some(mut start_ix) = input[ix..].windows(2).position(|win| win == start.as_bytes()) else { break };
|
||||
start_ix += ix + start.len();
|
||||
let end_ix = input[start_ix..]
|
||||
.windows(2)
|
||||
.position(|win| win == end.as_bytes())
|
||||
.map_or(input.len(), |ix| start_ix + ix);
|
||||
ix = end_ix;
|
||||
ranges.push(Range {
|
||||
start_byte: start_ix,
|
||||
end_byte: end_ix,
|
||||
start_point: point_for_offset(input, start_ix),
|
||||
end_point: point_for_offset(input, end_ix),
|
||||
});
|
||||
}
|
||||
|
||||
parser.set_included_ranges(&ranges).unwrap();
|
||||
} else {
|
||||
parser.set_included_ranges(&[]).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
fn point_for_offset(text: &[u8], offset: usize) -> Point {
|
||||
let mut point = Point::default();
|
||||
for byte in &text[..offset] {
|
||||
if *byte == b'\n' {
|
||||
point.row += 1;
|
||||
point.column = 0;
|
||||
} else {
|
||||
point.column += 1;
|
||||
}
|
||||
}
|
||||
point
|
||||
}
|
||||
|
||||
fn get_parser(session: &mut Option<util::LogSession>, log_filename: &str) -> Parser {
|
||||
let mut parser = Parser::new();
|
||||
|
||||
|
|
@ -425,13 +476,16 @@ fn get_parser(session: &mut Option<util::LogSession>, log_filename: &str) -> Par
|
|||
parser
|
||||
}
|
||||
|
||||
fn flatten_tests(test: TestEntry) -> Vec<(String, Vec<u8>, String, bool)> {
|
||||
fn helper(
|
||||
test: TestEntry,
|
||||
is_root: bool,
|
||||
prefix: &str,
|
||||
result: &mut Vec<(String, Vec<u8>, String, bool)>,
|
||||
) {
|
||||
/// A single corpus example, as consumed by the corpus test loops in this file.
struct FlattenedTest {
|
||||
/// Name printed when the example is run or when it fails.
name: String,
|
||||
/// Raw source bytes handed to the parser.
input: Vec<u8>,
|
||||
/// Expected S-expression that the parsed tree's root node is compared against.
output: String,
|
||||
/// When `false`, field names are stripped from the actual S-expression before
/// it is compared with `output`.
has_fields: bool,
|
||||
/// Optional `(start, end)` delimiter pair passed to `set_included_ranges`;
/// `None` for examples that are not template-corpus tests.
template_delimiters: Option<(&'static str, &'static str)>,
|
||||
}
|
||||
|
||||
fn flatten_tests(test: TestEntry) -> Vec<FlattenedTest> {
|
||||
fn helper(test: TestEntry, is_root: bool, prefix: &str, result: &mut Vec<FlattenedTest>) {
|
||||
match test {
|
||||
TestEntry::Example {
|
||||
mut name,
|
||||
|
|
@ -448,7 +502,13 @@ fn flatten_tests(test: TestEntry) -> Vec<(String, Vec<u8>, String, bool)> {
|
|||
return;
|
||||
}
|
||||
}
|
||||
result.push((name, input, output, has_fields));
|
||||
result.push(FlattenedTest {
|
||||
name,
|
||||
input,
|
||||
output,
|
||||
has_fields,
|
||||
template_delimiters: None,
|
||||
});
|
||||
}
|
||||
TestEntry::Group {
|
||||
mut name, children, ..
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ use rand::{
|
|||
};
|
||||
|
||||
const OPERATORS: &[char] = &[
|
||||
'+', '-', '<', '>', '(', ')', '*', '/', '&', '|', '!', ',', '.',
|
||||
'+', '-', '<', '>', '(', ')', '*', '/', '&', '|', '!', ',', '.', '%',
|
||||
];
|
||||
|
||||
pub struct Rand(StdRng);
|
||||
|
|
|
|||
|
|
@ -44,20 +44,10 @@ impl ScopeSequence {
|
|||
text: &Vec<u8>,
|
||||
known_changed_ranges: &Vec<Range>,
|
||||
) -> Result<(), String> {
|
||||
if self.0.len() != text.len() {
|
||||
panic!(
|
||||
"Inconsistent scope sequence: {:?}",
|
||||
self.0
|
||||
.iter()
|
||||
.zip(text.iter().map(|c| *c as char))
|
||||
.collect::<Vec<_>>()
|
||||
);
|
||||
}
|
||||
|
||||
assert_eq!(self.0.len(), other.0.len());
|
||||
let mut position = Point { row: 0, column: 0 };
|
||||
for (i, stack) in self.0.iter().enumerate() {
|
||||
let other_stack = &other.0[i];
|
||||
for i in 0..(self.0.len().max(other.0.len())) {
|
||||
let stack = &self.0.get(i);
|
||||
let other_stack = &other.0.get(i);
|
||||
if *stack != *other_stack && ![b'\r', b'\n'].contains(&text[i]) {
|
||||
let containing_range = known_changed_ranges
|
||||
.iter()
|
||||
|
|
|
|||
|
|
@ -829,6 +829,7 @@ fn test_parsing_with_one_included_range() {
|
|||
js_tree.root_node().start_position(),
|
||||
Point::new(0, source_code.find("console").unwrap())
|
||||
);
|
||||
assert_eq!(js_tree.included_ranges(), &[script_content_node.range()]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -853,28 +854,27 @@ fn test_parsing_with_multiple_included_ranges() {
|
|||
let close_quote_node = template_string_node.child(3).unwrap();
|
||||
|
||||
parser.set_language(get_language("html")).unwrap();
|
||||
parser
|
||||
.set_included_ranges(&[
|
||||
Range {
|
||||
start_byte: open_quote_node.end_byte(),
|
||||
start_point: open_quote_node.end_position(),
|
||||
end_byte: interpolation_node1.start_byte(),
|
||||
end_point: interpolation_node1.start_position(),
|
||||
},
|
||||
Range {
|
||||
start_byte: interpolation_node1.end_byte(),
|
||||
start_point: interpolation_node1.end_position(),
|
||||
end_byte: interpolation_node2.start_byte(),
|
||||
end_point: interpolation_node2.start_position(),
|
||||
},
|
||||
Range {
|
||||
start_byte: interpolation_node2.end_byte(),
|
||||
start_point: interpolation_node2.end_position(),
|
||||
end_byte: close_quote_node.start_byte(),
|
||||
end_point: close_quote_node.start_position(),
|
||||
},
|
||||
])
|
||||
.unwrap();
|
||||
let html_ranges = &[
|
||||
Range {
|
||||
start_byte: open_quote_node.end_byte(),
|
||||
start_point: open_quote_node.end_position(),
|
||||
end_byte: interpolation_node1.start_byte(),
|
||||
end_point: interpolation_node1.start_position(),
|
||||
},
|
||||
Range {
|
||||
start_byte: interpolation_node1.end_byte(),
|
||||
start_point: interpolation_node1.end_position(),
|
||||
end_byte: interpolation_node2.start_byte(),
|
||||
end_point: interpolation_node2.start_position(),
|
||||
},
|
||||
Range {
|
||||
start_byte: interpolation_node2.end_byte(),
|
||||
start_point: interpolation_node2.end_position(),
|
||||
end_byte: close_quote_node.start_byte(),
|
||||
end_point: close_quote_node.start_position(),
|
||||
},
|
||||
];
|
||||
parser.set_included_ranges(html_ranges).unwrap();
|
||||
let html_tree = parser.parse(source_code, None).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
|
|
@ -888,6 +888,7 @@ fn test_parsing_with_multiple_included_ranges() {
|
|||
" (end_tag (tag_name))))",
|
||||
)
|
||||
);
|
||||
assert_eq!(html_tree.included_ranges(), html_ranges);
|
||||
|
||||
let div_element_node = html_tree.root_node().child(0).unwrap();
|
||||
let hello_text_node = div_element_node.child(1).unwrap();
|
||||
|
|
@ -950,7 +951,9 @@ fn test_parsing_with_included_range_containing_mismatched_positions() {
|
|||
|
||||
parser.set_included_ranges(&[range_to_parse]).unwrap();
|
||||
|
||||
let html_tree = parser.parse(source_code, None).unwrap();
|
||||
let html_tree = parser
|
||||
.parse_with(&mut chunked_input(source_code, 3), None)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(html_tree.root_node().range(), range_to_parse);
|
||||
|
||||
|
|
@ -1077,7 +1080,9 @@ fn test_parsing_with_a_newly_excluded_range() {
|
|||
// Parse HTML including the template directive, which will cause an error
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(get_language("html")).unwrap();
|
||||
let mut first_tree = parser.parse(&source_code, None).unwrap();
|
||||
let mut first_tree = parser
|
||||
.parse_with(&mut chunked_input(&source_code, 3), None)
|
||||
.unwrap();
|
||||
|
||||
// Insert code at the beginning of the document.
|
||||
let prefix = "a very very long line of plain text. ";
|
||||
|
|
@ -1112,7 +1117,9 @@ fn test_parsing_with_a_newly_excluded_range() {
|
|||
},
|
||||
])
|
||||
.unwrap();
|
||||
let tree = parser.parse(&source_code, Some(&first_tree)).unwrap();
|
||||
let tree = parser
|
||||
.parse_with(&mut chunked_input(&source_code, 3), Some(&first_tree))
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
tree.root_node().to_sexp(),
|
||||
|
|
@ -1163,7 +1170,9 @@ fn test_parsing_with_a_newly_included_range() {
|
|||
parser
|
||||
.set_included_ranges(&[simple_range(range1_start, range1_end)])
|
||||
.unwrap();
|
||||
let tree = parser.parse(source_code, None).unwrap();
|
||||
let tree = parser
|
||||
.parse_with(&mut chunked_input(&source_code, 3), None)
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
tree.root_node().to_sexp(),
|
||||
concat!(
|
||||
|
|
@ -1180,7 +1189,9 @@ fn test_parsing_with_a_newly_included_range() {
|
|||
simple_range(range3_start, range3_end),
|
||||
])
|
||||
.unwrap();
|
||||
let tree2 = parser.parse(&source_code, Some(&tree)).unwrap();
|
||||
let tree2 = parser
|
||||
.parse_with(&mut chunked_input(&source_code, 3), Some(&tree))
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
tree2.root_node().to_sexp(),
|
||||
concat!(
|
||||
|
|
@ -1288,3 +1299,7 @@ fn simple_range(start: usize, end: usize) -> Range {
|
|||
end_point: Point::new(0, end),
|
||||
}
|
||||
}
|
||||
|
||||
fn chunked_input<'a>(text: &'a str, size: usize) -> impl FnMut(usize, Point) -> &'a [u8] {
|
||||
move |offset, _| text[offset..text.len().min(offset + size)].as_bytes()
|
||||
}
|
||||
|
|
|
|||
|
|
@ -232,6 +232,71 @@ fn test_tree_edit() {
|
|||
}
|
||||
}
|
||||
|
||||
/// Editing a tree that was parsed with included ranges must also adjust the
/// ranges that the tree reports via `included_ranges`.
#[test]
fn test_tree_edit_with_included_ranges() {
    // Everything in this test sits on row 0, so a range's columns equal its bytes.
    let single_line_range = |start: usize, end: usize| Range {
        start_byte: start,
        end_byte: end,
        start_point: Point::new(0, start),
        end_point: Point::new(0, end),
    };

    let mut parser = Parser::new();
    parser.set_language(get_language("html")).unwrap();

    let source = "<div><% if a %><span>a</span><% else %><span>b</span><% end %></div>";

    // The HTML portions of the template, i.e. everything outside `<% ... %>`.
    let html_ranges: Vec<Range> = [(0, 5), (15, 29), (39, 53), (62, 68)]
        .iter()
        .map(|&(start, end)| single_line_range(start, end))
        .collect();
    parser.set_included_ranges(&html_ranges).unwrap();

    let mut tree = parser.parse(source, None).unwrap();

    // Delete bytes 29..53: the `<% else %>` tag and the second `<span>` element.
    let deletion = InputEdit {
        start_byte: 29,
        old_end_byte: 53,
        new_end_byte: 29,
        start_position: Point::new(0, 29),
        old_end_position: Point::new(0, 53),
        new_end_position: Point::new(0, 29),
    };
    tree.edit(&deletion);

    // The range that lay entirely inside the deletion collapses to an empty range
    // at byte 29, and the range after it shifts left by the 24 deleted bytes.
    assert_eq!(
        tree.included_ranges(),
        &[
            single_line_range(0, 5),
            single_line_range(15, 29),
            single_line_range(29, 29),
            single_line_range(38, 44),
        ]
    );
}
|
||||
|
||||
#[test]
|
||||
fn test_tree_cursor() {
|
||||
let mut parser = Parser::new();
|
||||
|
|
|
|||
|
|
@ -345,6 +345,9 @@ extern "C" {
|
|||
#[doc = " Get the language that was used to parse the syntax tree."]
|
||||
pub fn ts_tree_language(arg1: *const TSTree) -> *const TSLanguage;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_tree_included_ranges(arg1: *const TSTree, length: *mut u32) -> *mut TSRange;
|
||||
}
|
||||
extern "C" {
|
||||
#[doc = " Edit the syntax tree to keep it in sync with source code that has been"]
|
||||
#[doc = " edited."]
|
||||
|
|
|
|||
|
|
@ -763,6 +763,18 @@ impl Tree {
|
|||
util::CBufferIter::new(ptr, count as usize).map(|r| r.into())
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the included ranges that were used to parse the syntax tree.
|
||||
pub fn included_ranges(&self) -> Vec<Range> {
|
||||
let mut count = 0u32;
|
||||
unsafe {
|
||||
let ptr = ffi::ts_tree_included_ranges(self.0.as_ptr(), &mut count as *mut u32);
|
||||
let ranges = slice::from_raw_parts(ptr, count as usize);
|
||||
let result = ranges.iter().copied().map(|range| range.into()).collect();
|
||||
(FREE_FN)(ptr as *mut c_void);
|
||||
result
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for Tree {
|
||||
|
|
|
|||
|
|
@ -381,6 +381,13 @@ TSNode ts_tree_root_node_with_offset(
|
|||
*/
|
||||
const TSLanguage *ts_tree_language(const TSTree *);
|
||||
|
||||
/**
|
||||
* Get the array of included ranges that was used to parse the syntax tree.
|
||||
*
|
||||
* The returned pointer must be freed by the caller.
|
||||
*/
|
||||
TSRange *ts_tree_included_ranges(const TSTree *, uint32_t *length);
|
||||
|
||||
/**
|
||||
* Edit the syntax tree to keep it in sync with source code that has been
|
||||
* edited.
|
||||
|
|
|
|||
|
|
@ -104,13 +104,16 @@ static void ts_lexer__get_lookahead(Lexer *self) {
|
|||
|
||||
static void ts_lexer_goto(Lexer *self, Length position) {
|
||||
self->current_position = position;
|
||||
bool found_included_range = false;
|
||||
|
||||
// Move to the first valid position at or after the given position.
|
||||
bool found_included_range = false;
|
||||
for (unsigned i = 0; i < self->included_range_count; i++) {
|
||||
TSRange *included_range = &self->included_ranges[i];
|
||||
if (included_range->end_byte > position.bytes) {
|
||||
if (included_range->start_byte >= position.bytes) {
|
||||
if (
|
||||
included_range->end_byte > self->current_position.bytes &&
|
||||
included_range->end_byte > included_range->start_byte
|
||||
) {
|
||||
if (included_range->start_byte >= self->current_position.bytes) {
|
||||
self->current_position = (Length) {
|
||||
.bytes = included_range->start_byte,
|
||||
.extent = included_range->start_point,
|
||||
|
|
@ -127,8 +130,8 @@ static void ts_lexer_goto(Lexer *self, Length position) {
|
|||
// If the current position is outside of the current chunk of text,
|
||||
// then clear out the current chunk of text.
|
||||
if (self->chunk && (
|
||||
position.bytes < self->chunk_start ||
|
||||
position.bytes >= self->chunk_start + self->chunk_size
|
||||
self->current_position.bytes < self->chunk_start ||
|
||||
self->current_position.bytes >= self->chunk_start + self->chunk_size
|
||||
)) {
|
||||
ts_lexer__clear_chunk(self);
|
||||
}
|
||||
|
|
@ -164,27 +167,31 @@ static void ts_lexer__do_advance(Lexer *self, bool skip) {
|
|||
}
|
||||
}
|
||||
|
||||
const TSRange *current_range = NULL;
|
||||
if (self->current_included_range_index < self->included_range_count) {
|
||||
current_range = &self->included_ranges[self->current_included_range_index];
|
||||
if (self->current_position.bytes == current_range->end_byte) {
|
||||
self->current_included_range_index++;
|
||||
if (self->current_included_range_index < self->included_range_count) {
|
||||
current_range++;
|
||||
self->current_position = (Length) {
|
||||
current_range->start_byte,
|
||||
current_range->start_point,
|
||||
};
|
||||
} else {
|
||||
current_range = NULL;
|
||||
}
|
||||
const TSRange *current_range = &self->included_ranges[self->current_included_range_index];
|
||||
while (
|
||||
self->current_position.bytes >= current_range->end_byte ||
|
||||
current_range->end_byte == current_range->start_byte
|
||||
) {
|
||||
self->current_included_range_index++;
|
||||
if (self->current_included_range_index < self->included_range_count) {
|
||||
current_range++;
|
||||
self->current_position = (Length) {
|
||||
current_range->start_byte,
|
||||
current_range->start_point,
|
||||
};
|
||||
} else {
|
||||
current_range = NULL;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (skip) self->token_start_position = self->current_position;
|
||||
|
||||
if (current_range) {
|
||||
if (self->current_position.bytes >= self->chunk_start + self->chunk_size) {
|
||||
if (
|
||||
self->current_position.bytes < self->chunk_start ||
|
||||
self->current_position.bytes >= self->chunk_start + self->chunk_size
|
||||
) {
|
||||
ts_lexer__get_chunk(self);
|
||||
}
|
||||
ts_lexer__get_lookahead(self);
|
||||
|
|
@ -339,6 +346,13 @@ void ts_lexer_finish(Lexer *self, uint32_t *lookahead_end_byte) {
|
|||
ts_lexer__mark_end(&self->data);
|
||||
}
|
||||
|
||||
// If the token ended at an included range boundary, then its end position
|
||||
// will have been reset to the end of the preceding range. Reset the start
|
||||
// position to match.
|
||||
if (self->token_end_position.bytes < self->token_start_position.bytes) {
|
||||
self->token_start_position = self->token_end_position;
|
||||
}
|
||||
|
||||
uint32_t current_lookahead_end_byte = self->current_position.bytes + 1;
|
||||
|
||||
// In order to determine that a byte sequence is invalid UTF8 or UTF16,
|
||||
|
|
|
|||
|
|
@ -447,8 +447,14 @@ static Subtree ts_parser__lex(
|
|||
// avoid infinite loops which could otherwise occur, because the lexer is
|
||||
// looking for any possible token, instead of looking for the specific set of
|
||||
// tokens that are valid in some parse state.
|
||||
//
|
||||
// Note that it's possible that the token end position may be *before* the
|
||||
// original position of the lexer because of the way that tokens are positioned
|
||||
// at included range boundaries: when a token is terminated at the start of
|
||||
// an included range, it is marked as ending at the *end* of the preceding
|
||||
// included range.
|
||||
if (
|
||||
self->lexer.token_end_position.bytes == current_position.bytes &&
|
||||
self->lexer.token_end_position.bytes <= current_position.bytes &&
|
||||
(error_mode || !ts_stack_has_advanced_since_error(self->stack, version)) &&
|
||||
!external_scanner_state_changed
|
||||
) {
|
||||
|
|
@ -525,10 +531,6 @@ static Subtree ts_parser__lex(
|
|||
self->language
|
||||
);
|
||||
} else {
|
||||
if (self->lexer.token_end_position.bytes < self->lexer.token_start_position.bytes) {
|
||||
self->lexer.token_start_position = self->lexer.token_end_position;
|
||||
}
|
||||
|
||||
bool is_keyword = false;
|
||||
TSSymbol symbol = self->lexer.data.result_symbol;
|
||||
Length padding = length_sub(self->lexer.token_start_position, start_position);
|
||||
|
|
|
|||
|
|
@ -66,17 +66,23 @@ void ts_tree_edit(TSTree *self, const TSInputEdit *edit) {
|
|||
range->end_point = POINT_MAX;
|
||||
}
|
||||
}
|
||||
if (range->start_byte >= edit->old_end_byte) {
|
||||
range->start_byte = edit->new_end_byte + (range->start_byte - edit->old_end_byte);
|
||||
range->start_point = point_add(
|
||||
edit->new_end_point,
|
||||
point_sub(range->start_point, edit->old_end_point)
|
||||
);
|
||||
if (range->start_byte < edit->new_end_byte) {
|
||||
range->start_byte = UINT32_MAX;
|
||||
range->start_point = POINT_MAX;
|
||||
}
|
||||
} else if (range->end_byte > edit->start_byte) {
|
||||
range->end_byte = edit->start_byte;
|
||||
range->end_point = edit->start_point;
|
||||
}
|
||||
if (range->start_byte >= edit->old_end_byte) {
|
||||
range->start_byte = edit->new_end_byte + (range->start_byte - edit->old_end_byte);
|
||||
range->start_point = point_add(
|
||||
edit->new_end_point,
|
||||
point_sub(range->start_point, edit->old_end_point)
|
||||
);
|
||||
if (range->start_byte < edit->new_end_byte) {
|
||||
range->start_byte = UINT32_MAX;
|
||||
range->start_point = POINT_MAX;
|
||||
}
|
||||
} else if (range->start_byte > edit->start_byte) {
|
||||
range->start_byte = edit->start_byte;
|
||||
range->start_point = edit->start_point;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -85,6 +91,13 @@ void ts_tree_edit(TSTree *self, const TSInputEdit *edit) {
|
|||
ts_subtree_pool_delete(&pool);
|
||||
}
|
||||
|
||||
TSRange *ts_tree_included_ranges(const TSTree *self, uint32_t *length) {
|
||||
*length = self->included_range_count;
|
||||
TSRange *ranges = ts_calloc(self->included_range_count, sizeof(TSRange));
|
||||
memcpy(ranges, self->included_ranges, self->included_range_count * sizeof(TSRange));
|
||||
return ranges;
|
||||
}
|
||||
|
||||
TSRange *ts_tree_get_changed_ranges(const TSTree *self, const TSTree *other, uint32_t *count) {
|
||||
TreeCursor cursor1 = {NULL, array_new()};
|
||||
TreeCursor cursor2 = {NULL, array_new()};
|
||||
|
|
|
|||
6
test/fixtures/template_corpus/readme.md
vendored
Normal file
6
test/fixtures/template_corpus/readme.md
vendored
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
The Template Corpus
|
||||
===================
|
||||
|
||||
This directory contains corpus tests that exercise parsing a set of disjoint ranges within a file.
|
||||
|
||||
Each of these input files contains source code surrounded by the delimiters `<%` and `%>`. The content outside of these delimiters is meant to be ignored.
|
||||
78
test/fixtures/template_corpus/ruby_templates.txt
vendored
Normal file
78
test/fixtures/template_corpus/ruby_templates.txt
vendored
Normal file
|
|
@ -0,0 +1,78 @@
|
|||
==============================
|
||||
Templates with errors
|
||||
==============================
|
||||
|
||||
<div>
|
||||
<% if notice.present? %>
|
||||
<p id="notice"><% notice %></p>
|
||||
<% end %>
|
||||
<div>
|
||||
<h1>Foods</h1>
|
||||
<div>
|
||||
<% link_to 'New food', new_food_path, class: "block font-medium" %>
|
||||
<% link_to 'Search Database', database_foods_search_path, class: "block font-medium" %>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<% . render partial: "form", locals: { food: @new_food } %>
|
||||
|
||||
<% form_with url: "/search", method: :get do |form| %>
|
||||
<% form.label :previous_query, 'Search previous foods:' %>
|
||||
<% form.text_field :previous_query %>
|
||||
<% form.submit "Search" %>
|
||||
<% end %>
|
||||
|
||||
<div id="recipes">
|
||||
<% render @foods %>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
---
|
||||
|
||||
(program
|
||||
(if
|
||||
(call (identifier) (identifier))
|
||||
(then (identifier)))
|
||||
(call
|
||||
(identifier)
|
||||
(argument_list
|
||||
(string (string_content))
|
||||
(identifier)
|
||||
(pair (hash_key_symbol) (string (string_content)))))
|
||||
(call
|
||||
(identifier)
|
||||
(argument_list
|
||||
(string (string_content))
|
||||
(identifier)
|
||||
(pair (hash_key_symbol) (string (string_content)))))
|
||||
(ERROR)
|
||||
(call
|
||||
(identifier)
|
||||
(argument_list
|
||||
(pair (hash_key_symbol) (string (string_content)))
|
||||
(pair (hash_key_symbol) (hash (pair (hash_key_symbol) (instance_variable))))))
|
||||
(call
|
||||
(identifier)
|
||||
(argument_list
|
||||
(pair (hash_key_symbol) (string (string_content)))
|
||||
(pair (hash_key_symbol) (simple_symbol)))
|
||||
(do_block
|
||||
(block_parameters
|
||||
(identifier))
|
||||
(body_statement
|
||||
(call
|
||||
(identifier)
|
||||
(identifier)
|
||||
(argument_list (simple_symbol) (string (string_content))))
|
||||
(call
|
||||
(identifier)
|
||||
(identifier)
|
||||
(argument_list
|
||||
(simple_symbol)))
|
||||
(call
|
||||
(identifier)
|
||||
(identifier)
|
||||
(argument_list (string (string_content)))))))
|
||||
(call
|
||||
(identifier)
|
||||
(argument_list (instance_variable))))
|
||||
Loading…
Add table
Add a link
Reference in a new issue