Merge branch 'master' into wasm-language
This commit is contained in:
commit
f4e2f68f14
161 changed files with 10293 additions and 4253 deletions
176
cli/src/parse.rs
176
cli/src/parse.rs
|
|
@ -5,7 +5,7 @@ use std::path::Path;
|
|||
use std::sync::atomic::AtomicUsize;
|
||||
use std::time::Instant;
|
||||
use std::{fmt, fs, usize};
|
||||
use tree_sitter::{InputEdit, LogType, Parser, Point, Tree};
|
||||
use tree_sitter::{ffi, InputEdit, Language, LogType, Parser, Point, Tree};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Edit {
|
||||
|
|
@ -30,36 +30,47 @@ impl fmt::Display for Stats {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn parse_file_at_path(
|
||||
parser: &mut Parser,
|
||||
path: &Path,
|
||||
edits: &Vec<&str>,
|
||||
max_path_length: usize,
|
||||
quiet: bool,
|
||||
print_time: bool,
|
||||
timeout: u64,
|
||||
debug: bool,
|
||||
debug_graph: bool,
|
||||
debug_xml: bool,
|
||||
cancellation_flag: Option<&AtomicUsize>,
|
||||
) -> Result<bool> {
|
||||
/// Selects which rendering of the parsed tree `parse_file_at_path` emits.
///
/// `Debug`, `PartialEq` and `Eq` are derived so that values can be logged and
/// compared with `==` in addition to being pattern-matched.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum ParseOutput {
    /// Take the default tree-printing branch (the one previously guarded by
    /// `!quiet`).
    Normal,
    /// Take none of the tree-printing branches.
    Quiet,
    /// Take the branch previously guarded by the `debug_xml` flag
    /// (presumably XML output; the printing code itself is not shown here).
    Xml,
    /// Hand the tree to `util::print_tree_graph`.
    Dot,
}
|
||||
|
||||
pub struct ParseFileOptions<'a> {
|
||||
pub language: Language,
|
||||
pub path: &'a Path,
|
||||
pub edits: &'a [&'a str],
|
||||
pub max_path_length: usize,
|
||||
pub output: ParseOutput,
|
||||
pub print_time: bool,
|
||||
pub timeout: u64,
|
||||
pub debug: bool,
|
||||
pub debug_graph: bool,
|
||||
pub cancellation_flag: Option<&'a AtomicUsize>,
|
||||
pub encoding: Option<u32>,
|
||||
}
|
||||
|
||||
pub fn parse_file_at_path(parser: &mut Parser, opts: ParseFileOptions) -> Result<bool> {
|
||||
let mut _log_session = None;
|
||||
let mut source_code =
|
||||
fs::read(path).with_context(|| format!("Error reading source file {:?}", path))?;
|
||||
parser.set_language(opts.language)?;
|
||||
let mut source_code = fs::read(opts.path)
|
||||
.with_context(|| format!("Error reading source file {:?}", opts.path))?;
|
||||
|
||||
// If the `--cancel` flag was passed, then cancel the parse
|
||||
// when the user types a newline.
|
||||
unsafe { parser.set_cancellation_flag(cancellation_flag) };
|
||||
unsafe { parser.set_cancellation_flag(opts.cancellation_flag) };
|
||||
|
||||
// Set a timeout based on the `--time` flag.
|
||||
parser.set_timeout_micros(timeout);
|
||||
parser.set_timeout_micros(opts.timeout);
|
||||
|
||||
// Render an HTML graph if `--debug-graph` was passed
|
||||
if debug_graph {
|
||||
if opts.debug_graph {
|
||||
_log_session = Some(util::log_graphs(parser, "log.html")?);
|
||||
}
|
||||
// Log to stderr if `--debug` was passed
|
||||
else if debug {
|
||||
else if opts.debug {
|
||||
parser.set_logger(Some(Box::new(|log_type, message| {
|
||||
if log_type == LogType::Lex {
|
||||
io::stderr().write(b" ").unwrap();
|
||||
|
|
@ -69,22 +80,44 @@ pub fn parse_file_at_path(
|
|||
}
|
||||
|
||||
let time = Instant::now();
|
||||
let tree = parser.parse(&source_code, None);
|
||||
|
||||
/// Returns `true` when `bom_bytes` is exactly a two-byte UTF-16 byte-order
/// mark: `FF FE` (little-endian) or `FE FF` (big-endian).
///
/// Any slice whose length is not 2 yields `false`.
///
/// NOTE(review): both byte orders are accepted, but the decoding that follows
/// this check uses `u16::from_le_bytes` only — confirm that big-endian input
/// is handled as intended.
fn is_utf16_bom(bom_bytes: &[u8]) -> bool {
    matches!(bom_bytes, [0xFF, 0xFE] | [0xFE, 0xFF])
}
|
||||
|
||||
let tree = match opts.encoding {
|
||||
Some(encoding) if encoding == ffi::TSInputEncodingUTF16 => {
|
||||
let source_code_utf16 = source_code
|
||||
.chunks_exact(2)
|
||||
.map(|chunk| u16::from_le_bytes([chunk[0], chunk[1]]))
|
||||
.collect::<Vec<_>>();
|
||||
parser.parse_utf16(&source_code_utf16, None)
|
||||
}
|
||||
None if source_code.len() >= 2 && is_utf16_bom(&source_code[0..2]) => {
|
||||
let source_code_utf16 = source_code
|
||||
.chunks_exact(2)
|
||||
.map(|chunk| u16::from_le_bytes([chunk[0], chunk[1]]))
|
||||
.collect::<Vec<_>>();
|
||||
parser.parse_utf16(&source_code_utf16, None)
|
||||
}
|
||||
_ => parser.parse(&source_code, None),
|
||||
};
|
||||
|
||||
let stdout = io::stdout();
|
||||
let mut stdout = stdout.lock();
|
||||
|
||||
if let Some(mut tree) = tree {
|
||||
if debug_graph && !edits.is_empty() {
|
||||
if opts.debug_graph && !opts.edits.is_empty() {
|
||||
println!("BEFORE:\n{}", String::from_utf8_lossy(&source_code));
|
||||
}
|
||||
|
||||
for (i, edit) in edits.iter().enumerate() {
|
||||
for (i, edit) in opts.edits.iter().enumerate() {
|
||||
let edit = parse_edit_flag(&source_code, edit)?;
|
||||
perform_edit(&mut tree, &mut source_code, &edit);
|
||||
perform_edit(&mut tree, &mut source_code, &edit)?;
|
||||
tree = parser.parse(&source_code, Some(&tree)).unwrap();
|
||||
|
||||
if debug_graph {
|
||||
if opts.debug_graph {
|
||||
println!("AFTER {}:\n{}", i, String::from_utf8_lossy(&source_code));
|
||||
}
|
||||
}
|
||||
|
|
@ -93,7 +126,7 @@ pub fn parse_file_at_path(
|
|||
let duration_ms = duration.as_secs() * 1000 + duration.subsec_nanos() as u64 / 1000000;
|
||||
let mut cursor = tree.walk();
|
||||
|
||||
if !quiet {
|
||||
if matches!(opts.output, ParseOutput::Normal) {
|
||||
let mut needs_newline = false;
|
||||
let mut indent_level = 0;
|
||||
let mut did_visit_children = false;
|
||||
|
|
@ -149,7 +182,7 @@ pub fn parse_file_at_path(
|
|||
println!("");
|
||||
}
|
||||
|
||||
if debug_xml {
|
||||
if matches!(opts.output, ParseOutput::Xml) {
|
||||
let mut needs_newline = false;
|
||||
let mut indent_level = 0;
|
||||
let mut did_visit_children = false;
|
||||
|
|
@ -204,6 +237,10 @@ pub fn parse_file_at_path(
|
|||
println!("");
|
||||
}
|
||||
|
||||
if matches!(opts.output, ParseOutput::Dot) {
|
||||
util::print_tree_graph(&tree, "log.html").unwrap();
|
||||
}
|
||||
|
||||
let mut first_error = None;
|
||||
loop {
|
||||
let node = cursor.node();
|
||||
|
|
@ -221,13 +258,13 @@ pub fn parse_file_at_path(
|
|||
}
|
||||
}
|
||||
|
||||
if first_error.is_some() || print_time {
|
||||
if first_error.is_some() || opts.print_time {
|
||||
write!(
|
||||
&mut stdout,
|
||||
"{:width$}\t{} ms",
|
||||
path.to_str().unwrap(),
|
||||
opts.path.to_str().unwrap(),
|
||||
duration_ms,
|
||||
width = max_path_length
|
||||
width = opts.max_path_length
|
||||
)?;
|
||||
if let Some(node) = first_error {
|
||||
let start = node.start_position();
|
||||
|
|
@ -256,29 +293,29 @@ pub fn parse_file_at_path(
|
|||
}
|
||||
|
||||
return Ok(first_error.is_some());
|
||||
} else if print_time {
|
||||
} else if opts.print_time {
|
||||
let duration = time.elapsed();
|
||||
let duration_ms = duration.as_secs() * 1000 + duration.subsec_nanos() as u64 / 1000000;
|
||||
writeln!(
|
||||
&mut stdout,
|
||||
"{:width$}\t{} ms (timed out)",
|
||||
path.to_str().unwrap(),
|
||||
opts.path.to_str().unwrap(),
|
||||
duration_ms,
|
||||
width = max_path_length
|
||||
width = opts.max_path_length
|
||||
)?;
|
||||
}
|
||||
|
||||
Ok(false)
|
||||
}
|
||||
|
||||
pub fn perform_edit(tree: &mut Tree, input: &mut Vec<u8>, edit: &Edit) -> InputEdit {
|
||||
pub fn perform_edit(tree: &mut Tree, input: &mut Vec<u8>, edit: &Edit) -> Result<InputEdit> {
|
||||
let start_byte = edit.position;
|
||||
let old_end_byte = edit.position + edit.deleted_length;
|
||||
let new_end_byte = edit.position + edit.inserted_text.len();
|
||||
let start_position = position_for_offset(input, start_byte);
|
||||
let old_end_position = position_for_offset(input, old_end_byte);
|
||||
let start_position = position_for_offset(input, start_byte)?;
|
||||
let old_end_position = position_for_offset(input, old_end_byte)?;
|
||||
input.splice(start_byte..old_end_byte, edit.inserted_text.iter().cloned());
|
||||
let new_end_position = position_for_offset(input, new_end_byte);
|
||||
let new_end_position = position_for_offset(input, new_end_byte)?;
|
||||
let edit = InputEdit {
|
||||
start_byte,
|
||||
old_end_byte,
|
||||
|
|
@ -288,7 +325,7 @@ pub fn perform_edit(tree: &mut Tree, input: &mut Vec<u8>, edit: &Edit) -> InputE
|
|||
new_end_position,
|
||||
};
|
||||
tree.edit(&edit);
|
||||
edit
|
||||
Ok(edit)
|
||||
}
|
||||
|
||||
fn parse_edit_flag(source_code: &Vec<u8>, flag: &str) -> Result<Edit> {
|
||||
|
|
@ -317,7 +354,7 @@ fn parse_edit_flag(source_code: &Vec<u8>, flag: &str) -> Result<Edit> {
|
|||
let row = usize::from_str_radix(row, 10).map_err(|_| error())?;
|
||||
let column = parts.next().ok_or_else(error)?;
|
||||
let column = usize::from_str_radix(column, 10).map_err(|_| error())?;
|
||||
offset_for_position(source_code, Point { row, column })
|
||||
offset_for_position(source_code, Point { row, column })?
|
||||
} else {
|
||||
usize::from_str_radix(position, 10).map_err(|_| error())?
|
||||
};
|
||||
|
|
@ -332,31 +369,48 @@ fn parse_edit_flag(source_code: &Vec<u8>, flag: &str) -> Result<Edit> {
|
|||
})
|
||||
}
|
||||
|
||||
fn offset_for_position(input: &Vec<u8>, position: Point) -> usize {
|
||||
let mut current_position = Point { row: 0, column: 0 };
|
||||
for (i, c) in input.iter().enumerate() {
|
||||
if *c as char == '\n' {
|
||||
current_position.row += 1;
|
||||
current_position.column = 0;
|
||||
} else {
|
||||
current_position.column += 1;
|
||||
}
|
||||
if current_position > position {
|
||||
return i;
|
||||
pub fn offset_for_position(input: &[u8], position: Point) -> Result<usize> {
|
||||
let mut row = 0;
|
||||
let mut offset = 0;
|
||||
let mut iter = memchr::memchr_iter(b'\n', input);
|
||||
loop {
|
||||
if let Some(pos) = iter.next() {
|
||||
if row < position.row {
|
||||
row += 1;
|
||||
offset = pos;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
offset += 1;
|
||||
break;
|
||||
}
|
||||
return input.len();
|
||||
if position.row - row > 0 {
|
||||
return Err(anyhow!("Failed to address a row: {}", position.row));
|
||||
}
|
||||
if let Some(pos) = iter.next() {
|
||||
if (pos - offset < position.column) || (input[offset] == b'\n' && position.column > 0) {
|
||||
return Err(anyhow!("Failed to address a column: {}", position.column));
|
||||
};
|
||||
} else if input.len() - offset < position.column {
|
||||
return Err(anyhow!("Failed to address a column over the end"));
|
||||
}
|
||||
Ok(offset + position.column)
|
||||
}
|
||||
|
||||
fn position_for_offset(input: &Vec<u8>, offset: usize) -> Point {
|
||||
let mut result = Point { row: 0, column: 0 };
|
||||
for c in &input[0..offset] {
|
||||
if *c as char == '\n' {
|
||||
result.row += 1;
|
||||
result.column = 0;
|
||||
} else {
|
||||
result.column += 1;
|
||||
}
|
||||
pub fn position_for_offset(input: &[u8], offset: usize) -> Result<Point> {
|
||||
if offset > input.len() {
|
||||
return Err(anyhow!("Failed to address an offset: {offset}"));
|
||||
}
|
||||
result
|
||||
let mut result = Point { row: 0, column: 0 };
|
||||
let mut last = 0;
|
||||
for pos in memchr::memchr_iter(b'\n', &input[..offset]) {
|
||||
result.row += 1;
|
||||
last = pos;
|
||||
}
|
||||
result.column = if result.row > 0 {
|
||||
offset - last - 1
|
||||
} else {
|
||||
offset
|
||||
};
|
||||
Ok(result)
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue