Add --xml option to print the parsed syntax tree as XML, using node.start_byte() and node.end_byte() to extract each leaf node's source text

This commit is contained in:
Yijun Yu 2021-01-04 22:07:38 +00:00
parent 115bf3fd44
commit 9e08712773
4 changed files with 75 additions and 0 deletions

16
Cargo.lock generated
View file

@ -272,6 +272,15 @@ version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574"
[[package]]
name = "html-escape"
version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d348900ce941b7474395ba922ed3735a517df4546a2939ddb416ce85eeaa988e"
dependencies = [
"utf8-width",
]
[[package]]
name = "idna"
version = "0.1.5"
@ -841,6 +850,7 @@ dependencies = [
"difference",
"dirs",
"glob",
"html-escape",
"lazy_static",
"libloading",
"log",
@ -940,6 +950,12 @@ version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "796f7e48bef87609f7ade7e06495a87d5cd06c7866e6a5cbfceffc558a243737"
[[package]]
name = "utf8-width"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9071ac216321a4470a69fb2b28cfc68dcd1a39acd877c8be8e014df6772d8efa"
[[package]]
name = "vec_map"
version = "0.8.1"

View file

@ -36,6 +36,7 @@ serde_derive = "1.0"
smallbitvec = "2.3.0"
tiny_http = "0.6"
webbrowser = "0.5.1"
html-escape = "0.2.6"
[dependencies.tree-sitter]
version = ">= 0.17.0"

View file

@ -63,6 +63,7 @@ fn run() -> error::Result<()> {
.arg(Arg::with_name("scope").long("scope").takes_value(true))
.arg(Arg::with_name("debug").long("debug").short("d"))
.arg(Arg::with_name("debug-graph").long("debug-graph").short("D"))
.arg(Arg::with_name("debug-xml").long("xml").short("x"))
.arg(Arg::with_name("quiet").long("quiet").short("q"))
.arg(Arg::with_name("stat").long("stat").short("s"))
.arg(Arg::with_name("time").long("time").short("t"))
@ -222,6 +223,7 @@ fn run() -> error::Result<()> {
} else if let Some(matches) = matches.subcommand_matches("parse") {
let debug = matches.is_present("debug");
let debug_graph = matches.is_present("debug-graph");
let debug_xml = matches.is_present("debug-xml");
let quiet = matches.is_present("quiet");
let time = matches.is_present("time");
let edits = matches
@ -257,6 +259,7 @@ fn run() -> error::Result<()> {
timeout,
debug,
debug_graph,
debug_xml,
Some(&cancellation_flag),
)?;

View file

@ -40,6 +40,7 @@ pub fn parse_file_at_path(
timeout: u64,
debug: bool,
debug_graph: bool,
debug_xml: bool,
cancellation_flag: Option<&AtomicUsize>,
) -> Result<bool> {
let mut _log_session = None;
@ -151,6 +152,60 @@ pub fn parse_file_at_path(
println!("");
}
// Print the syntax tree as XML when the `debug_xml` flag is set.
//
// The tree is walked iteratively with `cursor`. Every named node becomes an
// element whose tag is the node kind; a node reached through a field gets a
// `type="<field name>"` attribute. Every leaf node (named or not) contributes
// its source text, HTML-escaped, taken from `source_code[start_byte..end_byte]`.
if debug_xml {
    let mut needs_newline = false;
    let mut indent_level = 0;
    // `true` when the cursor has just come back up from (or finished) the
    // current node's subtree, i.e. the node is being left rather than entered.
    let mut did_visit_children = false;
    // Stack of currently open element names, so each closing tag matches the
    // opening tag that was written for the same node.
    let mut tags: Vec<&str> = Vec::new();
    loop {
        let node = cursor.node();
        let is_named = node.is_named();
        if did_visit_children {
            // Leaving the node: close its element if one was opened.
            if is_named {
                let tag = tags.pop();
                // Invariant: every named node pushed its kind when entered.
                writeln!(&mut stdout, "</{}>", tag.expect("there is a tag"))?;
                needs_newline = true;
            }
            if cursor.goto_next_sibling() {
                did_visit_children = false;
            } else if cursor.goto_parent() {
                did_visit_children = true;
                indent_level -= 1;
            } else {
                // Back at the root with nowhere left to go: traversal done.
                break;
            }
        } else {
            // Entering the node: open its element if it is named.
            if is_named {
                // `write_all` instead of `write`: `write` may perform a short
                // write and its returned byte count was previously discarded.
                if needs_newline {
                    stdout.write_all(b"\n")?;
                }
                for _ in 0..indent_level {
                    stdout.write_all(b" ")?;
                }
                write!(&mut stdout, "<{}", node.kind())?;
                if let Some(field_name) = cursor.field_name() {
                    write!(&mut stdout, " type=\"{}\"", field_name)?;
                }
                write!(&mut stdout, ">")?;
                tags.push(node.kind());
                needs_newline = true;
            }
            if cursor.goto_first_child() {
                did_visit_children = false;
                indent_level += 1;
            } else {
                // Leaf node: emit the source text it spans.
                did_visit_children = true;
                let start = node.start_byte();
                let end = node.end_byte();
                // The input file is user-supplied and may not be valid UTF-8;
                // decode lossily rather than panicking on such input.
                let value = String::from_utf8_lossy(&source_code[start..end]);
                write!(&mut stdout, "{}", html_escape::encode_text(&*value))?;
            }
        }
    }
    // Rewind the cursor to the root so the code that follows can walk the
    // tree again from the start.
    cursor.reset(tree.root_node());
    println!("");
}
let mut first_error = None;
loop {
let node = cursor.node();