Merge branch 'master' into wasm-language

Max Brunsfeld 2023-10-27 11:57:04 +01:00
commit f4e2f68f14
161 changed files with 10293 additions and 4253 deletions


@ -57,6 +57,7 @@ struct ParseTableBuilder<'a> {
parse_state_info_by_id: Vec<ParseStateInfo<'a>>,
parse_state_queue: VecDeque<ParseStateQueueEntry>,
non_terminal_extra_states: Vec<(Symbol, usize)>,
actual_conflicts: HashSet<Vec<Symbol>>,
parse_table: ParseTable,
}
@ -132,6 +133,20 @@ impl<'a> ParseTableBuilder<'a> {
)?;
}
if !self.actual_conflicts.is_empty() {
println!("Warning: unnecessary conflicts");
for conflict in &self.actual_conflicts {
println!(
" {}",
conflict
.iter()
.map(|symbol| format!("`{}`", self.symbol_name(symbol)))
.collect::<Vec<_>>()
.join(", ")
);
}
}
Ok((self.parse_table, self.parse_state_info_by_id))
}
@ -582,6 +597,7 @@ impl<'a> ParseTableBuilder<'a> {
.expected_conflicts
.contains(&actual_conflict)
{
self.actual_conflicts.remove(&actual_conflict);
return Ok(());
}
@ -964,6 +980,7 @@ pub(crate) fn build_parse_table<'a>(
inlines: &'a InlinedProductionMap,
variable_info: &'a Vec<VariableInfo>,
) -> Result<(ParseTable, Vec<TokenSet>, Vec<ParseStateInfo<'a>>)> {
let actual_conflicts = syntax_grammar.expected_conflicts.iter().cloned().collect();
let item_set_builder = ParseItemSetBuilder::new(syntax_grammar, lexical_grammar, inlines);
let mut following_tokens = vec![TokenSet::new(); lexical_grammar.variables.len()];
populate_following_tokens(
@ -979,6 +996,7 @@ pub(crate) fn build_parse_table<'a>(
item_set_builder,
variable_info,
non_terminal_extra_states: Vec::new(),
actual_conflicts,
state_ids_by_item_set: IndexMap::default(),
core_ids_by_core: HashMap::new(),
parse_state_info_by_id: Vec::new(),
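
For context, the `actual_conflicts` changes above seed a set with every conflict declared in the grammar, remove each one that is actually encountered while building parse states, and warn about whatever is left. A minimal standalone sketch of that bookkeeping, with `usize` standing in for `Symbol` (hypothetical, not code from this commit):

use std::collections::HashSet;

// Seed the set with every declared conflict, drop each one that is
// actually used, and report the leftovers as unnecessary.
fn report_unused_conflicts(expected: &[Vec<usize>], used: &[Vec<usize>]) {
    let mut actual: HashSet<Vec<usize>> = expected.iter().cloned().collect();
    for conflict in used {
        actual.remove(conflict);
    }
    for conflict in &actual {
        println!("Warning: unnecessary conflict: {conflict:?}");
    }
}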


@ -390,12 +390,12 @@ mod tests {
Variable {
name: "token_0".to_string(),
kind: VariableType::Named,
rule: Rule::pattern("[a-f]1|0x\\d"),
rule: Rule::pattern("[a-f]1|0x\\d", ""),
},
Variable {
name: "token_1".to_string(),
kind: VariableType::Named,
rule: Rule::pattern("d*ef"),
rule: Rule::pattern("d*ef", ""),
},
],
})
@ -426,7 +426,7 @@ mod tests {
Variable {
name: "identifier".to_string(),
kind: VariableType::Named,
rule: Rule::pattern("\\w+"),
rule: Rule::pattern("\\w+", ""),
},
Variable {
name: "instanceof".to_string(),
@ -471,7 +471,7 @@ mod tests {
#[test]
fn test_token_conflicts_with_separators() {
let grammar = expand_tokens(ExtractedLexicalGrammar {
separators: vec![Rule::pattern("\\s")],
separators: vec![Rule::pattern("\\s", "")],
variables: vec![
Variable {
name: "x".to_string(),
@ -498,7 +498,7 @@ mod tests {
#[test]
fn test_token_conflicts_with_open_ended_tokens() {
let grammar = expand_tokens(ExtractedLexicalGrammar {
separators: vec![Rule::pattern("\\s")],
separators: vec![Rule::pattern("\\s", "")],
variables: vec![
Variable {
name: "x".to_string(),
@ -508,7 +508,7 @@ mod tests {
Variable {
name: "anything".to_string(),
kind: VariableType::Named,
rule: Rule::prec(Precedence::Integer(-1), Rule::pattern(".*")),
rule: Rule::prec(Precedence::Integer(-1), Rule::pattern(".*", "")),
},
],
})


@ -181,7 +181,11 @@ function normalize(value) {
value
};
case RegExp:
return {
return value.flags ? {
type: 'PATTERN',
value: value.source,
flags: value.flags
} : {
type: 'PATTERN',
value: value.source
};
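
With this change, a rule written as `/foo/i` in grammar.js serializes to `{"type": "PATTERN", "value": "foo", "flags": "i"}`, while a flagless `/foo/` keeps the previous two-field shape, so existing grammar.json files round-trip unchanged.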


@ -63,7 +63,7 @@
},
"supertypes": {
"description": "A list of hidden rule names that should be considered supertypes in the generated node types file. See http://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types.",
"description": "A list of hidden rule names that should be considered supertypes in the generated node types file. See https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types.",
"type": "array",
"items": {
"description": "the name of a rule in `rules` or `extras`",


@ -21,10 +21,10 @@ use anyhow::{anyhow, Context, Result};
use lazy_static::lazy_static;
use regex::{Regex, RegexBuilder};
use semver::Version;
use std::fs;
use std::io::Write;
use std::path::{Path, PathBuf};
use std::process::{Command, Stdio};
use std::{env, fs};
lazy_static! {
static ref JSON_COMMENT_REGEX: Regex = RegexBuilder::new("^\\s*//.*")
@ -44,25 +44,27 @@ pub fn generate_parser_in_directory(
abi_version: usize,
generate_bindings: bool,
report_symbol_name: Option<&str>,
js_runtime: Option<&str>,
) -> Result<()> {
let src_path = repo_path.join("src");
let header_path = src_path.join("tree_sitter");
// Read the grammar.json.
let grammar_json = match grammar_path {
Some(path) => load_grammar_file(path.as_ref(), js_runtime)?,
None => {
let grammar_js_path = grammar_path.map_or(repo_path.join("grammar.js"), |s| s.into());
load_grammar_file(&grammar_js_path, js_runtime)?
}
};
// Ensure that the output directories exist.
fs::create_dir_all(&src_path)?;
fs::create_dir_all(&header_path)?;
// Read the grammar.json.
let grammar_json;
match grammar_path {
Some(path) => {
grammar_json = load_grammar_file(path.as_ref())?;
}
None => {
let grammar_js_path = grammar_path.map_or(repo_path.join("grammar.js"), |s| s.into());
grammar_json = load_grammar_file(&grammar_js_path)?;
fs::write(&src_path.join("grammar.json"), &grammar_json)?;
}
if grammar_path.is_none() {
fs::write(&src_path.join("grammar.json"), &grammar_json)
.with_context(|| format!("Failed to write grammar.json to {:?}", src_path))?;
}
// Parse and preprocess the grammar.
@ -155,10 +157,18 @@ fn generate_parser_for_grammar_with_opts(
})
}
pub fn load_grammar_file(grammar_path: &Path) -> Result<String> {
pub fn load_grammar_file(grammar_path: &Path, js_runtime: Option<&str>) -> Result<String> {
if grammar_path.is_dir() {
return Err(anyhow!(
"Path to a grammar file with `.js` or `.json` extension is required"
));
}
match grammar_path.extension().and_then(|e| e.to_str()) {
Some("js") => Ok(load_js_grammar_file(grammar_path)?),
Some("json") => Ok(fs::read_to_string(grammar_path)?),
Some("js") => Ok(load_js_grammar_file(grammar_path, js_runtime)
.with_context(|| "Failed to load grammar.js")?),
Some("json") => {
Ok(fs::read_to_string(grammar_path).with_context(|| "Failed to load grammar.json")?)
}
_ => Err(anyhow!(
"Unknown grammar file extension: {:?}",
grammar_path
@ -166,21 +176,24 @@ pub fn load_grammar_file(grammar_path: &Path) -> Result<String> {
}
}
fn load_js_grammar_file(grammar_path: &Path) -> Result<String> {
fn load_js_grammar_file(grammar_path: &Path, js_runtime: Option<&str>) -> Result<String> {
let grammar_path = fs::canonicalize(grammar_path)?;
let mut node_process = Command::new("node")
let js_runtime = js_runtime.unwrap_or("node");
let mut node_process = Command::new(js_runtime)
.env("TREE_SITTER_GRAMMAR_PATH", grammar_path)
.stdin(Stdio::piped())
.stdout(Stdio::piped())
.spawn()
.expect("Failed to run `node`");
.with_context(|| format!("Failed to run `{js_runtime}`"))?;
let mut node_stdin = node_process
.stdin
.take()
.expect("Failed to open stdin for node");
.with_context(|| "Failed to open stdin for node")?;
let cli_version = Version::parse(env!("CARGO_PKG_VERSION"))
.expect("Could not parse this package's version as semver.");
.with_context(|| "Could not parse this package's version as semver.")?;
write!(
node_stdin,
"global.TREE_SITTER_CLI_VERSION_MAJOR = {};
@ -188,22 +201,22 @@ fn load_js_grammar_file(grammar_path: &Path) -> Result<String> {
global.TREE_SITTER_CLI_VERSION_PATCH = {};",
cli_version.major, cli_version.minor, cli_version.patch,
)
.expect("Failed to write tree-sitter version to node's stdin");
.with_context(|| "Failed to write tree-sitter version to node's stdin")?;
let javascript_code = include_bytes!("./dsl.js");
node_stdin
.write(javascript_code)
.expect("Failed to write grammar dsl to node's stdin");
.with_context(|| "Failed to write grammar dsl to node's stdin")?;
drop(node_stdin);
let output = node_process
.wait_with_output()
.expect("Failed to read output from node");
.with_context(|| "Failed to read output from node")?;
match output.status.code() {
None => panic!("Node process was killed"),
Some(0) => {}
Some(code) => return Err(anyhow!("Node process exited with status {}", code)),
}
let mut result = String::from_utf8(output.stdout).expect("Got invalid UTF8 from node");
let mut result =
String::from_utf8(output.stdout).with_context(|| "Got invalid UTF8 from node")?;
result.push('\n');
Ok(result)
}
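
A sketch of how the new `js_runtime` parameter is meant to be called from this module. This is a hypothetical call site: whatever runtime is named must, like `node`, evaluate a JavaScript program supplied on stdin (on some systems the binary is installed as `nodejs`):

use std::path::Path;
use anyhow::Result;

fn load_example(js_runtime_flag: Option<&str>) -> Result<String> {
    // `None` keeps the default `node`; `Some("nodejs")` names an alternative.
    load_grammar_file(Path::new("grammar.js"), js_runtime_flag)
}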


@ -1172,12 +1172,12 @@ mod tests {
Variable {
name: "identifier".to_string(),
kind: VariableType::Named,
rule: Rule::pattern("\\w+"),
rule: Rule::pattern("\\w+", ""),
},
Variable {
name: "foo_identifier".to_string(),
kind: VariableType::Named,
rule: Rule::pattern("[\\w-]+"),
rule: Rule::pattern("[\\w-]+", ""),
},
],
..Default::default()
@ -1275,8 +1275,8 @@ mod tests {
name: "script".to_string(),
kind: VariableType::Named,
rule: Rule::seq(vec![
Rule::field("a".to_string(), Rule::pattern("hi")),
Rule::field("b".to_string(), Rule::pattern("bye")),
Rule::field("a".to_string(), Rule::pattern("hi", "")),
Rule::field("b".to_string(), Rule::pattern("bye", "")),
]),
}],
..Default::default()


@ -19,6 +19,7 @@ enum RuleJSON {
},
PATTERN {
value: String,
flags: Option<String>,
},
SYMBOL {
name: String,
@ -143,7 +144,21 @@ fn parse_rule(json: RuleJSON) -> Rule {
} => Rule::alias(parse_rule(*content), value, named),
RuleJSON::BLANK => Rule::Blank,
RuleJSON::STRING { value } => Rule::String(value),
RuleJSON::PATTERN { value } => Rule::Pattern(value),
RuleJSON::PATTERN { value, flags } => Rule::Pattern(
value,
flags.map_or(String::new(), |f| {
f.chars()
.filter(|c| {
if *c == 'i' {
    true
} else {
    // the unicode flag is silently ignored
    if *c != 'u' {
        eprintln!("Warning: unsupported flag {c}");
    }
    false
}
})
.collect()
}),
),
RuleJSON::SYMBOL { name } => Rule::NamedSymbol(name),
RuleJSON::CHOICE { members } => Rule::choice(members.into_iter().map(parse_rule).collect()),
RuleJSON::FIELD { content, name } => Rule::field(name, parse_rule(*content)),
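
A quick illustration of the flag filtering above, copied into a standalone helper (hypothetical, for demonstration only):

fn filter_flags(flags: &str) -> String {
    flags
        .chars()
        .filter(|c| {
            if *c == 'i' {
                true
            } else {
                if *c != 'u' {
                    eprintln!("Warning: unsupported flag {c}");
                }
                false
            }
        })
        .collect()
}

fn main() {
    assert_eq!(filter_flags("i"), "i"); // supported, kept
    assert_eq!(filter_flags("iu"), "i"); // `u` dropped silently
    assert_eq!(filter_flags("g"), ""); // warns, then drops
}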


@ -139,10 +139,10 @@ pub(crate) fn expand_tokens(mut grammar: ExtractedLexicalGrammar) -> Result<Lexi
impl NfaBuilder {
fn expand_rule(&mut self, rule: &Rule, mut next_state_id: u32) -> Result<bool> {
match rule {
Rule::Pattern(s) => {
Rule::Pattern(s, f) => {
let s = preprocess_regex(s);
let ast = parse::Parser::new().parse(&s)?;
self.expand_regex(&ast, next_state_id)
self.expand_regex(&ast, next_state_id, f.contains('i'))
}
Rule::String(s) => {
for c in s.chars().rev() {
@ -210,12 +210,42 @@ impl NfaBuilder {
}
}
fn expand_regex(&mut self, ast: &Ast, mut next_state_id: u32) -> Result<bool> {
fn expand_regex(
&mut self,
ast: &Ast,
mut next_state_id: u32,
case_insensitive: bool,
) -> Result<bool> {
fn inverse_char(c: char) -> char {
match c {
'a'..='z' => (c as u8 - b'a' + b'A') as char,
'A'..='Z' => (c as u8 - b'A' + b'a') as char,
c => c,
}
}
fn with_inverse_char(mut chars: CharacterSet) -> CharacterSet {
for char in chars.clone().chars() {
let inverted = inverse_char(char);
if char != inverted {
chars = chars.add_char(inverted);
}
}
chars
}
match ast {
Ast::Empty(_) => Ok(false),
Ast::Flags(_) => Err(anyhow!("Regex error: Flags are not supported")),
Ast::Literal(literal) => {
self.push_advance(CharacterSet::from_char(literal.c), next_state_id);
let mut char_set = CharacterSet::from_char(literal.c);
if case_insensitive {
let inverted = inverse_char(literal.c);
if literal.c != inverted {
char_set = char_set.add_char(inverted);
}
}
self.push_advance(char_set, next_state_id);
Ok(true)
}
Ast::Dot(_) => {
@ -229,6 +259,9 @@ impl NfaBuilder {
if class.negated {
chars = chars.negate();
}
if case_insensitive {
chars = with_inverse_char(chars);
}
self.push_advance(chars, next_state_id);
Ok(true)
}
@ -237,6 +270,9 @@ impl NfaBuilder {
if class.negated {
chars = chars.negate();
}
if case_insensitive {
chars = with_inverse_char(chars);
}
self.push_advance(chars, next_state_id);
Ok(true)
}
@ -245,48 +281,56 @@ impl NfaBuilder {
if class.negated {
chars = chars.negate();
}
if case_insensitive {
chars = with_inverse_char(chars);
}
self.push_advance(chars, next_state_id);
Ok(true)
}
},
Ast::Repetition(repetition) => match repetition.op.kind {
RepetitionKind::ZeroOrOne => {
self.expand_zero_or_one(&repetition.ast, next_state_id)
self.expand_zero_or_one(&repetition.ast, next_state_id, case_insensitive)
}
RepetitionKind::OneOrMore => {
self.expand_one_or_more(&repetition.ast, next_state_id)
self.expand_one_or_more(&repetition.ast, next_state_id, case_insensitive)
}
RepetitionKind::ZeroOrMore => {
self.expand_zero_or_more(&repetition.ast, next_state_id)
self.expand_zero_or_more(&repetition.ast, next_state_id, case_insensitive)
}
RepetitionKind::Range(RepetitionRange::Exactly(count)) => {
self.expand_count(&repetition.ast, count, next_state_id)
self.expand_count(&repetition.ast, count, next_state_id, case_insensitive)
}
RepetitionKind::Range(RepetitionRange::AtLeast(min)) => {
if self.expand_zero_or_more(&repetition.ast, next_state_id)? {
self.expand_count(&repetition.ast, min, next_state_id)
if self.expand_zero_or_more(&repetition.ast, next_state_id, case_insensitive)? {
self.expand_count(&repetition.ast, min, next_state_id, case_insensitive)
} else {
Ok(false)
}
}
RepetitionKind::Range(RepetitionRange::Bounded(min, max)) => {
let mut result = self.expand_count(&repetition.ast, min, next_state_id)?;
let mut result =
self.expand_count(&repetition.ast, min, next_state_id, case_insensitive)?;
for _ in min..max {
if result {
next_state_id = self.nfa.last_state_id();
}
if self.expand_zero_or_one(&repetition.ast, next_state_id)? {
if self.expand_zero_or_one(
&repetition.ast,
next_state_id,
case_insensitive,
)? {
result = true;
}
}
Ok(result)
}
},
Ast::Group(group) => self.expand_regex(&group.ast, next_state_id),
Ast::Group(group) => self.expand_regex(&group.ast, next_state_id, case_insensitive),
Ast::Alternation(alternation) => {
let mut alternative_state_ids = Vec::new();
for ast in alternation.asts.iter() {
if self.expand_regex(&ast, next_state_id)? {
if self.expand_regex(&ast, next_state_id, case_insensitive)? {
alternative_state_ids.push(self.nfa.last_state_id());
} else {
alternative_state_ids.push(next_state_id);
@ -304,7 +348,7 @@ impl NfaBuilder {
Ast::Concat(concat) => {
let mut result = false;
for ast in concat.asts.iter().rev() {
if self.expand_regex(&ast, next_state_id)? {
if self.expand_regex(&ast, next_state_id, case_insensitive)? {
result = true;
next_state_id = self.nfa.last_state_id();
}
@ -335,13 +379,18 @@ impl NfaBuilder {
}
}
fn expand_one_or_more(&mut self, ast: &Ast, next_state_id: u32) -> Result<bool> {
fn expand_one_or_more(
&mut self,
ast: &Ast,
next_state_id: u32,
case_insensitive: bool,
) -> Result<bool> {
self.nfa.states.push(NfaState::Accept {
variable_index: 0,
precedence: 0,
}); // Placeholder for split
let split_state_id = self.nfa.last_state_id();
if self.expand_regex(&ast, split_state_id)? {
if self.expand_regex(&ast, split_state_id, case_insensitive)? {
self.nfa.states[split_state_id as usize] =
NfaState::Split(self.nfa.last_state_id(), next_state_id);
Ok(true)
@ -351,8 +400,13 @@ impl NfaBuilder {
}
}
fn expand_zero_or_one(&mut self, ast: &Ast, next_state_id: u32) -> Result<bool> {
if self.expand_regex(ast, next_state_id)? {
fn expand_zero_or_one(
&mut self,
ast: &Ast,
next_state_id: u32,
case_insensitive: bool,
) -> Result<bool> {
if self.expand_regex(ast, next_state_id, case_insensitive)? {
self.push_split(next_state_id);
Ok(true)
} else {
@ -360,8 +414,13 @@ impl NfaBuilder {
}
}
fn expand_zero_or_more(&mut self, ast: &Ast, next_state_id: u32) -> Result<bool> {
if self.expand_one_or_more(&ast, next_state_id)? {
fn expand_zero_or_more(
&mut self,
ast: &Ast,
next_state_id: u32,
case_insensitive: bool,
) -> Result<bool> {
if self.expand_one_or_more(&ast, next_state_id, case_insensitive)? {
self.push_split(next_state_id);
Ok(true)
} else {
@ -369,10 +428,16 @@ impl NfaBuilder {
}
}
fn expand_count(&mut self, ast: &Ast, count: u32, mut next_state_id: u32) -> Result<bool> {
fn expand_count(
&mut self,
ast: &Ast,
count: u32,
mut next_state_id: u32,
case_insensitive: bool,
) -> Result<bool> {
let mut result = false;
for _ in 0..count {
if self.expand_regex(ast, next_state_id)? {
if self.expand_regex(ast, next_state_id, case_insensitive)? {
result = true;
next_state_id = self.nfa.last_state_id();
}
@ -475,7 +540,9 @@ impl NfaBuilder {
.add_char(' ')
.add_char('\t')
.add_char('\r')
.add_char('\n'),
.add_char('\n')
.add_char('\x0B')
.add_char('\x0C'),
ClassPerlKind::Word => CharacterSet::empty()
.add_char('_')
.add_range('A', 'Z')
@ -563,7 +630,7 @@ mod tests {
let table = [
// regex with sequences and alternatives
Row {
rules: vec![Rule::pattern("(a|b|c)d(e|f|g)h?")],
rules: vec![Rule::pattern("(a|b|c)d(e|f|g)h?", "")],
separators: vec![],
examples: vec![
("ade1", Some((0, "ade"))),
@ -574,13 +641,13 @@ mod tests {
},
// regex with repeats
Row {
rules: vec![Rule::pattern("a*")],
rules: vec![Rule::pattern("a*", "")],
separators: vec![],
examples: vec![("aaa1", Some((0, "aaa"))), ("b", Some((0, "")))],
},
// regex with repeats in sequences
Row {
rules: vec![Rule::pattern("a((bc)+|(de)*)f")],
rules: vec![Rule::pattern("a((bc)+|(de)*)f", "")],
separators: vec![],
examples: vec![
("af1", Some((0, "af"))),
@ -591,13 +658,13 @@ mod tests {
},
// regex with character ranges
Row {
rules: vec![Rule::pattern("[a-fA-F0-9]+")],
rules: vec![Rule::pattern("[a-fA-F0-9]+", "")],
separators: vec![],
examples: vec![("A1ff0.", Some((0, "A1ff0")))],
},
// regex with perl character classes
Row {
rules: vec![Rule::pattern("\\w\\d\\s")],
rules: vec![Rule::pattern("\\w\\d\\s", "")],
separators: vec![],
examples: vec![("_0 ", Some((0, "_0 ")))],
},
@ -611,7 +678,7 @@ mod tests {
Row {
rules: vec![Rule::repeat(Rule::seq(vec![
Rule::string("{"),
Rule::pattern("[a-f]+"),
Rule::pattern("[a-f]+", ""),
Rule::string("}"),
]))],
separators: vec![],
@ -624,9 +691,9 @@ mod tests {
// longest match rule
Row {
rules: vec![
Rule::pattern("a|bc"),
Rule::pattern("aa"),
Rule::pattern("bcd"),
Rule::pattern("a|bc", ""),
Rule::pattern("aa", ""),
Rule::pattern("bcd", ""),
],
separators: vec![],
examples: vec![
@ -640,7 +707,7 @@ mod tests {
},
// regex with an alternative including the empty string
Row {
rules: vec![Rule::pattern("a(b|)+c")],
rules: vec![Rule::pattern("a(b|)+c", "")],
separators: vec![],
examples: vec![
("ac.", Some((0, "ac"))),
@ -650,8 +717,8 @@ mod tests {
},
// separators
Row {
rules: vec![Rule::pattern("[a-f]+")],
separators: vec![Rule::string("\\\n"), Rule::pattern("\\s")],
rules: vec![Rule::pattern("[a-f]+", "")],
separators: vec![Rule::string("\\\n"), Rule::pattern("\\s", "")],
examples: vec![
(" a", Some((0, "a"))),
(" \nb", Some((0, "b"))),
@ -662,11 +729,11 @@ mod tests {
// shorter tokens with higher precedence
Row {
rules: vec![
Rule::prec(Precedence::Integer(2), Rule::pattern("abc")),
Rule::prec(Precedence::Integer(1), Rule::pattern("ab[cd]e")),
Rule::pattern("[a-e]+"),
Rule::prec(Precedence::Integer(2), Rule::pattern("abc", "")),
Rule::prec(Precedence::Integer(1), Rule::pattern("ab[cd]e", "")),
Rule::pattern("[a-e]+", ""),
],
separators: vec![Rule::string("\\\n"), Rule::pattern("\\s")],
separators: vec![Rule::string("\\\n"), Rule::pattern("\\s", "")],
examples: vec![
("abceef", Some((0, "abc"))),
("abdeef", Some((1, "abde"))),
@ -676,13 +743,13 @@ mod tests {
// immediate tokens with higher precedence
Row {
rules: vec![
Rule::prec(Precedence::Integer(1), Rule::pattern("[^a]+")),
Rule::prec(Precedence::Integer(1), Rule::pattern("[^a]+", "")),
Rule::immediate_token(Rule::prec(
Precedence::Integer(2),
Rule::pattern("[^ab]+"),
Rule::pattern("[^ab]+", ""),
)),
],
separators: vec![Rule::pattern("\\s")],
separators: vec![Rule::pattern("\\s", "")],
examples: vec![("cccb", Some((1, "ccc")))],
},
Row {
@ -704,7 +771,7 @@ mod tests {
// nested choices within sequences
Row {
rules: vec![Rule::seq(vec![
Rule::pattern("[0-9]+"),
Rule::pattern("[0-9]+", ""),
Rule::choice(vec![
Rule::Blank,
Rule::choice(vec![Rule::seq(vec![
@ -713,7 +780,7 @@ mod tests {
Rule::Blank,
Rule::choice(vec![Rule::string("+"), Rule::string("-")]),
]),
Rule::pattern("[0-9]+"),
Rule::pattern("[0-9]+", ""),
])]),
]),
])],
@ -730,7 +797,7 @@ mod tests {
},
// nested groups
Row {
rules: vec![Rule::seq(vec![Rule::pattern(r#"([^x\\]|\\(.|\n))+"#)])],
rules: vec![Rule::seq(vec![Rule::pattern(r#"([^x\\]|\\(.|\n))+"#, "")])],
separators: vec![],
examples: vec![("abcx", Some((0, "abc"))), ("abc\\0x", Some((0, "abc\\0")))],
},
@ -738,11 +805,11 @@ mod tests {
Row {
rules: vec![
// Escaped forward slash (used in JS because '/' is the regex delimiter)
Rule::pattern(r#"\/"#),
Rule::pattern(r#"\/"#, ""),
// Escaped quotes
Rule::pattern(r#"\"\'"#),
Rule::pattern(r#"\"\'"#, ""),
// Quote preceded by a literal backslash
Rule::pattern(r#"[\\']+"#),
Rule::pattern(r#"[\\']+"#, ""),
],
separators: vec![],
examples: vec![
@ -754,8 +821,8 @@ mod tests {
// unicode property escapes
Row {
rules: vec![
Rule::pattern(r#"\p{L}+\P{L}+"#),
Rule::pattern(r#"\p{White_Space}+\P{White_Space}+[\p{White_Space}]*"#),
Rule::pattern(r#"\p{L}+\P{L}+"#, ""),
Rule::pattern(r#"\p{White_Space}+\P{White_Space}+[\p{White_Space}]*"#, ""),
],
separators: vec![],
examples: vec![
@ -765,17 +832,17 @@ mod tests {
},
// unicode property escapes in bracketed sets
Row {
rules: vec![Rule::pattern(r#"[\p{L}\p{Nd}]+"#)],
rules: vec![Rule::pattern(r#"[\p{L}\p{Nd}]+"#, "")],
separators: vec![],
examples: vec![("abΨ12٣٣, ok", Some((0, "abΨ12٣٣")))],
},
// unicode character escapes
Row {
rules: vec![
Rule::pattern(r#"\u{00dc}"#),
Rule::pattern(r#"\U{000000dd}"#),
Rule::pattern(r#"\u00de"#),
Rule::pattern(r#"\U000000df"#),
Rule::pattern(r#"\u{00dc}"#, ""),
Rule::pattern(r#"\U{000000dd}"#, ""),
Rule::pattern(r#"\u00de"#, ""),
Rule::pattern(r#"\U000000df"#, ""),
],
separators: vec![],
examples: vec![
@ -789,13 +856,13 @@ mod tests {
Row {
rules: vec![
// Un-escaped curly braces
Rule::pattern(r#"u{[0-9a-fA-F]+}"#),
Rule::pattern(r#"u{[0-9a-fA-F]+}"#, ""),
// Already-escaped curly braces
Rule::pattern(r#"\{[ab]{3}\}"#),
Rule::pattern(r#"\{[ab]{3}\}"#, ""),
// Unicode codepoints
Rule::pattern(r#"\u{1000A}"#),
Rule::pattern(r#"\u{1000A}"#, ""),
// Unicode codepoints (lowercase)
Rule::pattern(r#"\u{1000b}"#),
Rule::pattern(r#"\u{1000b}"#, ""),
],
separators: vec![],
examples: vec![
@ -807,7 +874,7 @@ mod tests {
},
// Emojis
Row {
rules: vec![Rule::pattern(r"\p{Emoji}+")],
rules: vec![Rule::pattern(r"\p{Emoji}+", "")],
separators: vec![],
examples: vec![
("🐎", Some((0, "🐎"))),
@ -820,7 +887,7 @@ mod tests {
},
// Intersection
Row {
rules: vec![Rule::pattern(r"[[0-7]&&[4-9]]+")],
rules: vec![Rule::pattern(r"[[0-7]&&[4-9]]+", "")],
separators: vec![],
examples: vec![
("456", Some((0, "456"))),
@ -833,7 +900,7 @@ mod tests {
},
// Difference
Row {
rules: vec![Rule::pattern(r"[[0-9]--[4-7]]+")],
rules: vec![Rule::pattern(r"[[0-9]--[4-7]]+", "")],
separators: vec![],
examples: vec![
("123", Some((0, "123"))),
@ -846,7 +913,7 @@ mod tests {
},
// Symmetric difference
Row {
rules: vec![Rule::pattern(r"[[0-7]~~[4-9]]+")],
rules: vec![Rule::pattern(r"[[0-7]~~[4-9]]+", "")],
separators: vec![],
examples: vec![
("123", Some((0, "123"))),
@ -867,7 +934,7 @@ mod tests {
// [6-7]: y y
// [3-9]--[5-7]: y y y y y
// final regex: y y y y y y
rules: vec![Rule::pattern(r"[[[0-5]--[2-4]]~~[[3-9]--[6-7]]]+")],
rules: vec![Rule::pattern(r"[[[0-5]--[2-4]]~~[[3-9]--[6-7]]]+", "")],
separators: vec![],
examples: vec![
("01", Some((0, "01"))),


@ -31,7 +31,7 @@ pub(super) fn extract_default_aliases(
for variable in syntax_grammar.variables.iter() {
for production in variable.productions.iter() {
for step in production.steps.iter() {
let mut status = match step.symbol.kind {
let status = match step.symbol.kind {
SymbolType::External => &mut external_status_list[step.symbol.index],
SymbolType::NonTerminal => &mut non_terminal_status_list[step.symbol.index],
SymbolType::Terminal => &mut terminal_status_list[step.symbol.index],
@ -63,7 +63,7 @@ pub(super) fn extract_default_aliases(
}
for symbol in syntax_grammar.extra_symbols.iter() {
let mut status = match symbol.kind {
let status = match symbol.kind {
SymbolType::External => &mut external_status_list[symbol.index],
SymbolType::NonTerminal => &mut non_terminal_status_list[symbol.index],
SymbolType::Terminal => &mut terminal_status_list[symbol.index],


@ -49,7 +49,7 @@ pub(super) fn extract_tokens(
}) = variable.rule
{
if i > 0 && extractor.extracted_usage_counts[index] == 1 {
let mut lexical_variable = &mut lexical_variables[index];
let lexical_variable = &mut lexical_variables[index];
lexical_variable.kind = variable.kind;
lexical_variable.name = variable.name;
symbol_replacer.replacements.insert(i, index);
@ -209,7 +209,7 @@ impl TokenExtractor {
} else {
Rule::Metadata {
params: params.clone(),
rule: Box::new(self.extract_tokens_in_rule((&rule).clone())),
rule: Box::new(self.extract_tokens_in_rule(&rule)),
}
}
}
@ -320,7 +320,7 @@ mod test {
"rule_0",
Rule::repeat(Rule::seq(vec![
Rule::string("a"),
Rule::pattern("b"),
Rule::pattern("b", ""),
Rule::choice(vec![
Rule::non_terminal(1),
Rule::non_terminal(2),
@ -331,8 +331,8 @@ mod test {
]),
])),
),
Variable::named("rule_1", Rule::pattern("e")),
Variable::named("rule_2", Rule::pattern("b")),
Variable::named("rule_1", Rule::pattern("e", "")),
Variable::named("rule_2", Rule::pattern("b", "")),
Variable::named(
"rule_3",
Rule::seq(vec![Rule::non_terminal(2), Rule::Blank]),
@ -378,12 +378,12 @@ mod test {
lexical_grammar.variables,
vec![
Variable::anonymous("a", Rule::string("a")),
Variable::auxiliary("rule_0_token1", Rule::pattern("b")),
Variable::auxiliary("rule_0_token1", Rule::pattern("b", "")),
Variable::auxiliary(
"rule_0_token2",
Rule::repeat(Rule::choice(vec![Rule::string("c"), Rule::string("d"),]))
),
Variable::named("rule_1", Rule::pattern("e")),
Variable::named("rule_1", Rule::pattern("e", "")),
]
);
}
@ -411,7 +411,7 @@ mod test {
fn test_extracting_extra_symbols() {
let mut grammar = build_grammar(vec![
Variable::named("rule_0", Rule::string("x")),
Variable::named("comment", Rule::pattern("//.*")),
Variable::named("comment", Rule::pattern("//.*", "")),
]);
grammar.extra_symbols = vec![Rule::string(" "), Rule::non_terminal(1)];


@ -203,6 +203,12 @@ pub(super) fn process_inlines(
lexical_grammar.variables[symbol.index].name,
))
}
SymbolType::NonTerminal if symbol.index == 0 => {
return Err(anyhow!(
"Rule `{}` cannot be inlined because it is the first rule",
grammar.variables[symbol.index].name,
))
}
_ => {}
}
}


@ -129,6 +129,7 @@ impl Generator {
}
self.add_lex_modes_list();
self.add_parse_table();
if !self.syntax_grammar.external_tokens.is_empty() {
self.add_external_token_enum();
@ -136,7 +137,6 @@ impl Generator {
self.add_external_scanner_states_list();
}
self.add_parse_table();
self.add_parser_export();
self.buffer
@ -152,49 +152,51 @@ impl Generator {
self.symbol_ids[&Symbol::end()].clone(),
);
self.symbol_map = self
.parse_table
.symbols
.iter()
.map(|symbol| {
let mut mapping = symbol;
self.symbol_map = HashMap::new();
// There can be multiple symbols in the grammar that have the same name and kind,
// due to simple aliases. When that happens, ensure that they map to the same
// public-facing symbol. If one of the symbols is not aliased, choose that one
// to be the public-facing symbol. Otherwise, pick the symbol with the lowest
// numeric value.
if let Some(alias) = self.default_aliases.get(symbol) {
let kind = alias.kind();
for other_symbol in &self.parse_table.symbols {
if let Some(other_alias) = self.default_aliases.get(other_symbol) {
if other_symbol < mapping && other_alias == alias {
mapping = other_symbol;
for symbol in self.parse_table.symbols.iter() {
let mut mapping = symbol;
// There can be multiple symbols in the grammar that have the same name and kind,
// due to simple aliases. When that happens, ensure that they map to the same
// public-facing symbol. If one of the symbols is not aliased, choose that one
// to be the public-facing symbol. Otherwise, pick the symbol with the lowest
// numeric value.
if let Some(alias) = self.default_aliases.get(symbol) {
let kind = alias.kind();
for other_symbol in &self.parse_table.symbols {
if let Some(other_alias) = self.default_aliases.get(other_symbol) {
if other_symbol < mapping && other_alias == alias {
mapping = other_symbol;
}
} else if self.metadata_for_symbol(*other_symbol) == (&alias.value, kind) {
mapping = other_symbol;
break;
}
}
}
// Two anonymous tokens with different flags but the same string value
// should be represented with the same symbol in the public API. Examples:
// * "<" and token(prec(1, "<"))
// * "(" and token.immediate("(")
else if symbol.is_terminal() {
let metadata = self.metadata_for_symbol(*symbol);
for other_symbol in &self.parse_table.symbols {
let other_metadata = self.metadata_for_symbol(*other_symbol);
if other_metadata == metadata {
if let Some(mapped) = self.symbol_map.get(other_symbol) {
if mapped == symbol {
break;
}
} else if self.metadata_for_symbol(*other_symbol) == (&alias.value, kind) {
mapping = other_symbol;
break;
}
}
}
// Two anonymous tokens with different flags but the same string value
// should be represented with the same symbol in the public API. Examples:
// * "<" and token(prec(1, "<"))
// * "(" and token.immediate("(")
else if symbol.is_terminal() {
let metadata = self.metadata_for_symbol(*symbol);
for other_symbol in &self.parse_table.symbols {
let other_metadata = self.metadata_for_symbol(*other_symbol);
if other_metadata == metadata {
mapping = other_symbol;
break;
}
mapping = other_symbol;
break;
}
}
}
(*symbol, *mapping)
})
.collect();
self.symbol_map.insert(*symbol, *mapping);
}
for production_info in &self.parse_table.production_infos {
// Build a list of all field names
@ -254,7 +256,7 @@ impl Generator {
}
fn add_includes(&mut self) {
add_line!(self, "#include <tree_sitter/parser.h>");
add_line!(self, "#include \"tree_sitter/parser.h\"");
add_line!(self, "");
}
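
Two small but deliberate changes in this file: the quoted include makes a generated parser resolve `tree_sitter/parser.h` against its own source tree first (the copy that `generate` writes into `src/tree_sitter/`) rather than requiring a system-wide header, and the renames below give the previously anonymous enums proper names (`ts_symbol_identifiers`, `ts_field_identifiers`, `ts_external_scanner_symbol_identifiers`), presumably so external tooling and debuggers have a type name to refer to.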
@ -336,7 +338,7 @@ impl Generator {
}
fn add_symbol_enum(&mut self) {
add_line!(self, "enum {{");
add_line!(self, "enum ts_symbol_identifiers {{");
indent!(self);
self.symbol_order.insert(Symbol::end(), 0);
let mut i = 1;
@ -408,7 +410,7 @@ impl Generator {
}
fn add_field_name_enum(&mut self) {
add_line!(self, "enum {{");
add_line!(self, "enum ts_field_identifiers {{");
indent!(self);
for (i, field_name) in self.field_names.iter().enumerate() {
add_line!(self, "{} = {},", self.field_id(field_name), i + 1);
@ -764,7 +766,6 @@ impl Generator {
indent!(self);
add_line!(self, "START_LEXER();");
add_line!(self, "eof = lexer->eof(lexer);");
add_line!(self, "switch (state) {{");
indent!(self);
@ -879,14 +880,23 @@ impl Generator {
add!(self, " ||{}", line_break);
}
if range.end == range.start {
if range.start == '\0' {
add!(self, "!eof && ");
}
add!(self, "lookahead == ");
self.add_character(range.start);
} else if range.end as u32 == range.start as u32 + 1 {
if range.start == '\0' {
add!(self, "!eof && ");
}
add!(self, "lookahead == ");
self.add_character(range.start);
add!(self, " ||{}lookahead == ", line_break);
self.add_character(range.end);
} else {
if range.start == '\0' {
add!(self, "!eof && ");
}
add!(self, "(");
self.add_character(range.start);
add!(self, " <= lookahead && lookahead <= ");
@ -1016,7 +1026,7 @@ impl Generator {
}
fn add_external_token_enum(&mut self) {
add_line!(self, "enum {{");
add_line!(self, "enum ts_external_scanner_symbol_identifiers {{");
indent!(self);
for i in 0..self.syntax_grammar.external_tokens.len() {
add_line!(
@ -1525,54 +1535,93 @@ impl Generator {
fn sanitize_identifier(&self, name: &str) -> String {
let mut result = String::with_capacity(name.len());
for c in name.chars() {
if ('a' <= c && c <= 'z')
|| ('A' <= c && c <= 'Z')
|| ('0' <= c && c <= '9')
|| c == '_'
{
if c.is_ascii_alphanumeric() || c == '_' {
result.push(c);
} else {
let replacement = match c {
'~' => "TILDE",
'`' => "BQUOTE",
'!' => "BANG",
'@' => "AT",
'#' => "POUND",
'$' => "DOLLAR",
'%' => "PERCENT",
'^' => "CARET",
'&' => "AMP",
'*' => "STAR",
'(' => "LPAREN",
')' => "RPAREN",
'-' => "DASH",
'+' => "PLUS",
'=' => "EQ",
'{' => "LBRACE",
'}' => "RBRACE",
'[' => "LBRACK",
']' => "RBRACK",
'\\' => "BSLASH",
'|' => "PIPE",
':' => "COLON",
';' => "SEMI",
'"' => "DQUOTE",
'\'' => "SQUOTE",
'<' => "LT",
'>' => "GT",
',' => "COMMA",
'.' => "DOT",
'?' => "QMARK",
'/' => "SLASH",
'\n' => "LF",
'\r' => "CR",
'\t' => "TAB",
_ => continue,
};
if !result.is_empty() && !result.ends_with("_") {
result.push('_');
'special_chars: {
let replacement = match c {
' ' if name.len() == 1 => "SPACE",
'~' => "TILDE",
'`' => "BQUOTE",
'!' => "BANG",
'@' => "AT",
'#' => "POUND",
'$' => "DOLLAR",
'%' => "PERCENT",
'^' => "CARET",
'&' => "AMP",
'*' => "STAR",
'(' => "LPAREN",
')' => "RPAREN",
'-' => "DASH",
'+' => "PLUS",
'=' => "EQ",
'{' => "LBRACE",
'}' => "RBRACE",
'[' => "LBRACK",
']' => "RBRACK",
'\\' => "BSLASH",
'|' => "PIPE",
':' => "COLON",
';' => "SEMI",
'"' => "DQUOTE",
'\'' => "SQUOTE",
'<' => "LT",
'>' => "GT",
',' => "COMMA",
'.' => "DOT",
'?' => "QMARK",
'/' => "SLASH",
'\n' => "LF",
'\r' => "CR",
'\t' => "TAB",
'\0' => "NULL",
'\u{0001}' => "SOH",
'\u{0002}' => "STX",
'\u{0003}' => "ETX",
'\u{0004}' => "EOT",
'\u{0005}' => "ENQ",
'\u{0006}' => "ACK",
'\u{0007}' => "BEL",
'\u{0008}' => "BS",
'\u{000b}' => "VTAB",
'\u{000c}' => "FF",
'\u{000e}' => "SO",
'\u{000f}' => "SI",
'\u{0010}' => "DLE",
'\u{0011}' => "DC1",
'\u{0012}' => "DC2",
'\u{0013}' => "DC3",
'\u{0014}' => "DC4",
'\u{0015}' => "NAK",
'\u{0016}' => "SYN",
'\u{0017}' => "ETB",
'\u{0018}' => "CAN",
'\u{0019}' => "EM",
'\u{001a}' => "SUB",
'\u{001b}' => "ESC",
'\u{001c}' => "FS",
'\u{001d}' => "GS",
'\u{001e}' => "RS",
'\u{001f}' => "US",
'\u{007F}' => "DEL",
'\u{FEFF}' => "BOM",
'\u{0080}'..='\u{FFFF}' => {
result.push_str(&format!("u{:04x}", c as u32));
break 'special_chars;
}
'\u{10000}'..='\u{10FFFF}' => {
result.push_str(&format!("U{:08x}", c as u32));
break 'special_chars;
}
'0'..='9' | 'a'..='z' | 'A'..='Z' | '_' => unreachable!(),
' ' => break 'special_chars,
};
if !result.is_empty() && !result.ends_with("_") {
result.push('_');
}
result += replacement;
}
result += replacement;
}
}
result
@ -1585,10 +1634,19 @@ impl Generator {
'\"' => result += "\\\"",
'?' => result += "\\?",
'\\' => result += "\\\\",
'\u{0007}' => result += "\\a",
'\u{0008}' => result += "\\b",
'\u{000b}' => result += "\\v",
'\u{000c}' => result += "\\f",
'\n' => result += "\\n",
'\r' => result += "\\r",
'\t' => result += "\\t",
'\0' => result += "\\0",
'\u{0001}'..='\u{001f}' => result += &format!("\\x{:02x}", c as u32),
'\u{007F}'..='\u{FFFF}' => result += &format!("\\u{:04x}", c as u32),
'\u{10000}'..='\u{10FFFF}' => {
result.push_str(&format!("\\U{:08x}", c as u32));
}
_ => result.push(c),
}
}
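
With these two changes, every character an identifier or string literal can contain now has a printable spelling: in symbol names, `\u{0007}` becomes `BEL`, a BMP character such as `é` becomes `u00e9`, and an astral character such as `𝒳` (U+1D4B3) becomes `U0001d4b3`; in generated C string literals the same inputs render as `\a`, `\u00e9`, and `\U0001d4b3`.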


@ -56,7 +56,7 @@ pub(crate) struct Symbol {
pub(crate) enum Rule {
Blank,
String(String),
Pattern(String),
Pattern(String, String),
NamedSymbol(String),
Symbol(Symbol),
Choice(Vec<Rule>),
@ -187,8 +187,8 @@ impl Rule {
Rule::String(value.to_string())
}
pub fn pattern(value: &'static str) -> Self {
Rule::Pattern(value.to_string())
pub fn pattern(value: &'static str, flags: &'static str) -> Self {
Rule::Pattern(value.to_string(), flags.to_string())
}
}
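
Downstream of this signature change, every pattern carries its flags explicitly. A minimal sketch of the new call shape — an in-module, hypothetical test, assuming the `PartialEq`/`Debug` derives that the crate's other tests already rely on:

#[cfg(test)]
mod pattern_flags_sketch {
    use super::*;

    #[test]
    fn pattern_carries_flags() {
        // "" means no flags; "i" requests case-insensitive matching.
        assert_eq!(
            Rule::pattern("select", "i"),
            Rule::Pattern("select".to_string(), "i".to_string())
        );
    }
}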


@ -1,4 +1,3 @@
use super::util;
use ansi_term::Color;
use anyhow::Result;
use lazy_static::lazy_static;
@ -281,7 +280,7 @@ fn style_to_css(style: ansi_term::Style) -> String {
fn write_color(buffer: &mut String, color: Color) {
if let Color::RGB(r, g, b) = &color {
write!(buffer, "color: #{:x?}{:x?}{:x?}", r, g, b).unwrap()
write!(buffer, "color: #{r:02x}{g:02x}{b:02x}").unwrap()
} else {
write!(
buffer,
@ -349,7 +348,7 @@ pub fn ansi(
let mut highlighter = Highlighter::new();
let events = highlighter.highlight(config, source, cancellation_flag, |string| {
loader.highlight_config_for_injection_string(string)
loader.highlight_config_for_injection_string(string, config.apply_all_captures)
})?;
let mut style_stack = vec![theme.default_style().ansi];
@ -385,17 +384,17 @@ pub fn html(
config: &HighlightConfiguration,
quiet: bool,
print_time: bool,
cancellation_flag: Option<&AtomicUsize>,
) -> Result<()> {
use std::io::Write;
let stdout = io::stdout();
let mut stdout = stdout.lock();
let time = Instant::now();
let cancellation_flag = util::cancel_on_stdin();
let mut highlighter = Highlighter::new();
let events = highlighter.highlight(config, source, Some(&cancellation_flag), |string| {
loader.highlight_config_for_injection_string(string)
let events = highlighter.highlight(config, source, cancellation_flag, |string| {
loader.highlight_config_for_injection_string(string, config.apply_all_captures)
})?;
let mut renderer = HtmlRenderer::new();
@ -448,7 +447,7 @@ mod tests {
env::set_var("COLORTERM", "");
parse_style(&mut style, Value::String(DARK_CYAN.to_string()));
assert_eq!(style.ansi.foreground, Some(Color::Fixed(36)));
assert_eq!(style.css, Some("style=\'color: #0af87\'".to_string()));
assert_eq!(style.css, Some("style=\'color: #00af87\'".to_string()));
// junglegreen is not an ANSI color and is preserved when the terminal supports it
env::set_var("COLORTERM", "truecolor");


@ -1,3 +1,5 @@
#![doc = include_str!("../README.md")]
pub mod generate;
pub mod highlight;
pub mod logger;
@ -14,3 +16,7 @@ pub mod wasm;
#[cfg(test)]
mod tests;
// To run compile fail tests
#[cfg(doctest)]
mod tests;


@ -1,12 +1,14 @@
use anyhow::{anyhow, Context, Result};
use anyhow::{anyhow, Context, Error, Result};
use clap::{App, AppSettings, Arg, SubCommand};
use glob::glob;
use std::path::Path;
use std::collections::HashSet;
use std::path::{Path, PathBuf};
use std::{env, fs, u64};
use tree_sitter::{Parser, WasmStore};
use tree_sitter::{ffi, Parser, Point, WasmStore};
use tree_sitter_cli::{
generate, highlight, logger, parse, playground, query, tags, test, test_highlight, test_tags,
util, wasm,
generate, highlight, logger,
parse::{self, ParseFileOptions, ParseOutput},
playground, query, tags, test, test_highlight, test_tags, util, wasm,
};
use tree_sitter_config::Config;
use tree_sitter_highlight::Highlighter;
@ -82,6 +84,9 @@ fn run() -> Result<()> {
let wasm_arg = Arg::with_name("wasm")
.long("wasm")
.help("compile parsers to wasm instead of native dynamic libraries");
let apply_all_captures_arg = Arg::with_name("apply-all-captures")
.help("Apply all captures to highlights")
.long("apply-all-captures");
let matches = App::new("tree-sitter")
.author("Max Brunsfeld <maxbrunsfeld@gmail.com>")
@ -113,13 +118,33 @@ fn run() -> Result<()> {
)),
)
.arg(Arg::with_name("no-bindings").long("no-bindings"))
.arg(
Arg::with_name("build")
.long("build")
.short("b")
.help("Compile all defined languages in the current dir"),
)
.arg(&debug_build_arg)
.arg(
Arg::with_name("libdir")
.long("libdir")
.takes_value(true)
.value_name("path"),
)
.arg(
Arg::with_name("report-states-for-rule")
.long("report-states-for-rule")
.value_name("rule-name")
.takes_value(true),
)
.arg(Arg::with_name("no-minimize").long("no-minimize")),
.arg(
Arg::with_name("js-runtime")
.long("js-runtime")
.takes_value(true)
.value_name("executable")
.env("TREE_SITTER_JS_RUNTIME")
.help("Use a JavaScript runtime other than node"),
),
)
.subcommand(
SubCommand::with_name("parse")
@ -132,7 +157,8 @@ fn run() -> Result<()> {
.arg(&debug_build_arg)
.arg(&debug_graph_arg)
.arg(&wasm_arg)
.arg(Arg::with_name("debug-xml").long("xml").short("x"))
.arg(Arg::with_name("output-dot").long("dot"))
.arg(Arg::with_name("output-xml").long("xml").short("x"))
.arg(
Arg::with_name("stat")
.help("Show parsing statistic")
@ -155,6 +181,12 @@ fn run() -> Result<()> {
.takes_value(true)
.multiple(true)
.number_of_values(1),
)
.arg(
Arg::with_name("encoding")
.help("The encoding of the input files")
.long("encoding")
.takes_value(true),
),
)
.subcommand(
@ -167,6 +199,8 @@ fn run() -> Result<()> {
.index(1)
.required(true),
)
.arg(&time_arg)
.arg(&quiet_arg)
.arg(&paths_file_arg)
.arg(&paths_arg.clone().index(2))
.arg(
@ -175,6 +209,12 @@ fn run() -> Result<()> {
.long("byte-range")
.takes_value(true),
)
.arg(
Arg::with_name("row-range")
.help("The range of rows in which the query will be executed")
.long("row-range")
.takes_value(true),
)
.arg(&scope_arg)
.arg(Arg::with_name("captures").long("captures").short("c"))
.arg(Arg::with_name("test").long("test")),
@ -208,7 +248,8 @@ fn run() -> Result<()> {
.arg(&debug_arg)
.arg(&debug_build_arg)
.arg(&debug_graph_arg)
.arg(&wasm_arg),
.arg(&wasm_arg)
.arg(&apply_all_captures_arg),
)
.subcommand(
SubCommand::with_name("highlight")
@ -219,11 +260,31 @@ fn run() -> Result<()> {
.long("html")
.short("H"),
)
.arg(
Arg::with_name("check")
.help("Check that highlighting captures conform strictly to standards")
.long("check"),
)
.arg(
Arg::with_name("captures-path")
.help("Path to a file with captures")
.long("captures-path")
.takes_value(true),
)
.arg(
Arg::with_name("query-paths")
.help("Paths to files with queries")
.long("query-paths")
.takes_value(true)
.multiple(true)
.number_of_values(1),
)
.arg(&scope_arg)
.arg(&time_arg)
.arg(&quiet_arg)
.arg(&paths_file_arg)
.arg(&paths_arg),
.arg(&paths_arg)
.arg(&apply_all_captures_arg),
)
.subcommand(
SubCommand::with_name("build-wasm")
@ -279,6 +340,10 @@ fn run() -> Result<()> {
("generate", Some(matches)) => {
let grammar_path = matches.value_of("grammar-path");
let debug_build = matches.is_present("debug-build");
let build = matches.is_present("build");
let libdir = matches.value_of("libdir");
let js_runtime = matches.value_of("js-runtime");
let report_symbol_name = matches.value_of("report-states-for-rule").or_else(|| {
if matches.is_present("report-states") {
Some("")
@ -289,16 +354,18 @@ fn run() -> Result<()> {
if matches.is_present("log") {
logger::init();
}
let abi_version =
matches
.value_of("abi-version")
.map_or(DEFAULT_GENERATE_ABI_VERSION, |version| {
if version == "latest" {
tree_sitter::LANGUAGE_VERSION
} else {
version.parse().expect("invalid abi version flag")
}
});
let abi_version = matches.value_of("abi-version").map_or(
Ok::<_, Error>(DEFAULT_GENERATE_ABI_VERSION),
|version| {
Ok(if version == "latest" {
tree_sitter::LANGUAGE_VERSION
} else {
version
.parse()
.with_context(|| "invalid abi version flag")?
})
},
)?;
let generate_bindings = !matches.is_present("no-bindings");
generate::generate_parser_in_directory(
&current_dir,
@ -306,7 +373,15 @@ fn run() -> Result<()> {
abi_version,
generate_bindings,
report_symbol_name,
js_runtime,
)?;
if build {
if let Some(path) = libdir {
loader = loader::Loader::with_parser_lib_path(PathBuf::from(path));
}
loader.use_debug_build(debug_build);
loader.languages_at_path(&current_dir)?;
}
}
("test", Some(matches)) => {
@ -317,6 +392,12 @@ fn run() -> Result<()> {
let filter = matches.value_of("filter");
let wasm = matches.is_present("wasm");
let mut parser = Parser::new();
let apply_all_captures = matches.is_present("apply-all-captures");
if debug {
// For augmenting debug logging in external scanners
env::set_var("TREE_SITTER_DEBUG", "1");
}
loader.use_debug_build(debug_build);
@ -364,7 +445,12 @@ fn run() -> Result<()> {
if let Some(store) = store.take() {
highlighter.parser().set_wasm_store(store).unwrap();
}
test_highlight::test_highlights(&loader, &mut highlighter, &test_highlight_dir)?;
test_highlight::test_highlights(
&loader,
&mut highlighter,
&test_highlight_dir,
apply_all_captures,
)?;
store = highlighter.parser().take_wasm_store();
}
@ -382,14 +468,33 @@ fn run() -> Result<()> {
let debug = matches.is_present("debug");
let debug_graph = matches.is_present("debug-graph");
let debug_build = matches.is_present("debug-build");
let debug_xml = matches.is_present("debug-xml");
let quiet = matches.is_present("quiet");
let output = if matches.is_present("output-dot") {
ParseOutput::Dot
} else if matches.is_present("output-xml") {
ParseOutput::Xml
} else if matches.is_present("quiet") {
ParseOutput::Quiet
} else {
ParseOutput::Normal
};
let encoding =
matches
.values_of("encoding")
.map_or(Ok(None), |mut e| match e.next() {
Some("utf16") => Ok(Some(ffi::TSInputEncodingUTF16)),
Some("utf8") => Ok(Some(ffi::TSInputEncodingUTF8)),
Some(_) => Err(anyhow!("Invalid encoding. Expected one of: utf8, utf16")),
None => Ok(None),
})?;
let time = matches.is_present("time");
let wasm = matches.is_present("wasm");
let edits = matches
.values_of("edits")
.map_or(Vec::new(), |e| e.collect());
let cancellation_flag = util::cancel_on_stdin();
let cancellation_flag = util::cancel_on_signal();
let mut parser = Parser::new();
if debug {
@ -430,19 +535,21 @@ fn run() -> Result<()> {
.set_language(language)
.context("incompatible language")?;
let this_file_errored = parse::parse_file_at_path(
&mut parser,
let opts = ParseFileOptions {
language,
path,
&edits,
edits: &edits,
max_path_length,
quiet,
time,
output,
print_time: time,
timeout,
debug,
debug_graph,
debug_xml,
Some(&cancellation_flag),
)?;
cancellation_flag: Some(&cancellation_flag),
encoding,
};
let this_file_errored = parse::parse_file_at_path(&mut parser, opts)?;
if should_track_stats {
stats.total_parses += 1;
@ -465,6 +572,8 @@ fn run() -> Result<()> {
("query", Some(matches)) => {
let ordered_captures = matches.values_of("captures").is_some();
let quiet = matches.values_of("quiet").is_some();
let time = matches.values_of("time").is_some();
let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?;
let loader_config = config.get()?;
loader.find_all_languages(&loader_config)?;
@ -474,9 +583,17 @@ fn run() -> Result<()> {
matches.value_of("scope"),
)?;
let query_path = Path::new(matches.value_of("query-path").unwrap());
let range = matches.value_of("byte-range").map(|br| {
let r: Vec<&str> = br.split(":").collect();
r[0].parse().unwrap()..r[1].parse().unwrap()
let byte_range = matches.value_of("byte-range").and_then(|arg| {
let mut parts = arg.split(":");
let start = parts.next()?.parse().ok()?;
let end = parts.next().unwrap().parse().ok()?;
Some(start..end)
});
let point_range = matches.value_of("row-range").and_then(|arg| {
let mut parts = arg.split(":");
let start = parts.next()?.parse().ok()?;
let end = parts.next().unwrap().parse().ok()?;
Some(Point::new(start, 0)..Point::new(end, 0))
});
let should_test = matches.is_present("test");
query::query_files_at_paths(
@ -484,8 +601,11 @@ fn run() -> Result<()> {
paths,
query_path,
ordered_captures,
range,
byte_range,
point_range,
should_test,
quiet,
time,
)?;
}
@ -511,13 +631,15 @@ fn run() -> Result<()> {
let time = matches.is_present("time");
let quiet = matches.is_present("quiet");
let html_mode = quiet || matches.is_present("html");
let should_check = matches.is_present("check");
let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?;
let apply_all_captures = matches.is_present("apply-all-captures");
if html_mode && !quiet {
println!("{}", highlight::HTML_HEADER);
}
let cancellation_flag = util::cancel_on_stdin();
let cancellation_flag = util::cancel_on_signal();
let mut lang = None;
if let Some(scope) = matches.value_of("scope") {
@ -527,6 +649,15 @@ fn run() -> Result<()> {
}
}
let query_paths = matches.values_of("query-paths").map_or(None, |e| {
Some(
e.collect::<Vec<_>>()
.into_iter()
.map(|s| s.to_string())
.collect::<Vec<_>>(),
)
});
for path in paths {
let path = Path::new(&path);
let (language, language_config) = match lang {
@ -540,7 +671,45 @@ fn run() -> Result<()> {
},
};
if let Some(highlight_config) = language_config.highlight_config(language)? {
if let Some(highlight_config) = language_config.highlight_config(
language,
apply_all_captures,
query_paths.as_deref(),
)? {
if should_check {
let names = if let Some(path) = matches.value_of("captures-path") {
let path = Path::new(path);
let file = fs::read_to_string(path)?;
let capture_names = file
.lines()
.filter_map(|line| {
if line.trim().is_empty() || line.trim().starts_with(';') {
return None;
}
line.split(';').next().map(|s| s.trim().trim_matches('"'))
})
.collect::<HashSet<_>>();
highlight_config.nonconformant_capture_names(&capture_names)
} else {
highlight_config.nonconformant_capture_names(&HashSet::new())
};
if names.is_empty() {
eprintln!("All highlight captures conform to standards.");
} else {
eprintln!(
"Non-standard highlight {} detected:",
if names.len() > 1 {
"captures"
} else {
"capture"
}
);
for name in names {
eprintln!("* {}", name);
}
}
}
let source = fs::read(path)?;
if html_mode {
highlight::html(
@ -550,6 +719,7 @@ fn run() -> Result<()> {
highlight_config,
quiet,
time,
Some(&cancellation_flag),
)?;
} else {
highlight::ansi(
@ -582,7 +752,7 @@ fn run() -> Result<()> {
("playground", Some(matches)) => {
let open_in_browser = !matches.is_present("quiet");
playground::serve(&current_dir, open_in_browser);
playground::serve(&current_dir, open_in_browser)?;
}
("dump-languages", Some(_)) => {


@ -5,7 +5,7 @@ use std::path::Path;
use std::sync::atomic::AtomicUsize;
use std::time::Instant;
use std::{fmt, fs, usize};
use tree_sitter::{InputEdit, LogType, Parser, Point, Tree};
use tree_sitter::{ffi, InputEdit, Language, LogType, Parser, Point, Tree};
#[derive(Debug)]
pub struct Edit {
@ -30,36 +30,47 @@ impl fmt::Display for Stats {
}
}
pub fn parse_file_at_path(
parser: &mut Parser,
path: &Path,
edits: &Vec<&str>,
max_path_length: usize,
quiet: bool,
print_time: bool,
timeout: u64,
debug: bool,
debug_graph: bool,
debug_xml: bool,
cancellation_flag: Option<&AtomicUsize>,
) -> Result<bool> {
#[derive(Copy, Clone)]
pub enum ParseOutput {
Normal,
Quiet,
Xml,
Dot,
}
pub struct ParseFileOptions<'a> {
pub language: Language,
pub path: &'a Path,
pub edits: &'a [&'a str],
pub max_path_length: usize,
pub output: ParseOutput,
pub print_time: bool,
pub timeout: u64,
pub debug: bool,
pub debug_graph: bool,
pub cancellation_flag: Option<&'a AtomicUsize>,
pub encoding: Option<u32>,
}
pub fn parse_file_at_path(parser: &mut Parser, opts: ParseFileOptions) -> Result<bool> {
let mut _log_session = None;
let mut source_code =
fs::read(path).with_context(|| format!("Error reading source file {:?}", path))?;
parser.set_language(opts.language)?;
let mut source_code = fs::read(opts.path)
.with_context(|| format!("Error reading source file {:?}", opts.path))?;
// If the `--cancel` flag was passed, then cancel the parse
// when the user types a newline.
unsafe { parser.set_cancellation_flag(cancellation_flag) };
unsafe { parser.set_cancellation_flag(opts.cancellation_flag) };
// Set a timeout based on the `--time` flag.
parser.set_timeout_micros(timeout);
parser.set_timeout_micros(opts.timeout);
// Render an HTML graph if `--debug-graph` was passed
if debug_graph {
if opts.debug_graph {
_log_session = Some(util::log_graphs(parser, "log.html")?);
}
// Log to stderr if `--debug` was passed
else if debug {
else if opts.debug {
parser.set_logger(Some(Box::new(|log_type, message| {
if log_type == LogType::Lex {
io::stderr().write(b" ").unwrap();
@ -69,22 +80,44 @@ pub fn parse_file_at_path(
}
let time = Instant::now();
let tree = parser.parse(&source_code, None);
#[inline(always)]
fn is_utf16_bom(bom_bytes: &[u8]) -> bool {
bom_bytes == [0xFF, 0xFE] || bom_bytes == [0xFE, 0xFF]
}
let tree = match opts.encoding {
Some(encoding) if encoding == ffi::TSInputEncodingUTF16 => {
let source_code_utf16 = source_code
.chunks_exact(2)
.map(|chunk| u16::from_le_bytes([chunk[0], chunk[1]]))
.collect::<Vec<_>>();
parser.parse_utf16(&source_code_utf16, None)
}
None if source_code.len() >= 2 && is_utf16_bom(&source_code[0..2]) => {
let source_code_utf16 = source_code
.chunks_exact(2)
.map(|chunk| u16::from_le_bytes([chunk[0], chunk[1]]))
.collect::<Vec<_>>();
parser.parse_utf16(&source_code_utf16, None)
}
_ => parser.parse(&source_code, None),
};
let stdout = io::stdout();
let mut stdout = stdout.lock();
if let Some(mut tree) = tree {
if debug_graph && !edits.is_empty() {
if opts.debug_graph && !opts.edits.is_empty() {
println!("BEFORE:\n{}", String::from_utf8_lossy(&source_code));
}
for (i, edit) in edits.iter().enumerate() {
for (i, edit) in opts.edits.iter().enumerate() {
let edit = parse_edit_flag(&source_code, edit)?;
perform_edit(&mut tree, &mut source_code, &edit);
perform_edit(&mut tree, &mut source_code, &edit)?;
tree = parser.parse(&source_code, Some(&tree)).unwrap();
if debug_graph {
if opts.debug_graph {
println!("AFTER {}:\n{}", i, String::from_utf8_lossy(&source_code));
}
}
@ -93,7 +126,7 @@ pub fn parse_file_at_path(
let duration_ms = duration.as_secs() * 1000 + duration.subsec_nanos() as u64 / 1000000;
let mut cursor = tree.walk();
if !quiet {
if matches!(opts.output, ParseOutput::Normal) {
let mut needs_newline = false;
let mut indent_level = 0;
let mut did_visit_children = false;
@ -149,7 +182,7 @@ pub fn parse_file_at_path(
println!("");
}
if debug_xml {
if matches!(opts.output, ParseOutput::Xml) {
let mut needs_newline = false;
let mut indent_level = 0;
let mut did_visit_children = false;
@ -204,6 +237,10 @@ pub fn parse_file_at_path(
println!("");
}
if matches!(opts.output, ParseOutput::Dot) {
util::print_tree_graph(&tree, "log.html").unwrap();
}
let mut first_error = None;
loop {
let node = cursor.node();
@ -221,13 +258,13 @@ pub fn parse_file_at_path(
}
}
if first_error.is_some() || print_time {
if first_error.is_some() || opts.print_time {
write!(
&mut stdout,
"{:width$}\t{} ms",
path.to_str().unwrap(),
opts.path.to_str().unwrap(),
duration_ms,
width = max_path_length
width = opts.max_path_length
)?;
if let Some(node) = first_error {
let start = node.start_position();
@ -256,29 +293,29 @@ pub fn parse_file_at_path(
}
return Ok(first_error.is_some());
} else if print_time {
} else if opts.print_time {
let duration = time.elapsed();
let duration_ms = duration.as_secs() * 1000 + duration.subsec_nanos() as u64 / 1000000;
writeln!(
&mut stdout,
"{:width$}\t{} ms (timed out)",
path.to_str().unwrap(),
opts.path.to_str().unwrap(),
duration_ms,
width = max_path_length
width = opts.max_path_length
)?;
}
Ok(false)
}
pub fn perform_edit(tree: &mut Tree, input: &mut Vec<u8>, edit: &Edit) -> InputEdit {
pub fn perform_edit(tree: &mut Tree, input: &mut Vec<u8>, edit: &Edit) -> Result<InputEdit> {
let start_byte = edit.position;
let old_end_byte = edit.position + edit.deleted_length;
let new_end_byte = edit.position + edit.inserted_text.len();
let start_position = position_for_offset(input, start_byte);
let old_end_position = position_for_offset(input, old_end_byte);
let start_position = position_for_offset(input, start_byte)?;
let old_end_position = position_for_offset(input, old_end_byte)?;
input.splice(start_byte..old_end_byte, edit.inserted_text.iter().cloned());
let new_end_position = position_for_offset(input, new_end_byte);
let new_end_position = position_for_offset(input, new_end_byte)?;
let edit = InputEdit {
start_byte,
old_end_byte,
@ -288,7 +325,7 @@ pub fn perform_edit(tree: &mut Tree, input: &mut Vec<u8>, edit: &Edit) -> InputE
new_end_position,
};
tree.edit(&edit);
edit
Ok(edit)
}
fn parse_edit_flag(source_code: &Vec<u8>, flag: &str) -> Result<Edit> {
@ -317,7 +354,7 @@ fn parse_edit_flag(source_code: &Vec<u8>, flag: &str) -> Result<Edit> {
let row = usize::from_str_radix(row, 10).map_err(|_| error())?;
let column = parts.next().ok_or_else(error)?;
let column = usize::from_str_radix(column, 10).map_err(|_| error())?;
offset_for_position(source_code, Point { row, column })
offset_for_position(source_code, Point { row, column })?
} else {
usize::from_str_radix(position, 10).map_err(|_| error())?
};
@ -332,31 +369,48 @@ fn parse_edit_flag(source_code: &Vec<u8>, flag: &str) -> Result<Edit> {
})
}
fn offset_for_position(input: &Vec<u8>, position: Point) -> usize {
let mut current_position = Point { row: 0, column: 0 };
for (i, c) in input.iter().enumerate() {
if *c as char == '\n' {
current_position.row += 1;
current_position.column = 0;
} else {
current_position.column += 1;
}
if current_position > position {
return i;
pub fn offset_for_position(input: &[u8], position: Point) -> Result<usize> {
let mut row = 0;
let mut offset = 0;
let mut iter = memchr::memchr_iter(b'\n', input);
loop {
if let Some(pos) = iter.next() {
if row < position.row {
row += 1;
offset = pos;
continue;
}
}
offset += 1;
break;
}
return input.len();
if position.row - row > 0 {
return Err(anyhow!("Failed to address a row: {}", position.row));
}
if let Some(pos) = iter.next() {
if (pos - offset < position.column) || (input[offset] == b'\n' && position.column > 0) {
return Err(anyhow!("Failed to address a column: {}", position.column));
};
} else if input.len() - offset < position.column {
return Err(anyhow!("Failed to address a column over the end"));
}
Ok(offset + position.column)
}
fn position_for_offset(input: &Vec<u8>, offset: usize) -> Point {
let mut result = Point { row: 0, column: 0 };
for c in &input[0..offset] {
if *c as char == '\n' {
result.row += 1;
result.column = 0;
} else {
result.column += 1;
}
pub fn position_for_offset(input: &[u8], offset: usize) -> Result<Point> {
if offset > input.len() {
return Err(anyhow!("Failed to address an offset: {offset}"));
}
result
let mut result = Point { row: 0, column: 0 };
let mut last = 0;
for pos in memchr::memchr_iter(b'\n', &input[..offset]) {
result.row += 1;
last = pos;
}
result.column = if result.row > 0 {
offset - last - 1
} else {
offset
};
Ok(result)
}
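Both helpers now return `Result`, so out-of-range rows, columns, and offsets surface as errors instead of being silently clamped. A minimal sketch of the invariant they are meant to uphold (an illustrative test assuming the two functions above are in scope; not part of the commit):

```rust
#[cfg(test)]
mod position_roundtrip {
    use super::*;

    #[test]
    fn offset_and_point_are_inverse() -> Result<()> {
        let input = b"fn main() {\n    let x = 1;\n}\n";
        for offset in 0..=input.len() {
            // Every valid byte offset should map to a Point and back losslessly.
            let point = position_for_offset(input, offset)?;
            assert_eq!(offset_for_position(input, point)?, offset);
        }
        Ok(())
    }
}
```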
View file
@ -3,8 +3,8 @@
<title>tree-sitter THE_LANGUAGE_NAME</title>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/codemirror/5.45.0/codemirror.min.css">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/clusterize.js/0.18.0/clusterize.min.css">
<link rel="icon" type="image/png" href="http://tree-sitter.github.io/tree-sitter/assets/images/favicon-32x32.png" sizes="32x32" />
<link rel="icon" type="image/png" href="http://tree-sitter.github.io/tree-sitter/assets/images/favicon-16x16.png" sizes="16x16" />
<link rel="icon" type="image/png" href="https://tree-sitter.github.io/tree-sitter/assets/images/favicon-32x32.png" sizes="32x32" />
<link rel="icon" type="image/png" href="https://tree-sitter.github.io/tree-sitter/assets/images/favicon-16x16.png" sizes="16x16" />
</head>
<body>
View file
@ -1,4 +1,5 @@
use super::wasm;
use anyhow::{anyhow, Context, Result};
use std::{
borrow::Cow,
env, fs,
@ -7,12 +8,11 @@ use std::{
str::{self, FromStr as _},
};
use tiny_http::{Header, Response, Server};
use webbrowser;
macro_rules! optional_resource {
($name: tt, $path: tt) => {
#[cfg(TREE_SITTER_EMBED_WASM_BINDING)]
fn $name(tree_sitter_dir: &Option<PathBuf>) -> Cow<'static, [u8]> {
fn $name(tree_sitter_dir: Option<&PathBuf>) -> Cow<'static, [u8]> {
if let Some(tree_sitter_dir) = tree_sitter_dir {
Cow::Owned(fs::read(tree_sitter_dir.join($path)).unwrap())
} else {
@ -21,7 +21,7 @@ macro_rules! optional_resource {
}
#[cfg(not(TREE_SITTER_EMBED_WASM_BINDING))]
fn $name(tree_sitter_dir: &Option<PathBuf>) -> Cow<'static, [u8]> {
fn $name(tree_sitter_dir: Option<&PathBuf>) -> Cow<'static, [u8]> {
if let Some(tree_sitter_dir) = tree_sitter_dir {
Cow::Owned(fs::read(tree_sitter_dir.join($path)).unwrap())
} else {
@ -35,7 +35,7 @@ optional_resource!(get_playground_js, "docs/assets/js/playground.js");
optional_resource!(get_lib_js, "lib/binding_web/tree-sitter.js");
optional_resource!(get_lib_wasm, "lib/binding_web/tree-sitter.wasm");
fn get_main_html(tree_sitter_dir: &Option<PathBuf>) -> Cow<'static, [u8]> {
fn get_main_html(tree_sitter_dir: Option<&PathBuf>) -> Cow<'static, [u8]> {
if let Some(tree_sitter_dir) = tree_sitter_dir {
Cow::Owned(fs::read(tree_sitter_dir.join("cli/src/playground.html")).unwrap())
} else {
@ -43,23 +43,10 @@ fn get_main_html(tree_sitter_dir: &Option<PathBuf>) -> Cow<'static, [u8]> {
}
}
pub fn serve(grammar_path: &Path, open_in_browser: bool) {
let port = env::var("TREE_SITTER_PLAYGROUND_PORT")
.map(|v| v.parse::<u16>().expect("Invalid port specification"))
.unwrap_or_else(
|_| get_available_port().expect(
"Couldn't find an available port, try providing a port number via the TREE_SITTER_PLAYGROUND_PORT \
environment variable"
)
);
let addr = format!(
"{}:{}",
env::var("TREE_SITTER_PLAYGROUND_ADDR").unwrap_or("127.0.0.1".to_owned()),
port
);
pub fn serve(grammar_path: &Path, open_in_browser: bool) -> Result<()> {
let server = get_server()?;
let (grammar_name, language_wasm) = wasm::load_language_wasm_file(&grammar_path).unwrap();
let server = Server::http(&addr).expect("Failed to start web server");
let url = format!("http://{}", addr);
let url = format!("http://{}", server.server_addr());
println!("Started playground on: {}", url);
if open_in_browser {
if let Err(_) = webbrowser::open(&url) {
@ -68,13 +55,13 @@ pub fn serve(grammar_path: &Path, open_in_browser: bool) {
}
let tree_sitter_dir = env::var("TREE_SITTER_BASE_DIR").map(PathBuf::from).ok();
let main_html = str::from_utf8(&get_main_html(&tree_sitter_dir))
let main_html = str::from_utf8(&get_main_html(tree_sitter_dir.as_ref()))
.unwrap()
.replace("THE_LANGUAGE_NAME", &grammar_name)
.into_bytes();
let playground_js = get_playground_js(&tree_sitter_dir);
let lib_js = get_lib_js(&tree_sitter_dir);
let lib_wasm = get_lib_wasm(&tree_sitter_dir);
let playground_js = get_playground_js(tree_sitter_dir.as_ref());
let lib_js = get_lib_js(tree_sitter_dir.as_ref());
let lib_wasm = get_lib_wasm(tree_sitter_dir.as_ref());
let html_header = Header::from_str("Content-Type: text/html").unwrap();
let js_header = Header::from_str("Content-Type: application/javascript").unwrap();
@ -107,8 +94,12 @@ pub fn serve(grammar_path: &Path, open_in_browser: bool) {
}
_ => response(b"Not found", &html_header).with_status_code(404),
};
request.respond(res).expect("Failed to write HTTP response");
request
.respond(res)
.with_context(|| "Failed to write HTTP response")?;
}
Ok(())
}
fn redirect<'a>(url: &'a str) -> Response<&'a [u8]> {
@ -123,10 +114,30 @@ fn response<'a>(data: &'a [u8], header: &Header) -> Response<&'a [u8]> {
.with_header(header.clone())
}
fn get_available_port() -> Option<u16> {
(8000..12000).find(port_is_available)
fn get_server() -> Result<Server> {
let addr = env::var("TREE_SITTER_PLAYGROUND_ADDR").unwrap_or_else(|_| "127.0.0.1".to_owned());
let port = env::var("TREE_SITTER_PLAYGROUND_PORT")
.map(|v| {
v.parse::<u16>()
.with_context(|| "Invalid port specification")
})
.ok();
let listener = match port {
Some(port) => {
bind_to(&*addr, port?).with_context(|| "Failed to bind to the specified port")?
}
None => get_listener_on_available_port(&*addr)
.with_context(|| "Failed to find a free port to bind to it")?,
};
let server =
Server::from_listener(listener, None).map_err(|_| anyhow!("Failed to start web server"))?;
Ok(server)
}
fn port_is_available(port: &u16) -> bool {
TcpListener::bind(("127.0.0.1", *port)).is_ok()
fn get_listener_on_available_port(addr: &str) -> Option<TcpListener> {
(8000..12000).find_map(|port| bind_to(addr, port))
}
fn bind_to(addr: &str, port: u16) -> Option<TcpListener> {
TcpListener::bind(format!("{addr}:{port}")).ok()
}
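With listener creation factored out, the playground's bind address and port are driven entirely by `TREE_SITTER_PLAYGROUND_ADDR` and `TREE_SITTER_PLAYGROUND_PORT`, and the 8000..12000 scan only runs when no explicit port is given. A usage sketch (the address value is illustrative):

```rust
// Pin the playground to all interfaces, but let it pick a free port.
std::env::set_var("TREE_SITTER_PLAYGROUND_ADDR", "0.0.0.0");
std::env::remove_var("TREE_SITTER_PLAYGROUND_PORT");
let server = get_server()?;
// `serve` prints this same URL before entering its request loop.
println!("http://{}", server.server_addr());
```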
View file
@ -5,16 +5,20 @@ use std::{
io::{self, Write},
ops::Range,
path::Path,
time::Instant,
};
use tree_sitter::{Language, Parser, Query, QueryCursor};
use tree_sitter::{Language, Parser, Point, Query, QueryCursor};
pub fn query_files_at_paths(
language: Language,
paths: Vec<String>,
query_path: &Path,
ordered_captures: bool,
range: Option<Range<usize>>,
byte_range: Option<Range<usize>>,
point_range: Option<Range<Point>>,
should_test: bool,
quiet: bool,
print_time: bool,
) -> Result<()> {
let stdout = io::stdout();
let mut stdout = stdout.lock();
@ -24,9 +28,12 @@ pub fn query_files_at_paths(
let query = Query::new(language, &query_source).with_context(|| "Query compilation failed")?;
let mut query_cursor = QueryCursor::new();
if let Some(range) = range {
if let Some(range) = byte_range {
query_cursor.set_byte_range(range);
}
if let Some(range) = point_range {
query_cursor.set_point_range(range);
}
let mut parser = Parser::new();
parser.set_language(language)?;
@ -40,22 +47,25 @@ pub fn query_files_at_paths(
fs::read(&path).with_context(|| format!("Error reading source file {:?}", path))?;
let tree = parser.parse(&source_code, None).unwrap();
let start = Instant::now();
if ordered_captures {
for (mat, capture_index) in
query_cursor.captures(&query, tree.root_node(), source_code.as_slice())
{
let capture = mat.captures[capture_index];
let capture_name = &query.capture_names()[capture.index as usize];
writeln!(
&mut stdout,
" pattern: {:>2}, capture: {} - {}, start: {}, end: {}, text: `{}`",
mat.pattern_index,
capture.index,
capture_name,
capture.node.start_position(),
capture.node.end_position(),
capture.node.utf8_text(&source_code).unwrap_or("")
)?;
if !quiet {
writeln!(
&mut stdout,
" pattern: {:>2}, capture: {} - {}, start: {}, end: {}, text: `{}`",
mat.pattern_index,
capture.index,
capture_name,
capture.node.start_position(),
capture.node.end_position(),
capture.node.utf8_text(&source_code).unwrap_or("")
)?;
}
results.push(query_testing::CaptureInfo {
name: capture_name.to_string(),
start: capture.node.start_position(),
@ -64,27 +74,31 @@ pub fn query_files_at_paths(
}
} else {
for m in query_cursor.matches(&query, tree.root_node(), source_code.as_slice()) {
writeln!(&mut stdout, " pattern: {}", m.pattern_index)?;
if !quiet {
writeln!(&mut stdout, " pattern: {}", m.pattern_index)?;
}
for capture in m.captures {
let start = capture.node.start_position();
let end = capture.node.end_position();
let capture_name = &query.capture_names()[capture.index as usize];
if end.row == start.row {
writeln!(
&mut stdout,
" capture: {} - {}, start: {}, end: {}, text: `{}`",
capture.index,
capture_name,
start,
end,
capture.node.utf8_text(&source_code).unwrap_or("")
)?;
} else {
writeln!(
&mut stdout,
" capture: {}, start: {}, end: {}",
capture_name, start, end,
)?;
if !quiet {
if end.row == start.row {
writeln!(
&mut stdout,
" capture: {} - {}, start: {}, end: {}, text: `{}`",
capture.index,
capture_name,
start,
end,
capture.node.utf8_text(&source_code).unwrap_or("")
)?;
} else {
writeln!(
&mut stdout,
" capture: {}, start: {}, end: {}",
capture_name, start, end,
)?;
}
}
results.push(query_testing::CaptureInfo {
name: capture_name.to_string(),
@ -103,6 +117,9 @@ pub fn query_files_at_paths(
if should_test {
query_testing::assert_expected_captures(results, path, &mut parser, language)?
}
if print_time {
writeln!(&mut stdout, "{:?}", start.elapsed())?;
}
}
Ok(())
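For reference, a call exercising the new parameters might look like the following sketch (the file paths are hypothetical): restrict matching to the first ten rows, suppress per-capture output, and report only the elapsed time.

```rust
query_files_at_paths(
    language,
    vec!["examples/demo.rs".to_string()],      // hypothetical source file
    Path::new("queries/highlights.scm"),       // hypothetical query file
    false,                                     // ordered_captures
    None,                                      // byte_range
    Some(Point::new(0, 0)..Point::new(10, 0)), // point_range: rows 0..10
    false,                                     // should_test
    true,                                      // quiet
    true,                                      // print_time
)?;
```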
View file
@ -18,9 +18,20 @@ pub struct CaptureInfo {
#[derive(Debug, PartialEq, Eq)]
pub struct Assertion {
pub position: Point,
pub negative: bool,
pub expected_capture_name: String,
}
impl Assertion {
pub fn new(row: usize, col: usize, negative: bool, expected_capture_name: String) -> Self {
Self {
position: Point::new(row, col),
negative,
expected_capture_name,
}
}
}
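The constructor keeps test setup terse; a negative assertion differs from a positive one only in the flag:

```rust
// Position (3, 8) must be captured as `function`...
let positive = Assertion::new(3, 8, false, "function".to_string());
// ...while position (5, 2) must NOT be captured as `variable`.
let negative = Assertion::new(5, 2, true, "variable".to_string());
assert!(!positive.negative && negative.negative);
```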
/// Parse the given source code, finding all of the comments that contain
/// highlighting assertions. Return a vector of `Assertion`s, each carrying a
/// position, a negation flag, and the expected capture name.
@ -54,6 +65,7 @@ pub fn parse_position_comments(
// to its own column.
let mut has_left_caret = false;
let mut has_arrow = false;
let mut negative = false;
let mut arrow_end = 0;
for (i, c) in text.char_indices() {
arrow_end = i + 1;
@ -69,6 +81,19 @@ pub fn parse_position_comments(
has_left_caret = c == '<';
}
// Find any `!` negation marker after the arrow but before the capture name.
if has_arrow {
for (i, c) in text[arrow_end..].char_indices() {
if c == '!' {
negative = true;
arrow_end += i + 1;
break;
} else if !c.is_whitespace() {
break;
}
}
}
// If the comment node contains an arrow and a highlight name, record the
// highlight name and the position.
if let (true, Some(mat)) =
@ -76,7 +101,8 @@ pub fn parse_position_comments(
{
assertion_ranges.push((node.start_position(), node.end_position()));
result.push(Assertion {
position: position,
position,
negative,
expected_capture_name: mat.as_str().to_string(),
});
}
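In an assertion fixture this surfaces as a `!` between the arrow and the capture name. An illustrative Rust fixture, assuming the existing `<-` position convention:

```rust
let x = 1;
// <- keyword
// <- !variable
```

Both comments address the same position (the `let` token the arrows point at): the first requires the `keyword` capture, the second forbids `variable`.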
View file
@ -23,7 +23,7 @@ pub fn generate_tags(
}
let mut context = TagsContext::new();
let cancellation_flag = util::cancel_on_stdin();
let cancellation_flag = util::cancel_on_signal();
let stdout = io::stdout();
let mut stdout = stdout.lock();
View file
@ -16,11 +16,11 @@ use walkdir::WalkDir;
lazy_static! {
static ref HEADER_REGEX: ByteRegex =
ByteRegexBuilder::new(r"^===+(?P<suffix1>[^=\r\n][^\r\n]*)?\r?\n(?P<test_name>([^=\r\n][^\r\n]*\r?\n)+)===+(?P<suffix2>[^=\r\n][^\r\n]*)?\r?\n")
ByteRegexBuilder::new(r"^(?P<equals>(?:=+){3,})(?P<suffix1>[^=\r\n][^\r\n]*)?\r?\n(?P<test_name>([^=\r\n][^\r\n]*\r?\n)+)===+(?P<suffix2>[^=\r\n][^\r\n]*)?\r?\n")
.multi_line(true)
.build()
.unwrap();
static ref DIVIDER_REGEX: ByteRegex = ByteRegexBuilder::new(r"^---+(?P<suffix>[^-\r\n][^\r\n]*)?\r?\n")
static ref DIVIDER_REGEX: ByteRegex = ByteRegexBuilder::new(r"^(?P<hyphens>(?:-+){3,})(?P<suffix>[^-\r\n][^\r\n]*)?\r?\n")
.multi_line(true)
.build()
.unwrap();
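Capturing the actual delimiter runs means `--update` can rewrite a test file without normalizing every delimiter to 80 characters. A minimal entry in the format these patterns accept (illustrative):

```text
===============
The first test
===============

a b c

---

(a (b c))
```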
@ -40,6 +40,8 @@ pub enum TestEntry {
name: String,
input: Vec<u8>,
output: String,
header_delim_len: usize,
divider_delim_len: usize,
has_fields: bool,
},
}
@ -177,13 +179,15 @@ fn run_tests(
mut indent_level: i32,
failures: &mut Vec<(String, String, String)>,
update: bool,
corrected_entries: &mut Vec<(String, String, String)>,
corrected_entries: &mut Vec<(String, String, String, usize, usize)>,
) -> Result<()> {
match test_entry {
TestEntry::Example {
name,
input,
output,
header_delim_len,
divider_delim_len,
has_fields,
} => {
if let Some(filter) = filter {
@ -191,7 +195,13 @@ fn run_tests(
if update {
let input = String::from_utf8(input).unwrap();
let output = format_sexp(&output);
corrected_entries.push((name, input, output));
corrected_entries.push((
name,
input,
output,
header_delim_len,
divider_delim_len,
));
}
return Ok(());
}
@ -201,21 +211,31 @@ fn run_tests(
if !has_fields {
actual = strip_sexp_fields(actual);
}
for _ in 0..indent_level {
print!(" ");
}
print!("{}", " ".repeat(indent_level as usize));
if actual == output {
println!("{}", Colour::Green.paint(&name));
if update {
let input = String::from_utf8(input).unwrap();
let output = format_sexp(&output);
corrected_entries.push((name, input, output));
corrected_entries.push((
name,
input,
output,
header_delim_len,
divider_delim_len,
));
}
} else {
if update {
let input = String::from_utf8(input).unwrap();
let output = format_sexp(&actual);
corrected_entries.push((name.clone(), input, output));
corrected_entries.push((
name.clone(),
input,
output,
header_delim_len,
divider_delim_len,
));
println!("{}", Colour::Blue.paint(&name));
} else {
println!("{}", Colour::Red.paint(&name));
@ -229,9 +249,7 @@ fn run_tests(
file_path,
} => {
if indent_level > 0 {
for _ in 0..indent_level {
print!(" ");
}
print!("{}", " ".repeat(indent_level as usize));
println!("{}:", name);
}
@ -312,27 +330,32 @@ fn format_sexp_indented(sexp: &String, initial_indent_level: u32) -> String {
formatted
}
fn write_tests(file_path: &Path, corrected_entries: &Vec<(String, String, String)>) -> Result<()> {
fn write_tests(
file_path: &Path,
corrected_entries: &Vec<(String, String, String, usize, usize)>,
) -> Result<()> {
let mut buffer = fs::File::create(file_path)?;
write_tests_to_buffer(&mut buffer, corrected_entries)
}
fn write_tests_to_buffer(
buffer: &mut impl Write,
corrected_entries: &Vec<(String, String, String)>,
corrected_entries: &Vec<(String, String, String, usize, usize)>,
) -> Result<()> {
for (i, (name, input, output)) in corrected_entries.iter().enumerate() {
for (i, (name, input, output, header_delim_len, divider_delim_len)) in
corrected_entries.iter().enumerate()
{
if i > 0 {
write!(buffer, "\n")?;
}
write!(
buffer,
"{}\n{}\n{}\n{}\n{}\n\n{}\n",
"=".repeat(80),
"=".repeat(*header_delim_len),
name,
"=".repeat(80),
"=".repeat(*header_delim_len),
input,
"-".repeat(80),
"-".repeat(*divider_delim_len),
output.trim()
)?;
}
@ -351,9 +374,18 @@ pub fn parse_tests(path: &Path) -> io::Result<TestEntry> {
let entry = entry?;
let hidden = entry.file_name().to_str().unwrap_or("").starts_with(".");
if !hidden {
children.push(parse_tests(&entry.path())?);
children.push(entry.path());
}
}
children.sort_by(|a, b| {
a.file_name()
.unwrap_or_default()
.cmp(&b.file_name().unwrap_or_default())
});
let children = children
.iter()
.map(|path| parse_tests(path))
.collect::<io::Result<Vec<TestEntry>>>()?;
Ok(TestEntry::Group {
name,
children,
@ -387,6 +419,7 @@ fn parse_test_content(name: String, content: String, file_path: Option<PathBuf>)
// Ignore any matches whose suffix does not match the first header
// suffix in the file.
let header_matches = HEADER_REGEX.captures_iter(&bytes).filter_map(|c| {
let header_delim_len = c.name("equals").map(|n| n.as_bytes().len()).unwrap_or(80);
let suffix1 = c
.name("suffix1")
.map(|m| String::from_utf8_lossy(m.as_bytes()));
@ -398,13 +431,17 @@ fn parse_test_content(name: String, content: String, file_path: Option<PathBuf>)
let test_name = c
.name("test_name")
.map(|c| String::from_utf8_lossy(c.as_bytes()).trim_end().to_string());
Some((header_range, test_name))
Some((header_delim_len, header_range, test_name))
} else {
None
}
});
for (header_range, test_name) in header_matches.chain(Some((bytes.len()..bytes.len(), None))) {
let mut prev_header_len = 80;
for (header_delim_len, header_range, test_name) in
header_matches.chain(Some((80, bytes.len()..bytes.len(), None)))
{
// Find the longest line of dashes following each test description. That line
// separates the input from the expected output. Ignore any matches whose suffix
// does not match the first suffix in the file.
@ -412,19 +449,25 @@ fn parse_test_content(name: String, content: String, file_path: Option<PathBuf>)
let divider_range = DIVIDER_REGEX
.captures_iter(&bytes[prev_header_end..header_range.start])
.filter_map(|m| {
let divider_delim_len =
m.name("hyphens").map(|m| m.as_bytes().len()).unwrap_or(80);
let suffix = m
.name("suffix")
.map(|m| String::from_utf8_lossy(m.as_bytes()));
if suffix == first_suffix {
let range = m.get(0).unwrap().range();
Some((prev_header_end + range.start)..(prev_header_end + range.end))
Some((
divider_delim_len,
(prev_header_end + range.start)..(prev_header_end + range.end),
))
} else {
None
}
})
.max_by_key(|range| range.len());
.max_by_key(|(_, range)| range.len());
if let Some(divider_range) = divider_range {
if let Some((divider_delim_len, divider_range)) = divider_range {
if let Ok(output) = str::from_utf8(&bytes[divider_range.end..header_range.start]) {
let mut input = bytes[prev_header_end..divider_range.start].to_vec();
@ -449,12 +492,15 @@ fn parse_test_content(name: String, content: String, file_path: Option<PathBuf>)
name: prev_name,
input,
output,
header_delim_len: prev_header_len,
divider_delim_len,
has_fields,
});
}
}
}
prev_name = test_name.unwrap_or(String::new());
prev_header_len = header_delim_len;
prev_header_end = header_range.end;
}
TestEntry::Group {
@ -505,12 +551,16 @@ d
name: "The first test".to_string(),
input: "\na b c\n".as_bytes().to_vec(),
output: "(a (b c))".to_string(),
header_delim_len: 15,
divider_delim_len: 3,
has_fields: false,
},
TestEntry::Example {
name: "The second test".to_string(),
input: "d".as_bytes().to_vec(),
output: "(d)".to_string(),
header_delim_len: 16,
divider_delim_len: 3,
has_fields: false,
},
],
@ -559,12 +609,16 @@ abc
name: "Code with dashes".to_string(),
input: "abc\n---\ndefg\n----\nhijkl".as_bytes().to_vec(),
output: "(a (b))".to_string(),
header_delim_len: 18,
divider_delim_len: 7,
has_fields: false,
},
TestEntry::Example {
name: "Code ending with dashes".to_string(),
input: "abc\n-----------".as_bytes().to_vec(),
output: "(c (d))".to_string(),
header_delim_len: 25,
divider_delim_len: 19,
has_fields: false,
},
],
@ -608,11 +662,15 @@ abc
"title 1".to_string(),
"input 1".to_string(),
"output 1".to_string(),
80,
80,
),
(
"title 2".to_string(),
"input 2".to_string(),
"output 2".to_string(),
80,
80,
),
];
write_tests_to_buffer(&mut buffer, &corrected_entries).unwrap();
@ -689,18 +747,24 @@ code
name: "sexp with comment".to_string(),
input: "code".as_bytes().to_vec(),
output: "(a (b))".to_string(),
header_delim_len: 18,
divider_delim_len: 3,
has_fields: false,
},
TestEntry::Example {
name: "sexp with comment between".to_string(),
input: "code".as_bytes().to_vec(),
output: "(a (b))".to_string(),
header_delim_len: 18,
divider_delim_len: 3,
has_fields: false,
},
TestEntry::Example {
name: "sexp with ';'".to_string(),
input: "code".as_bytes().to_vec(),
output: "(MISSING \";\")".to_string(),
header_delim_len: 25,
divider_delim_len: 3,
has_fields: false,
}
],
@ -773,18 +837,24 @@ NOT A TEST HEADER
name: "First test".to_string(),
input: expected_input.clone(),
output: "(a)".to_string(),
header_delim_len: 18,
divider_delim_len: 3,
has_fields: false,
},
TestEntry::Example {
name: "Second test".to_string(),
input: expected_input.clone(),
output: "(a)".to_string(),
header_delim_len: 18,
divider_delim_len: 3,
has_fields: false,
},
TestEntry::Example {
name: "Test name with = symbol".to_string(),
input: expected_input.clone(),
output: "(a)".to_string(),
header_delim_len: 25,
divider_delim_len: 3,
has_fields: false,
}
],
@ -828,12 +898,16 @@ code with ----
name: "name\nwith\nnewlines".to_string(),
input: b"a".to_vec(),
output: "(b)".to_string(),
header_delim_len: 15,
divider_delim_len: 3,
has_fields: false,
},
TestEntry::Example {
name: "name with === signs".to_string(),
input: b"code with ----".to_vec(),
output: "(d)".to_string(),
header_delim_len: 20,
divider_delim_len: 3,
has_fields: false,
}
]
View file
@ -42,41 +42,74 @@ pub fn test_highlights(
loader: &Loader,
highlighter: &mut Highlighter,
directory: &Path,
apply_all_captures: bool,
) -> Result<()> {
println!("syntax highlighting:");
test_highlights_indented(loader, highlighter, directory, apply_all_captures, 2)
}
fn test_highlights_indented(
loader: &Loader,
highlighter: &mut Highlighter,
directory: &Path,
apply_all_captures: bool,
indent_level: usize,
) -> Result<()> {
let mut failed = false;
println!("syntax highlighting:");
for highlight_test_file in fs::read_dir(directory)? {
let highlight_test_file = highlight_test_file?;
let test_file_path = highlight_test_file.path();
let test_file_name = highlight_test_file.file_name();
let (language, language_config) = loader
.language_configuration_for_file_name(&test_file_path)?
.ok_or_else(|| anyhow!("No language found for path {:?}", test_file_path))?;
let highlight_config = language_config
.highlight_config(language)?
.ok_or_else(|| anyhow!("No highlighting config found for {:?}", test_file_path))?;
match test_highlight(
&loader,
highlighter,
highlight_config,
fs::read(&test_file_path)?.as_slice(),
) {
Ok(assertion_count) => {
println!(
" ✓ {} ({} assertions)",
Colour::Green.paint(test_file_name.to_string_lossy().as_ref()),
assertion_count
);
}
Err(e) => {
println!(
" ✗ {}",
Colour::Red.paint(test_file_name.to_string_lossy().as_ref())
);
println!(" {}", e);
print!(
"{indent:indent_level$}",
indent = "",
indent_level = indent_level * 2
);
if test_file_path.is_dir() && test_file_path.read_dir()?.next().is_some() {
println!("{}:", test_file_name.into_string().unwrap());
if let Err(_) = test_highlights_indented(
loader,
highlighter,
&test_file_path,
apply_all_captures,
indent_level + 1,
) {
failed = true;
}
} else {
let (language, language_config) = loader
.language_configuration_for_file_name(&test_file_path)?
.ok_or_else(|| anyhow!("No language found for path {:?}", test_file_path))?;
let highlight_config = language_config
.highlight_config(language, apply_all_captures, None)?
.ok_or_else(|| anyhow!("No highlighting config found for {:?}", test_file_path))?;
match test_highlight(
&loader,
highlighter,
highlight_config,
fs::read(&test_file_path)?.as_slice(),
) {
Ok(assertion_count) => {
println!(
"✓ {} ({} assertions)",
Colour::Green.paint(test_file_name.to_string_lossy().as_ref()),
assertion_count
);
}
Err(e) => {
println!(
"✗ {}",
Colour::Red.paint(test_file_name.to_string_lossy().as_ref())
);
println!(
"{indent:indent_level$} {e}",
indent = "",
indent_level = indent_level * 2
);
failed = true;
}
}
}
}
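Highlight fixtures can now be grouped into nested directories, with each level printed at one extra indent. An illustrative layout (file names are hypothetical):

```text
test/highlight/
  basics.rs        -- tested directly against its language's queries
  expressions/     -- non-empty directory: printed as a group header
    calls.rs
    literals.rs
```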
@ -94,9 +127,10 @@ pub fn iterate_assertions(
// Iterate through all of the highlighting assertions, checking each one against the
// actual highlights.
let mut i = 0;
let mut actual_highlights = Vec::<&String>::new();
let mut actual_highlights = Vec::new();
for Assertion {
position,
negative,
expected_capture_name: expected_highlight,
} in assertions
{
@ -120,12 +154,13 @@ pub fn iterate_assertions(
break 'highlight_loop;
}
// If the highlight matches the assertion, this test passes. Otherwise,
// If the highlight matches the assertion, or if the highlight doesn't
// match the assertion but it's negative, this test passes. Otherwise,
// add this highlight to the list of actual highlights that span the
// assertion's position, in order to generate an error message in the event
// of a failure.
let highlight_name = &highlight_names[(highlight.2).0];
if *highlight_name == *expected_highlight {
if (*highlight_name == *expected_highlight) == !negative {
passed = true;
break 'highlight_loop;
} else {
@ -165,68 +200,7 @@ pub fn test_highlight(
let assertions =
parse_position_comments(highlighter.parser(), highlight_config.language, source)?;
iterate_assertions(&assertions, &highlights, &highlight_names)?;
// Iterate through all of the highlighting assertions, checking each one against the
// actual highlights.
let mut i = 0;
let mut actual_highlights = Vec::<&String>::new();
for Assertion {
position,
expected_capture_name: expected_highlight,
} in &assertions
{
let mut passed = false;
actual_highlights.clear();
'highlight_loop: loop {
// The assertions are ordered by position, so skip past all of the highlights that
// end at or before this assertion's position.
if let Some(highlight) = highlights.get(i) {
if highlight.1 <= *position {
i += 1;
continue;
}
// Iterate through all of the highlights that start at or before this assertion's,
// position, looking for one that matches the assertion.
let mut j = i;
while let (false, Some(highlight)) = (passed, highlights.get(j)) {
if highlight.0 > *position {
break 'highlight_loop;
}
// If the highlight matches the assertion, this test passes. Otherwise,
// add this highlight to the list of actual highlights that span the
// assertion's position, in order to generate an error message in the event
// of a failure.
let highlight_name = &highlight_names[(highlight.2).0];
if *highlight_name == *expected_highlight {
passed = true;
break 'highlight_loop;
} else {
actual_highlights.push(highlight_name);
}
j += 1;
}
} else {
break;
}
}
if !passed {
return Err(Failure {
row: position.row,
column: position.column,
expected_highlight: expected_highlight.clone(),
actual_highlights: actual_highlights.into_iter().cloned().collect(),
}
.into());
}
}
Ok(assertions.len())
iterate_assertions(&assertions, &highlights, &highlight_names)
}
pub fn get_highlight_positions(
@ -244,7 +218,7 @@ pub fn get_highlight_positions(
let source = String::from_utf8_lossy(source);
let mut char_indices = source.char_indices();
for event in highlighter.highlight(highlight_config, source.as_bytes(), None, |string| {
loader.highlight_config_for_injection_string(string)
loader.highlight_config_for_injection_string(string, highlight_config.apply_all_captures)
})? {
match event? {
HighlightEvent::HighlightStart(h) => highlight_stack.push(h),
View file
@ -95,6 +95,7 @@ pub fn test_tag(
let mut actual_tags = Vec::<&String>::new();
for Assertion {
position,
negative,
expected_capture_name: expected_tag,
} in &assertions
{
@ -116,7 +117,7 @@ pub fn test_tag(
}
let tag_name = &tag.2;
if *tag_name == *expected_tag {
if (*tag_name == *expected_tag) == !negative {
passed = true;
break 'tag_loop;
} else {
@ -124,6 +125,9 @@ pub fn test_tag(
}
j += 1;
if tag == tags.last().unwrap() {
break 'tag_loop;
}
}
} else {
break;
View file
@ -0,0 +1,279 @@
use super::helpers::fixtures::get_language;
use std::future::Future;
use std::pin::{pin, Pin};
use std::ptr;
use std::task::{self, Context, Poll, RawWaker, RawWakerVTable, Waker};
use tree_sitter::Parser;
#[test]
fn test_node_in_fut() {
let (ret, pended) = tokio_like_spawn(async {
let mut parser = Parser::new();
let language = get_language("bash");
parser.set_language(language).unwrap();
let tree = parser.parse("#", None).unwrap();
let root = tree.root_node();
let root_ref = &root;
let fut_val_fn = || async {
// eprintln!("fut_val_fn: {}", root.child(0).unwrap().kind());
yield_now().await;
root.child(0).unwrap().kind()
};
yield_now().await;
let fut_ref_fn = || async {
// eprintln!("fut_ref_fn: {}", root_ref.child(0).unwrap().kind());
yield_now().await;
root_ref.child(0).unwrap().kind()
};
let f1 = fut_val_fn().await;
let f2 = fut_ref_fn().await;
assert_eq!(f1, f2);
let fut_val = async {
// eprintln!("fut_val: {}", root.child(0).unwrap().kind());
yield_now().await;
root.child(0).unwrap().kind()
};
let fut_ref = async {
// eprintln!("fut_ref: {}", root_ref.child(0).unwrap().kind());
yield_now().await;
root_ref.child(0).unwrap().kind()
};
let f1 = fut_val.await;
let f2 = fut_ref.await;
assert_eq!(f1, f2);
f1
})
.join();
// eprintln!("pended: {pended:?}");
assert_eq!(ret, "comment");
assert_eq!(pended, 5);
}
#[test]
fn test_node_and_cursor_ref_in_fut() {
let (_, pended) = tokio_like_spawn(async {
let mut parser = Parser::new();
let language = get_language("bash");
parser.set_language(language).unwrap();
let tree = parser.parse("#", None).unwrap();
let root = tree.root_node();
let root_ref = &root;
let mut cursor = tree.walk();
let cursor_ref = &mut cursor;
cursor_ref.goto_first_child();
let fut_val = async {
yield_now().await;
root.to_sexp();
};
yield_now().await;
let fut_ref = async {
yield_now().await;
root_ref.to_sexp();
cursor_ref.goto_first_child();
};
fut_val.await;
fut_ref.await;
cursor_ref.goto_first_child();
})
.join();
assert_eq!(pended, 3);
}
#[test]
fn test_node_and_cursor_ref_in_fut_with_fut_fabrics() {
let (_, pended) = tokio_like_spawn(async {
let mut parser = Parser::new();
let language = get_language("bash");
parser.set_language(language).unwrap();
let tree = parser.parse("#", None).unwrap();
let root = tree.root_node();
let root_ref = &root;
let mut cursor = tree.walk();
let cursor_ref = &mut cursor;
cursor_ref.goto_first_child();
let fut_val = || async {
yield_now().await;
root.to_sexp();
};
yield_now().await;
let fut_ref = || async move {
yield_now().await;
root_ref.to_sexp();
cursor_ref.goto_first_child();
};
fut_val().await;
fut_val().await;
fut_ref().await;
})
.join();
assert_eq!(pended, 4);
}
#[test]
fn test_node_and_cursor_ref_in_fut_with_inner_spawns() {
let (ret, pended) = tokio_like_spawn(async {
let mut parser = Parser::new();
let language = get_language("bash");
parser.set_language(language).unwrap();
let tree = parser.parse("#", None).unwrap();
let mut cursor = tree.walk();
let cursor_ref = &mut cursor;
cursor_ref.goto_first_child();
let fut_val = || {
let tree = tree.clone();
async move {
let root = tree.root_node();
let mut cursor = tree.walk();
let cursor_ref = &mut cursor;
yield_now().await;
root.to_sexp();
cursor_ref.goto_first_child();
}
};
yield_now().await;
let fut_ref = || {
let tree = tree.clone();
async move {
let root = tree.root_node();
let root_ref = &root;
let mut cursor = tree.walk();
let cursor_ref = &mut cursor;
yield_now().await;
root_ref.to_sexp();
cursor_ref.goto_first_child();
}
};
let (_, p1) = tokio_like_spawn(fut_val()).await.unwrap();
let (_, p2) = tokio_like_spawn(fut_ref()).await.unwrap();
cursor_ref.goto_first_child();
fut_val().await;
fut_val().await;
fut_ref().await;
cursor_ref.goto_first_child();
p1 + p2
})
.join();
assert_eq!(pended, 4);
assert_eq!(ret, 2);
}
fn tokio_like_spawn<T>(future: T) -> JoinHandle<(T::Output, usize)>
where
T: Future + Send + 'static,
T::Output: Send + 'static,
{
// No runtime, just noop waker
let waker = noop_waker();
let mut cx = task::Context::from_waker(&waker);
let mut pending = 0;
let mut future = pin!(future);
let ret = loop {
match future.as_mut().poll(&mut cx) {
Poll::Pending => pending += 1,
Poll::Ready(r) => {
// eprintln!("ready, pended: {pending}");
break r;
}
}
};
JoinHandle::new((ret, pending))
}
async fn yield_now() {
struct SimpleYieldNow {
yielded: bool,
}
impl Future for SimpleYieldNow {
type Output = ();
fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<()> {
cx.waker().clone().wake();
if self.yielded {
return Poll::Ready(());
}
self.yielded = true;
Poll::Pending
}
}
SimpleYieldNow { yielded: false }.await
}
pub fn noop_waker() -> Waker {
const VTABLE: RawWakerVTable = RawWakerVTable::new(
// Cloning just returns a new no-op raw waker
|_| RAW,
// `wake` does nothing
|_| {},
// `wake_by_ref` does nothing
|_| {},
// Dropping does nothing as we don't allocate anything
|_| {},
);
const RAW: RawWaker = RawWaker::new(ptr::null(), &VTABLE);
unsafe { Waker::from_raw(RAW) }
}
struct JoinHandle<T> {
data: Option<T>,
}
impl<T> JoinHandle<T> {
fn new(data: T) -> Self {
Self { data: Some(data) }
}
fn join(&mut self) -> T {
self.data.take().unwrap()
}
}
impl<T: Unpin> Future for JoinHandle<T> {
type Output = std::result::Result<T, ()>;
fn poll(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<Self::Output> {
let data = self.get_mut().data.take().unwrap();
Poll::Ready(Ok(data))
}
}
View file
@ -1,7 +1,8 @@
use super::helpers::{
allocations,
edits::{get_random_edit, invert_edit},
fixtures::{fixtures_dir, get_language, get_test_language},
fixtures::{fixtures_dir, get_language, get_test_language, SCRATCH_BASE_DIR},
new_seed,
random::Rand,
scope_sequence::ScopeSequence,
EDIT_COUNT, EXAMPLE_FILTER, ITERATION_COUNT, LANGUAGE_FILTER, LOG_ENABLED, LOG_GRAPH_ENABLED,
@ -13,70 +14,81 @@ use crate::{
test::{parse_tests, print_diff, print_diff_key, strip_sexp_fields, TestEntry},
util,
};
use std::fs;
use std::{collections::HashMap, env, fs};
use tree_sitter::{LogType, Node, Parser, Point, Range, Tree};
use tree_sitter_proc_macro::test_with_seed;
#[test]
fn test_bash_corpus() {
test_language_corpus("bash");
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_bash(seed: usize) {
test_language_corpus(
"bash",
seed,
Some(&[
// Fragile tests where edit customization changes
// lead to significant parse tree structure changes.
"bash - corpus - commands - Nested Heredocs",
"bash - corpus - commands - Quoted Heredocs",
"bash - corpus - commands - Heredocs with weird characters",
]),
);
}
#[test]
fn test_c_corpus() {
test_language_corpus("c");
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_c(seed: usize) {
test_language_corpus("c", seed, None);
}
#[test]
fn test_cpp_corpus() {
test_language_corpus("cpp");
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_cpp(seed: usize) {
test_language_corpus("cpp", seed, None);
}
#[test]
fn test_embedded_template_corpus() {
test_language_corpus("embedded-template");
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_embedded_template(seed: usize) {
test_language_corpus("embedded-template", seed, None);
}
#[test]
fn test_go_corpus() {
test_language_corpus("go");
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_go(seed: usize) {
test_language_corpus("go", seed, None);
}
#[test]
fn test_html_corpus() {
test_language_corpus("html");
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_html(seed: usize) {
test_language_corpus("html", seed, None);
}
#[test]
fn test_javascript_corpus() {
test_language_corpus("javascript");
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_javascript(seed: usize) {
test_language_corpus("javascript", seed, None);
}
#[test]
fn test_json_corpus() {
test_language_corpus("json");
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_json(seed: usize) {
test_language_corpus("json", seed, None);
}
#[test]
fn test_php_corpus() {
test_language_corpus("php");
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_php(seed: usize) {
test_language_corpus("php", seed, None);
}
#[test]
fn test_python_corpus() {
test_language_corpus("python");
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_python(seed: usize) {
test_language_corpus("python", seed, None);
}
#[test]
fn test_ruby_corpus() {
test_language_corpus("ruby");
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_ruby(seed: usize) {
test_language_corpus("ruby", seed, None);
}
#[test]
fn test_rust_corpus() {
test_language_corpus("rust");
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_rust(seed: usize) {
test_language_corpus("rust", seed, None);
}
fn test_language_corpus(language_name: &str) {
fn test_language_corpus(language_name: &str, start_seed: usize, skipped: Option<&[&str]>) {
let grammars_dir = fixtures_dir().join("grammars");
let error_corpus_dir = fixtures_dir().join("error_corpus");
let template_corpus_dir = fixtures_dir().join("template_corpus");
@ -98,10 +110,30 @@ fn test_language_corpus(language_name: &str) {
t
}));
let mut skipped = skipped.map(|x| HashMap::<&str, usize>::from_iter(x.iter().map(|x| (*x, 0))));
let language = get_language(language_name);
let mut failure_count = 0;
for test in tests {
println!(" {} example - {}", language_name, test.name);
let log_seed = env::var("TREE_SITTER_LOG_SEED").is_ok();
let dump_edits = env::var("TREE_SITTER_DUMP_EDITS").is_ok();
if log_seed {
println!(" start seed: {}", start_seed);
}
println!();
for (test_index, test) in tests.iter().enumerate() {
let test_name = format!("{language_name} - {}", test.name);
if let Some(skipped) = skipped.as_mut() {
if let Some(counter) = skipped.get_mut(test_name.as_str()) {
println!(" {test_index}. {test_name} - SKIPPED");
*counter += 1;
continue;
}
}
println!(" {test_index}. {test_name}");
let passed = allocations::record(|| {
let mut log_session = None;
@ -116,10 +148,7 @@ fn test_language_corpus(language_name: &str) {
}
if actual_output != test.output {
println!(
"Incorrect initial parse for {} - {}",
language_name, test.name,
);
println!("Incorrect initial parse for {test_name}");
print_diff_key();
print_diff(&actual_output, &test.output);
println!("");
@ -140,7 +169,7 @@ fn test_language_corpus(language_name: &str) {
drop(parser);
for trial in 0..*ITERATION_COUNT {
let seed = *START_SEED + trial;
let seed = start_seed + trial;
let passed = allocations::record(|| {
let mut rand = Rand::new(seed);
let mut log_session = None;
@ -158,10 +187,21 @@ fn test_language_corpus(language_name: &str) {
for _ in 0..1 + rand.unsigned(*EDIT_COUNT) {
let edit = get_random_edit(&mut rand, &input);
undo_stack.push(invert_edit(&input, &edit));
perform_edit(&mut tree, &mut input, &edit);
perform_edit(&mut tree, &mut input, &edit).unwrap();
}
// println!(" seed: {}", seed);
if log_seed {
println!(" {test_index}.{trial:<2} seed: {}", seed);
}
if dump_edits {
fs::write(
SCRATCH_BASE_DIR
.join(format!("edit.{seed}.{test_index}.{trial} {test_name}")),
&input,
)
.unwrap();
}
if *LOG_GRAPH_ENABLED {
eprintln!("{}\n", String::from_utf8_lossy(&input));
@ -173,16 +213,13 @@ fn test_language_corpus(language_name: &str) {
// Check that the new tree is consistent.
check_consistent_sizes(&tree2, &input);
if let Err(message) = check_changed_ranges(&tree, &tree2, &input) {
println!(
"\nUnexpected scope change in seed {}\n{}\n\n",
seed, message
);
println!("\nUnexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n",);
return false;
}
// Undo all of the edits and re-parse again.
while let Some(edit) = undo_stack.pop() {
perform_edit(&mut tree2, &mut input, &edit);
perform_edit(&mut tree2, &mut input, &edit).unwrap();
}
if *LOG_GRAPH_ENABLED {
eprintln!("{}\n", String::from_utf8_lossy(&input));
@ -198,10 +235,7 @@ fn test_language_corpus(language_name: &str) {
}
if actual_output != test.output {
println!(
"Incorrect parse for {} - {} - seed {}",
language_name, test.name, seed
);
println!("Incorrect parse for {test_name} - seed {seed}");
print_diff_key();
print_diff(&actual_output, &test.output);
println!("");
@ -211,7 +245,7 @@ fn test_language_corpus(language_name: &str) {
// Check that the edited tree is consistent.
check_consistent_sizes(&tree3, &input);
if let Err(message) = check_changed_ranges(&tree2, &tree3, &input) {
eprintln!("Unexpected scope change in seed {}\n{}\n\n", seed, message);
println!("Unexpected scope change in seed {seed} with start seed {start_seed}\n{message}\n\n");
return false;
}
@ -228,6 +262,18 @@ fn test_language_corpus(language_name: &str) {
if failure_count > 0 {
panic!("{} {} corpus tests failed", failure_count, language_name);
}
if let Some(skipped) = skipped.as_mut() {
skipped.retain(|_, v| *v == 0);
if !skipped.is_empty() {
println!("Unmatched skip definitions:");
for k in skipped.keys() {
println!(" {k}");
}
panic!("Unmatched skip definitions need to be removed");
}
}
}
#[test]
@ -255,7 +301,7 @@ fn test_feature_corpus_files() {
grammar_path = test_path.join("grammar.json");
}
let error_message_path = test_path.join("expected_error.txt");
let grammar_json = generate::load_grammar_file(&grammar_path).unwrap();
let grammar_json = generate::load_grammar_file(&grammar_path, None).unwrap();
let generate_result = generate::generate_parser_for_grammar(&grammar_json);
if error_message_path.exists() {
@ -424,7 +470,12 @@ fn set_included_ranges(parser: &mut Parser, input: &[u8], delimiters: Option<(&s
let mut ranges = Vec::new();
let mut ix = 0;
while ix < input.len() {
let Some(mut start_ix) = input[ix..].windows(2).position(|win| win == start.as_bytes()) else { break };
let Some(mut start_ix) = input[ix..]
.windows(2)
.position(|win| win == start.as_bytes())
else {
break;
};
start_ix += ix + start.len();
let end_ix = input[start_ix..]
.windows(2)
@ -492,6 +543,7 @@ fn flatten_tests(test: TestEntry) -> Vec<FlattenedTest> {
input,
output,
has_fields,
..
} => {
if !prefix.is_empty() {
name.insert_str(0, " - ");
View file
@ -0,0 +1,42 @@
// Tests in this module need to be executed with the UBSAN library enabled:
// ```
// UBSAN_OPTIONS="halt_on_error=1" \
// CFLAGS="-fsanitize=undefined" \
// RUSTFLAGS="-lubsan" \
// cargo test --target $(rustc -vV | sed -nr 's/^host: //p') -- --test-threads 1
// ```
use super::helpers::query_helpers::assert_query_matches;
use crate::tests::helpers::fixtures::get_language;
use indoc::indoc;
use tree_sitter::Query;
#[test]
fn issue_2162_out_of_bound() {
let language = get_language("java");
assert!(Query::new(language, "(package_declaration _ (_) @name _)").is_ok());
}
#[test]
fn issue_2107_first_child_group_anchor_had_no_effect() {
let language = get_language("c");
let source_code = indoc! {r#"
void fun(int a, char b, int c) { };
"#};
let query = indoc! {r#"
(parameter_list
.
(
(parameter_declaration) @constant
(#match? @constant "^int")
)
)
"#};
let query = Query::new(language, query).unwrap();
assert_query_matches(
language,
&query,
source_code,
&[(0, vec![("constant", "int a")])],
);
}
View file
@ -2,7 +2,7 @@ use std::{
collections::HashMap,
os::raw::c_void,
sync::{
atomic::{AtomicBool, AtomicU64, Ordering::SeqCst},
atomic::{AtomicBool, AtomicUsize, Ordering::SeqCst},
Mutex,
},
};
@ -25,8 +25,8 @@ unsafe impl Sync for Allocation {}
#[derive(Default)]
struct AllocationRecorder {
enabled: AtomicBool,
allocation_count: AtomicU64,
outstanding_allocations: Mutex<HashMap<Allocation, u64>>,
allocation_count: AtomicUsize,
outstanding_allocations: Mutex<HashMap<Allocation, usize>>,
}
thread_local! {
@ -83,6 +83,9 @@ fn record_alloc(ptr: *mut c_void) {
}
fn record_dealloc(ptr: *mut c_void) {
if ptr.is_null() {
panic!("Zero pointer deallocation!");
}
RECORDER.with(|recorder| {
if recorder.enabled.load(SeqCst) {
recorder
@ -107,9 +110,13 @@ unsafe extern "C" fn ts_record_calloc(count: usize, size: usize) -> *mut c_void
}
unsafe extern "C" fn ts_record_realloc(ptr: *mut c_void, size: usize) -> *mut c_void {
record_dealloc(ptr);
let result = realloc(ptr, size);
record_alloc(result);
if ptr.is_null() {
record_alloc(result);
} else if ptr != result {
record_dealloc(ptr);
record_alloc(result);
}
result
}
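The realloc hook now distinguishes three cases instead of unconditionally pairing a dealloc with an alloc. The accounting rule it implements, as a sketch:

```rust
// For `realloc(ptr, size) -> result`:
//   ptr.is_null()  => malloc-like: record one new allocation (result)
//   ptr != result  => the block moved: ptr is freed, result is allocated
//   ptr == result  => resized in place: outstanding allocations unchanged
```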
View file
@ -1,11 +1,46 @@
lazy_static! {
static ref ROOT_DIR: PathBuf = PathBuf::from(env!("CARGO_MANIFEST_DIR")).parent().unwrap().to_owned();
static ref FIXTURES_DIR: PathBuf = ROOT_DIR.join("test").join("fixtures");
static ref HEADER_DIR: PathBuf = ROOT_DIR.join("lib").join("include");
static ref GRAMMARS_DIR: PathBuf = ROOT_DIR.join("test").join("fixtures").join("grammars");
static ref SCRATCH_DIR: PathBuf = {
pub static ref ROOT_DIR: PathBuf = PathBuf::from(env!("CARGO_MANIFEST_DIR")).parent().unwrap().to_owned();
pub static ref FIXTURES_DIR: PathBuf = ROOT_DIR.join("test").join("fixtures");
pub static ref HEADER_DIR: PathBuf = ROOT_DIR.join("lib").join("include");
pub static ref GRAMMARS_DIR: PathBuf = ROOT_DIR.join("test").join("fixtures").join("grammars");
pub static ref SCRATCH_BASE_DIR: PathBuf = {
let result = ROOT_DIR.join("target").join("scratch");
fs::create_dir_all(&result).unwrap();
result
};
pub static ref SCRATCH_DIR: PathBuf = {
// https://doc.rust-lang.org/reference/conditional-compilation.html
let vendor = if cfg!(target_vendor = "apple") {
"apple"
} else if cfg!(target_vendor = "fortanix") {
"fortanix"
} else if cfg!(target_vendor = "pc") {
"pc"
} else {
"unknown"
};
let env = if cfg!(target_env = "gnu") {
"gnu"
} else if cfg!(target_env = "msvc") {
"msvc"
} else if cfg!(target_env = "musl") {
"musl"
} else if cfg!(target_env = "sgx") {
"sgx"
} else {
"unknown"
};
let endian = if cfg!(target_endian = "little") {
"little"
} else if cfg!(target_endian = "big") {
"big"
} else {
"unknown"
};
let machine = format!("{}-{}-{}-{}-{}", std::env::consts::ARCH, std::env::consts::OS, vendor, env, endian);
let result = SCRATCH_BASE_DIR.join(machine);
fs::create_dir_all(&result).unwrap();
result
};
}
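Keying the scratch directory by target keeps artifacts from different toolchains apart; on a typical x86_64 Linux host the machine string resolves to something like (illustrative):

```text
target/scratch/x86_64-linux-unknown-gnu-little
```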
View file
@ -1,6 +1,6 @@
use lazy_static::lazy_static;
use std::fs;
use std::path::{Path, PathBuf};
use std::{env, fs};
use tree_sitter::Language;
use tree_sitter_highlight::HighlightConfiguration;
use tree_sitter_loader::Loader;
@ -9,7 +9,13 @@ use tree_sitter_tags::TagsConfiguration;
include!("./dirs.rs");
lazy_static! {
static ref TEST_LOADER: Loader = Loader::with_parser_lib_path(SCRATCH_DIR.join("lib"));
static ref TEST_LOADER: Loader = {
let mut loader = Loader::with_parser_lib_path(SCRATCH_DIR.clone());
if env::var("TREE_SITTER_GRAMMAR_DEBUG").is_ok() {
loader.use_debug_build(true);
}
loader
};
}
pub fn test_loader<'a>() -> &'a Loader {
@ -46,9 +52,11 @@ pub fn get_highlight_config(
let locals_query = fs::read_to_string(queries_path.join("locals.scm")).unwrap_or(String::new());
let mut result = HighlightConfiguration::new(
language,
language_name,
&highlights_query,
&injections_query,
&locals_query,
false,
)
.unwrap();
result.configure(&highlight_names);
@ -63,11 +71,7 @@ pub fn get_tags_config(language_name: &str) -> TagsConfiguration {
TagsConfiguration::new(language, &tags_query, &locals_query).unwrap()
}
pub fn get_test_language(
name: &str,
parser_code: &str,
scanner_src_path: Option<&Path>,
) -> Language {
pub fn get_test_language(name: &str, parser_code: &str, path: Option<&Path>) -> Language {
let src_dir = SCRATCH_DIR.join("src").join(name);
fs::create_dir_all(&src_dir).unwrap();
@ -76,11 +80,16 @@ pub fn get_test_language(
fs::write(&parser_path, parser_code).unwrap();
}
if let Some(scanner_src_path) = scanner_src_path {
let scanner_code = fs::read_to_string(&scanner_src_path).unwrap();
let scanner_path = src_dir.join("scanner.c");
if !fs::read_to_string(&scanner_path).map_or(false, |content| content == scanner_code) {
fs::write(&scanner_path, scanner_code).unwrap();
if let Some(path) = path {
let scanner_path = path.join("scanner.c");
if scanner_path.exists() {
let scanner_code = fs::read_to_string(&scanner_path).unwrap();
let scanner_copy_path = src_dir.join("scanner.c");
if !fs::read_to_string(&scanner_copy_path)
.map_or(false, |content| content == scanner_code)
{
fs::write(&scanner_copy_path, scanner_code).unwrap();
}
}
}
View file
@ -6,7 +6,8 @@ pub(super) mod random;
pub(super) mod scope_sequence;
use lazy_static::lazy_static;
use std::{env, time, usize};
use rand::Rng;
use std::env;
lazy_static! {
pub static ref LOG_ENABLED: bool = env::var("TREE_SITTER_LOG").is_ok();
@ -16,11 +17,7 @@ lazy_static! {
}
lazy_static! {
pub static ref START_SEED: usize =
int_env_var("TREE_SITTER_SEED").unwrap_or_else(|| time::SystemTime::now()
.duration_since(time::UNIX_EPOCH)
.unwrap()
.as_secs() as usize,);
pub static ref START_SEED: usize = new_seed();
pub static ref EDIT_COUNT: usize = int_env_var("TREE_SITTER_EDITS").unwrap_or(3);
pub static ref ITERATION_COUNT: usize = int_env_var("TREE_SITTER_ITERATIONS").unwrap_or(10);
}
@ -28,3 +25,10 @@ lazy_static! {
fn int_env_var(name: &'static str) -> Option<usize> {
env::var(name).ok().and_then(|e| e.parse().ok())
}
pub(crate) fn new_seed() -> usize {
int_env_var("TREE_SITTER_SEED").unwrap_or_else(|| {
let mut rng = rand::thread_rng();
rng.gen::<usize>()
})
}
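Because each trial can log its seed when `TREE_SITTER_LOG_SEED` is set, a failing corpus run can be replayed deterministically by pinning the same seed (the value below is illustrative):

```sh
TREE_SITTER_LOG_SEED=1 TREE_SITTER_SEED=1699999999 cargo test test_corpus_for_rust
```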
View file
@ -1,6 +1,8 @@
use rand::prelude::Rng;
use std::{cmp::Ordering, fmt::Write, ops::Range};
use tree_sitter::{Node, Point, Tree, TreeCursor};
use tree_sitter::{
Language, Node, Parser, Point, Query, QueryCapture, QueryCursor, QueryMatch, Tree, TreeCursor,
};
#[derive(Debug)]
pub struct Pattern {
@ -304,3 +306,56 @@ fn compare_depth_first(a: Node, b: Node) -> Ordering {
let b = b.byte_range();
a.start.cmp(&b.start).then_with(|| b.end.cmp(&a.end))
}
pub fn assert_query_matches(
language: Language,
query: &Query,
source: &str,
expected: &[(usize, Vec<(&str, &str)>)],
) {
let mut parser = Parser::new();
parser.set_language(language).unwrap();
let tree = parser.parse(source, None).unwrap();
let mut cursor = QueryCursor::new();
let matches = cursor.matches(&query, tree.root_node(), source.as_bytes());
pretty_assertions::assert_eq!(collect_matches(matches, &query, source), expected);
pretty_assertions::assert_eq!(cursor.did_exceed_match_limit(), false);
}
pub fn collect_matches<'a>(
matches: impl Iterator<Item = QueryMatch<'a, 'a>>,
query: &'a Query,
source: &'a str,
) -> Vec<(usize, Vec<(&'a str, &'a str)>)> {
matches
.map(|m| {
(
m.pattern_index,
format_captures(m.captures.iter().cloned(), query, source),
)
})
.collect()
}
pub fn collect_captures<'a>(
captures: impl Iterator<Item = (QueryMatch<'a, 'a>, usize)>,
query: &'a Query,
source: &'a str,
) -> Vec<(&'a str, &'a str)> {
format_captures(captures.map(|(m, i)| m.captures[i]), query, source)
}
fn format_captures<'a>(
captures: impl Iterator<Item = QueryCapture<'a>>,
query: &'a Query,
source: &'a str,
) -> Vec<(&'a str, &'a str)> {
captures
.map(|capture| {
(
query.capture_names()[capture.index as usize],
capture.node.utf8_text(source.as_bytes()).unwrap(),
)
})
.collect()
}
View file
@ -24,6 +24,7 @@ lazy_static! {
get_highlight_config("rust", Some("injections.scm"), &HIGHLIGHT_NAMES);
static ref HIGHLIGHT_NAMES: Vec<String> = [
"attribute",
"boolean",
"carriage-return",
"comment",
"constant",
@ -61,7 +62,7 @@ lazy_static! {
fn test_highlighting_javascript() {
let source = "const a = function(b) { return b + c; }";
assert_eq!(
&to_token_vector(&source, &JS_HIGHLIGHT).unwrap(),
&to_token_vector(source, &JS_HIGHLIGHT).unwrap(),
&[vec![
("const", vec!["keyword"]),
(" ", vec![]),
@ -71,14 +72,14 @@ fn test_highlighting_javascript() {
(" ", vec![]),
("function", vec!["keyword"]),
("(", vec!["punctuation.bracket"]),
("b", vec!["variable.parameter"]),
("b", vec!["variable"]),
(")", vec!["punctuation.bracket"]),
(" ", vec![]),
("{", vec!["punctuation.bracket"]),
(" ", vec![]),
("return", vec!["keyword"]),
(" ", vec![]),
("b", vec!["variable.parameter"]),
("b", vec!["variable"]),
(" ", vec![]),
("+", vec!["operator"]),
(" ", vec![]),
@ -92,7 +93,7 @@ fn test_highlighting_javascript() {
#[test]
fn test_highlighting_injected_html_in_javascript() {
let source = vec!["const s = html `<div>${a < b}</div>`;"].join("\n");
let source = ["const s = html `<div>${a < b}</div>`;"].join("\n");
assert_eq!(
&to_token_vector(&source, &JS_HIGHLIGHT).unwrap(),
@ -156,7 +157,7 @@ fn test_highlighting_injected_javascript_in_html_mini() {
#[test]
fn test_highlighting_injected_javascript_in_html() {
let source = vec![
let source = [
"<body>",
" <script>",
" const x = new Thing();",
@ -211,7 +212,7 @@ fn test_highlighting_injected_javascript_in_html() {
#[test]
fn test_highlighting_multiline_nodes_to_html() {
let source = vec![
let source = [
"const SOMETHING = `",
" one ${",
" two()",
@ -235,7 +236,7 @@ fn test_highlighting_multiline_nodes_to_html() {
#[test]
fn test_highlighting_with_local_variable_tracking() {
let source = vec![
let source = [
"module.exports = function a(b) {",
" const module = c;",
" console.log(module, b);",
@ -257,7 +258,7 @@ fn test_highlighting_with_local_variable_tracking() {
(" ", vec![]),
("a", vec!["function"]),
("(", vec!["punctuation.bracket"]),
("b", vec!["variable.parameter"]),
("b", vec!["variable"]),
(")", vec!["punctuation.bracket"]),
(" ", vec![]),
("{", vec!["punctuation.bracket"])
@ -284,7 +285,7 @@ fn test_highlighting_with_local_variable_tracking() {
(",", vec!["punctuation.delimiter"]),
(" ", vec![]),
// A parameter, because `b` was defined as a parameter above.
("b", vec!["variable.parameter"]),
("b", vec!["variable"]),
(")", vec!["punctuation.bracket"]),
(";", vec!["punctuation.delimiter"]),
],
@ -295,7 +296,7 @@ fn test_highlighting_with_local_variable_tracking() {
#[test]
fn test_highlighting_empty_lines() {
let source = vec![
let source = [
"class A {",
"",
" b(c) {",
@ -313,7 +314,7 @@ fn test_highlighting_empty_lines() {
&[
"<span class=keyword>class</span> <span class=constructor>A</span> <span class=punctuation.bracket>{</span>\n".to_string(),
"\n".to_string(),
" <span class=function>b</span><span class=punctuation.bracket>(</span><span class=variable.parameter>c</span><span class=punctuation.bracket>)</span> <span class=punctuation.bracket>{</span>\n".to_string(),
" <span class=function>b</span><span class=punctuation.bracket>(</span><span class=variable>c</span><span class=punctuation.bracket>)</span> <span class=punctuation.bracket>{</span>\n".to_string(),
"\n".to_string(),
" <span class=function>d</span><span class=punctuation.bracket>(</span><span class=variable>e</span><span class=punctuation.bracket>)</span>\n".to_string(),
"\n".to_string(),
@ -329,7 +330,7 @@ fn test_highlighting_carriage_returns() {
let source = "a = \"a\rb\"\r\nb\r";
assert_eq!(
&to_html(&source, &JS_HIGHLIGHT).unwrap(),
&to_html(source, &JS_HIGHLIGHT).unwrap(),
&[
"<span class=variable>a</span> <span class=operator>=</span> <span class=string>&quot;a<span class=carriage-return></span>b&quot;</span>\n",
"<span class=variable>b</span>\n",
@ -339,7 +340,7 @@ fn test_highlighting_carriage_returns() {
#[test]
fn test_highlighting_ejs_with_html_and_javascript() {
let source = vec!["<div><% foo() %></div><script> bar() </script>"].join("\n");
let source = ["<div><% foo() %></div><script> bar() </script>"].join("\n");
assert_eq!(
&to_token_vector(&source, &EJS_HIGHLIGHT).unwrap(),
@ -376,7 +377,7 @@ fn test_highlighting_ejs_with_html_and_javascript() {
fn test_highlighting_javascript_with_jsdoc() {
// Regression test: the middle comment has no highlights. This should not prevent
// later injections from highlighting properly.
let source = vec!["a /* @see a */ b; /* nothing */ c; /* @see b */"].join("\n");
let source = ["a /* @see a */ b; /* nothing */ c; /* @see b */"].join("\n");
assert_eq!(
&to_token_vector(&source, &JS_HIGHLIGHT).unwrap(),
@ -404,7 +405,7 @@ fn test_highlighting_javascript_with_jsdoc() {
#[test]
fn test_highlighting_with_content_children_included() {
let source = vec!["assert!(", " a.b.c() < D::e::<F>()", ");"].join("\n");
let source = ["assert!(", " a.b.c() < D::e::<F>()", ");"].join("\n");
assert_eq!(
&to_token_vector(&source, &RUST_HIGHLIGHT).unwrap(),
@ -482,7 +483,7 @@ fn test_highlighting_cancellation() {
#[test]
fn test_highlighting_via_c_api() {
let highlights = vec![
let highlights = [
"class=tag\0",
"class=function\0",
"class=string\0",
@ -496,68 +497,82 @@ fn test_highlighting_via_c_api() {
.iter()
.map(|h| h.as_bytes().as_ptr() as *const c_char)
.collect::<Vec<_>>();
let highlighter = c::ts_highlighter_new(
&highlight_names[0] as *const *const c_char,
&highlight_attrs[0] as *const *const c_char,
highlights.len() as u32,
);
let highlighter = unsafe {
c::ts_highlighter_new(
&highlight_names[0] as *const *const c_char,
&highlight_attrs[0] as *const *const c_char,
highlights.len() as u32,
)
};
let source_code = c_string("<script>\nconst a = b('c');\nc.d();\n</script>");
let js_scope = c_string("source.js");
let js_injection_regex = c_string("^javascript");
let language = get_language("javascript");
let lang_name = c_string("javascript");
let queries = get_language_queries_path("javascript");
let highlights_query = fs::read_to_string(queries.join("highlights.scm")).unwrap();
let injections_query = fs::read_to_string(queries.join("injections.scm")).unwrap();
let locals_query = fs::read_to_string(queries.join("locals.scm")).unwrap();
c::ts_highlighter_add_language(
highlighter,
js_scope.as_ptr(),
js_injection_regex.as_ptr(),
language,
highlights_query.as_ptr() as *const c_char,
injections_query.as_ptr() as *const c_char,
locals_query.as_ptr() as *const c_char,
highlights_query.len() as u32,
injections_query.len() as u32,
locals_query.len() as u32,
);
unsafe {
c::ts_highlighter_add_language(
highlighter,
lang_name.as_ptr(),
js_scope.as_ptr(),
js_injection_regex.as_ptr(),
language,
highlights_query.as_ptr() as *const c_char,
injections_query.as_ptr() as *const c_char,
locals_query.as_ptr() as *const c_char,
highlights_query.len() as u32,
injections_query.len() as u32,
locals_query.len() as u32,
false,
);
}
let html_scope = c_string("text.html.basic");
let html_injection_regex = c_string("^html");
let language = get_language("html");
let lang_name = c_string("html");
let queries = get_language_queries_path("html");
let highlights_query = fs::read_to_string(queries.join("highlights.scm")).unwrap();
let injections_query = fs::read_to_string(queries.join("injections.scm")).unwrap();
c::ts_highlighter_add_language(
highlighter,
html_scope.as_ptr(),
html_injection_regex.as_ptr(),
language,
highlights_query.as_ptr() as *const c_char,
injections_query.as_ptr() as *const c_char,
ptr::null(),
highlights_query.len() as u32,
injections_query.len() as u32,
0,
);
unsafe {
c::ts_highlighter_add_language(
highlighter,
lang_name.as_ptr(),
html_scope.as_ptr(),
html_injection_regex.as_ptr(),
language,
highlights_query.as_ptr() as *const c_char,
injections_query.as_ptr() as *const c_char,
ptr::null(),
highlights_query.len() as u32,
injections_query.len() as u32,
0,
false,
);
}
let buffer = c::ts_highlight_buffer_new();
c::ts_highlighter_highlight(
highlighter,
html_scope.as_ptr(),
source_code.as_ptr(),
source_code.as_bytes().len() as u32,
buffer,
ptr::null_mut(),
);
unsafe {
c::ts_highlighter_highlight(
highlighter,
html_scope.as_ptr(),
source_code.as_ptr(),
source_code.as_bytes().len() as u32,
buffer,
ptr::null_mut(),
);
}
let output_bytes = c::ts_highlight_buffer_content(buffer);
let output_line_offsets = c::ts_highlight_buffer_line_offsets(buffer);
let output_len = c::ts_highlight_buffer_len(buffer);
let output_line_count = c::ts_highlight_buffer_line_count(buffer);
let output_bytes = unsafe { c::ts_highlight_buffer_content(buffer) };
let output_line_offsets = unsafe { c::ts_highlight_buffer_line_offsets(buffer) };
let output_len = unsafe { c::ts_highlight_buffer_len(buffer) };
let output_line_count = unsafe { c::ts_highlight_buffer_line_count(buffer) };
let output_bytes = unsafe { slice::from_raw_parts(output_bytes, output_len as usize) };
let output_line_offsets =
@ -583,8 +598,69 @@ fn test_highlighting_via_c_api() {
]
);
c::ts_highlighter_delete(highlighter);
c::ts_highlight_buffer_delete(buffer);
unsafe {
c::ts_highlighter_delete(highlighter);
c::ts_highlight_buffer_delete(buffer);
}
}
#[test]
fn test_highlighting_with_all_captures_applied() {
let source = "fn main(a: u32, b: u32) -> { let c = a + b; }";
let language = get_language("rust");
let highlights_query = indoc::indoc! {"
[
\"fn\"
\"let\"
] @keyword
(identifier) @variable
(function_item name: (identifier) @function)
(parameter pattern: (identifier) @variable.parameter)
(primitive_type) @type.builtin
\"=\" @operator
[ \"->\" \":\" \";\" ] @punctuation.delimiter
[ \"{\" \"}\" \"(\" \")\" ] @punctuation.bracket
"};
let mut rust_highlight_reverse =
HighlightConfiguration::new(language, "rust", highlights_query, "", "", true).unwrap();
rust_highlight_reverse.configure(&HIGHLIGHT_NAMES);
assert_eq!(
&to_token_vector(source, &rust_highlight_reverse).unwrap(),
&[[
("fn", vec!["keyword"]),
(" ", vec![]),
("main", vec!["function"]),
("(", vec!["punctuation.bracket"]),
("a", vec!["variable.parameter"]),
(":", vec!["punctuation.delimiter"]),
(" ", vec![]),
("u32", vec!["type.builtin"]),
(", ", vec![]),
("b", vec!["variable.parameter"]),
(":", vec!["punctuation.delimiter"]),
(" ", vec![]),
("u32", vec!["type.builtin"]),
(")", vec!["punctuation.bracket"]),
(" ", vec![]),
("->", vec!["punctuation.delimiter"]),
(" ", vec![]),
("{", vec!["punctuation.bracket"]),
(" ", vec![]),
("let", vec!["keyword"]),
(" ", vec![]),
("c", vec!["variable"]),
(" ", vec![]),
("=", vec!["operator"]),
(" ", vec![]),
("a", vec!["variable"]),
(" + ", vec![]),
("b", vec!["variable"]),
(";", vec!["punctuation.delimiter"]),
(" ", vec![]),
("}", vec!["punctuation.bracket"])
]],
);
}
#[test]
@ -667,20 +743,20 @@ fn to_token_vector<'a>(
}
HighlightEvent::Source { start, end } => {
let s = str::from_utf8(&src[start..end]).unwrap();
for (i, l) in s.split("\n").enumerate() {
for (i, l) in s.split('\n').enumerate() {
let l = l.trim_end_matches('\r');
if i > 0 {
lines.push(line);
line = Vec::new();
}
if l.len() > 0 {
if !l.is_empty() {
line.push((l, highlights.clone()));
}
}
}
}
}
if line.len() > 0 {
if !line.is_empty() {
lines.push(line);
}
Ok(lines)

View file

@ -0,0 +1,95 @@
use super::helpers::fixtures::get_language;
use tree_sitter::Parser;
#[test]
fn test_lookahead_iterator() {
let mut parser = Parser::new();
let language = get_language("rust");
parser.set_language(language).unwrap();
let tree = parser.parse("struct Stuff {}", None).unwrap();
let mut cursor = tree.walk();
assert!(cursor.goto_first_child()); // struct
assert!(cursor.goto_first_child()); // struct keyword
let next_state = cursor.node().next_parse_state();
assert_ne!(next_state, 0);
assert_eq!(
next_state,
language.next_state(cursor.node().parse_state(), cursor.node().grammar_id())
);
assert!((next_state as usize) < language.parse_state_count());
assert!(cursor.goto_next_sibling()); // type_identifier
assert_eq!(next_state, cursor.node().parse_state());
assert_eq!(cursor.node().grammar_name(), "identifier");
assert_ne!(cursor.node().grammar_id(), cursor.node().kind_id());
let expected_symbols = ["identifier", "block_comment", "line_comment"];
let mut lookahead = language.lookahead_iterator(next_state).unwrap();
assert_eq!(lookahead.language(), language);
assert!(lookahead.iter_names().eq(expected_symbols));
lookahead.reset_state(next_state);
assert!(lookahead.iter_names().eq(expected_symbols));
lookahead.reset(language, next_state);
assert!(lookahead
.map(|s| language.node_kind_for_id(s).unwrap())
.eq(expected_symbols));
}
#[test]
fn test_lookahead_iterator_modifiable_only_by_mut() {
let mut parser = Parser::new();
let language = get_language("rust");
parser.set_language(language).unwrap();
let tree = parser.parse("struct Stuff {}", None).unwrap();
let mut cursor = tree.walk();
assert!(cursor.goto_first_child()); // struct
assert!(cursor.goto_first_child()); // struct keyword
let next_state = cursor.node().next_parse_state();
assert_ne!(next_state, 0);
let mut lookahead = language.lookahead_iterator(next_state).unwrap();
let _ = lookahead.next();
let mut names = lookahead.iter_names();
let _ = names.next();
}
/// It isn't possible to use a lookahead iterator through a shared reference:
/// error[E0596]: cannot borrow `lookahead` as mutable, as it is not declared as mutable
/// ```compile_fail
/// use tree_sitter::{Parser, Language};
/// let mut parser = Parser::new();
/// let language = unsafe { Language::from_raw(std::ptr::null()) };
/// let tree = parser.parse("", None).unwrap();
/// let mut cursor = tree.walk();
/// let next_state = cursor.node().next_parse_state();
/// let lookahead = language.lookahead_iterator(next_state).unwrap();
/// let _ = lookahead.next();
/// ```
/// It isn't possible to use a lookahead names iterator through a shared reference:
/// error[E0596]: cannot borrow `names` as mutable, as it is not declared as mutable
/// ```compile_fail
/// use tree_sitter::{Parser, Language};
/// let mut parser = Parser::new();
/// let language = unsafe { Language::from_raw(std::ptr::null()) };
/// let tree = parser.parse("", None).unwrap();
/// let mut cursor = tree.walk();
/// let next_state = cursor.node().next_parse_state();
/// if let Some(mut lookahead) = language.lookahead_iterator(next_state) {
/// let _ = lookahead.next();
/// let names = lookahead.iter_names();
/// let _ = names.next();
/// }
/// ```
fn _dummy() {}
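
For contrast, a compiling counterpart binds the iterator mutably; a minimal sketch (the `print_lookaheads` helper is hypothetical and assumes a `Language` loaded elsewhere):

use tree_sitter::Language;

// Hypothetical helper: print every terminal symbol that is valid in `state`.
// `next` takes `&mut self`, so the iterator must be bound with `mut`.
fn print_lookaheads(language: &Language, state: u16) {
    if let Some(mut lookahead) = language.lookahead_iterator(state) {
        while let Some(symbol) = lookahead.next() {
            if let Some(name) = language.node_kind_for_id(symbol) {
                println!("{name}");
            }
        }
    }
}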

View file

@ -1,11 +1,16 @@
mod async_context_test;
mod corpus_test;
mod github_issue_test;
mod helpers;
mod highlight_test;
mod language_test;
mod node_test;
mod parser_hang_test;
mod parser_test;
mod pathological_test;
mod query_test;
mod tags_test;
mod test_highlight_test;
mod test_tags_test;
mod text_provider_test;
mod tree_test;

View file

@ -252,12 +252,14 @@ fn test_node_parent_of_child_by_field_name() {
fn test_node_field_name_for_child() {
let mut parser = Parser::new();
parser.set_language(get_language("c")).unwrap();
let tree = parser.parse("x + y;", None).unwrap();
let tree = parser.parse("int w = x + y;", None).unwrap();
let translation_unit_node = tree.root_node();
let binary_expression_node = translation_unit_node
.named_child(0)
let declaration_node = translation_unit_node.named_child(0).unwrap();
let binary_expression_node = declaration_node
.child_by_field_name("declarator")
.unwrap()
.named_child(0)
.child_by_field_name("value")
.unwrap();
assert_eq!(binary_expression_node.field_name_for_child(0), Some("left"));
@ -385,10 +387,52 @@ fn test_node_named_child_with_aliases_and_extras() {
assert_eq!(root.named_child(4).unwrap().kind(), "C");
}
#[test]
fn test_node_descendant_count() {
let tree = parse_json_example();
let value_node = tree.root_node();
let all_nodes = get_all_nodes(&tree);
assert_eq!(value_node.descendant_count(), all_nodes.len());
let mut cursor = value_node.walk();
for (i, node) in all_nodes.iter().enumerate() {
cursor.goto_descendant(i);
assert_eq!(cursor.node(), *node, "index {i}");
}
for (i, node) in all_nodes.iter().enumerate().rev() {
cursor.goto_descendant(i);
assert_eq!(cursor.node(), *node, "rev index {i}");
}
}
#[test]
fn test_descendant_count_single_node_tree() {
let mut parser = Parser::new();
parser
.set_language(get_language("embedded-template"))
.unwrap();
let tree = parser.parse("hello", None).unwrap();
let nodes = get_all_nodes(&tree);
assert_eq!(nodes.len(), 2);
assert_eq!(tree.root_node().descendant_count(), 2);
let mut cursor = tree.root_node().walk();
cursor.goto_descendant(0);
assert_eq!(cursor.depth(), 0);
assert_eq!(cursor.node(), nodes[0]);
cursor.goto_descendant(1);
assert_eq!(cursor.depth(), 1);
assert_eq!(cursor.node(), nodes[1]);
}
#[test]
fn test_node_descendant_for_range() {
let tree = parse_json_example();
let array_node = tree.root_node().child(0).unwrap();
let array_node = tree.root_node();
// Leaf node exactly matches the given bounds - byte query
let colon_index = JSON_EXAMPLE.find(":").unwrap();
@ -508,7 +552,7 @@ fn test_node_edit() {
let edit = get_random_edit(&mut rand, &mut code);
let mut tree2 = tree.clone();
let edit = perform_edit(&mut tree2, &mut code, &edit);
let edit = perform_edit(&mut tree2, &mut code, &edit).unwrap();
for node in nodes_before.iter_mut() {
node.edit(&edit);
}
@ -841,15 +885,17 @@ fn get_all_nodes(tree: &Tree) -> Vec<Node> {
let mut visited_children = false;
let mut cursor = tree.walk();
loop {
result.push(cursor.node());
if !visited_children && cursor.goto_first_child() {
continue;
} else if cursor.goto_next_sibling() {
visited_children = false;
} else if cursor.goto_parent() {
visited_children = true;
if !visited_children {
result.push(cursor.node());
if !cursor.goto_first_child() {
visited_children = true;
}
} else {
break;
if cursor.goto_next_sibling() {
visited_children = false;
} else if !cursor.goto_parent() {
break;
}
}
}
return result;

View file

@ -0,0 +1,104 @@
// For some reason `Command::spawn` doesn't work in the CI env for many exotic arches.
#![cfg(all(any(target_arch = "x86_64", target_arch = "x86"), not(sanitizing)))]
use crate::{
generate::{generate_parser_for_grammar, load_grammar_file},
tests::helpers::fixtures::{fixtures_dir, get_test_language},
};
use std::{
env::VarError,
process::{Command, Stdio},
};
use tree_sitter::Parser;
// The `sanitizing` cfg is required to avoid running tests under a specific sanitizer,
// because sanitizers don't work well with subprocesses _(it's an assumption)_.
//
// Below are two alternative examples of how to disable tests for some arches
// in case excluding the whole mod from compilation doesn't work well.
//
// XXX: It may also make sense to keep such tests ignored by default
// to avoid surprises, and enable them on CI by passing an extra option explicitly:
//
// > cargo test -- --include-ignored
//
// #[cfg(all(any(target_arch = "x86_64", target_arch = "x86"), not(sanitizing)))]
// #[cfg_attr(not(all(any(target_arch = "x86_64", target_arch = "x86"), not(sanitizing))), ignore)]
//
#[test]
fn test_grammar_that_should_hang_and_not_segfault() {
let parent_sleep_millis = 1000;
let test_name = "test_grammar_that_should_hang_and_not_segfault";
let test_var = "CARGO_HANG_TEST";
eprintln!(" {test_name}");
let tests_exec_path = std::env::args()
.nth(0)
.expect("Failed get get tests executable path");
match std::env::var(test_var) {
Ok(v) if v == test_name => {
eprintln!(" child process id {}", std::process::id());
hang_test();
}
Err(VarError::NotPresent) => {
eprintln!(" parent process id {}", std::process::id());
if true {
let mut command = Command::new(tests_exec_path);
command.arg(test_name).env(test_var, test_name);
if std::env::args().any(|x| x == "--nocapture") {
command.arg("--nocapture");
} else {
command.stdout(Stdio::null()).stderr(Stdio::null());
}
match command.spawn() {
Ok(mut child) => {
std::thread::sleep(std::time::Duration::from_millis(parent_sleep_millis));
match child.try_wait() {
Ok(Some(status)) if status.success() => {
panic!("Child wasn't hang and exited successfully")
}
Ok(Some(status)) => panic!(
"Child wasn't hang and exited with status code: {:?}",
status.code()
),
_ => (),
}
if let Err(e) = child.kill() {
eprintln!(
"Failed to kill hang test sub process id: {}, error: {e}",
child.id()
);
}
}
Err(e) => panic!("{e}"),
}
}
}
Err(e) => panic!("Env var error: {e}"),
_ => unreachable!(),
}
fn hang_test() {
let test_grammar_dir = fixtures_dir()
.join("test_grammars")
.join("get_col_should_hang_not_crash");
let grammar_json = load_grammar_file(&test_grammar_dir.join("grammar.js"), None).unwrap();
let (parser_name, parser_code) =
generate_parser_for_grammar(grammar_json.as_str()).unwrap();
let language =
get_test_language(&parser_name, &parser_code, Some(test_grammar_dir.as_path()));
let mut parser = Parser::new();
parser.set_language(language).unwrap();
let code_that_should_hang = "\nHello";
parser.parse(code_that_should_hang, None).unwrap();
}
}
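
The env-var re-exec pattern above generalizes to any "must hang, must not crash" check; a minimal sketch of the supervision skeleton (the helper name and the one-second budget are assumptions, not the crate's API):

use std::process::{Command, Stdio};
use std::time::Duration;

// Hypothetical skeleton: the test binary re-runs itself with a marker env
// var; the child executes the risky body (expected to hang), while the
// parent sleeps and then asserts the child has *not* exited.
fn expect_child_to_hang(test_name: &str, var: &str, risky_body: impl FnOnce()) {
    if std::env::var(var).as_deref() == Ok(test_name) {
        risky_body(); // child branch: this call should never return
        return;
    }
    let exe = std::env::current_exe().expect("failed to get the test executable path");
    let mut child = Command::new(exe)
        .arg(test_name)
        .env(var, test_name)
        .stdout(Stdio::null())
        .stderr(Stdio::null())
        .spawn()
        .expect("failed to spawn the child process");
    std::thread::sleep(Duration::from_millis(1000));
    // An exit here means the child did not hang, which is the failure case.
    assert!(child.try_wait().expect("try_wait failed").is_none());
    child.kill().ok();
}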

View file

@ -15,6 +15,7 @@ use std::{
thread, time,
};
use tree_sitter::{IncludedRangesError, InputEdit, LogType, Parser, Point, Range};
use tree_sitter_proc_macro::retry;
#[test]
fn test_parsing_simple_string() {
@ -149,7 +150,7 @@ fn test_parsing_with_custom_utf8_input() {
)
);
assert_eq!(root.kind(), "source_file");
assert_eq!(root.has_error(), false);
assert!(!root.has_error());
assert_eq!(root.child(0).unwrap().kind(), "function_item");
}
@ -188,7 +189,7 @@ fn test_parsing_with_custom_utf16_input() {
"(source_file (function_item (visibility_modifier) name: (identifier) parameters: (parameters) body: (block (integer_literal))))"
);
assert_eq!(root.kind(), "source_file");
assert_eq!(root.has_error(), false);
assert!(!root.has_error());
assert_eq!(root.child(0).unwrap().kind(), "function_item");
}
@ -277,7 +278,10 @@ fn test_parsing_invalid_chars_at_eof() {
let mut parser = Parser::new();
parser.set_language(get_language("json")).unwrap();
let tree = parser.parse(b"\xdf", None).unwrap();
assert_eq!(tree.root_node().to_sexp(), "(ERROR (UNEXPECTED INVALID))");
assert_eq!(
tree.root_node().to_sexp(),
"(document (ERROR (UNEXPECTED INVALID)))"
);
}
#[test]
@ -340,7 +344,8 @@ fn test_parsing_after_editing_beginning_of_code() {
deleted_length: 0,
inserted_text: b" || 5".to_vec(),
},
);
)
.unwrap();
let mut recorder = ReadRecorder::new(&code);
let tree = parser
@ -387,7 +392,8 @@ fn test_parsing_after_editing_end_of_code() {
deleted_length: 0,
inserted_text: b".d".to_vec(),
},
);
)
.unwrap();
let mut recorder = ReadRecorder::new(&code);
let tree = parser
@ -466,7 +472,8 @@ h + i
deleted_length: 0,
inserted_text: b"1234".to_vec(),
},
);
)
.unwrap();
assert_eq!(
code,
@ -511,7 +518,7 @@ fn test_parsing_after_detecting_error_in_the_middle_of_a_string_token() {
let tree = parser.parse(&source, None).unwrap();
assert_eq!(
tree.root_node().to_sexp(),
"(module (expression_statement (assignment left: (identifier) right: (expression_list (identifier) (string)))))"
"(module (expression_statement (assignment left: (identifier) right: (expression_list (identifier) (string (string_start) (string_content) (string_end))))))"
);
// Delete a suffix of the source code, starting in the middle of the string
@ -530,12 +537,12 @@ fn test_parsing_after_detecting_error_in_the_middle_of_a_string_token() {
let undo = invert_edit(&source, &edit);
let mut tree2 = tree.clone();
perform_edit(&mut tree2, &mut source, &edit);
perform_edit(&mut tree2, &mut source, &edit).unwrap();
tree2 = parser.parse(&source, Some(&tree2)).unwrap();
assert!(tree2.root_node().has_error());
let mut tree3 = tree2.clone();
perform_edit(&mut tree3, &mut source, &undo);
perform_edit(&mut tree3, &mut source, &undo).unwrap();
tree3 = parser.parse(&source, Some(&tree3)).unwrap();
assert_eq!(tree3.root_node().to_sexp(), tree.root_node().to_sexp(),);
}
@ -644,6 +651,7 @@ fn test_parsing_cancelled_by_another_thread() {
// Timeouts
#[test]
#[retry(10)]
fn test_parsing_with_a_timeout() {
let mut parser = Parser::new();
parser.set_language(get_language("json")).unwrap();
@ -662,8 +670,12 @@ fn test_parsing_with_a_timeout() {
None,
);
assert!(tree.is_none());
#[cfg(not(target_arch = "sparc64"))]
assert!(start_time.elapsed().as_micros() < 2000);
#[cfg(target_arch = "sparc64")]
assert!(start_time.elapsed().as_micros() < 8000);
// Continue parsing, but pause after 1 ms of processing.
parser.set_timeout_micros(5000);
let start_time = time::Instant::now();
@ -701,6 +713,7 @@ fn test_parsing_with_a_timeout() {
}
#[test]
#[retry(10)]
fn test_parsing_with_a_timeout_and_a_reset() {
let mut parser = Parser::new();
parser.set_language(get_language("json")).unwrap();
@ -756,6 +769,7 @@ fn test_parsing_with_a_timeout_and_a_reset() {
}
#[test]
#[retry(10)]
fn test_parsing_with_a_timeout_and_implicit_reset() {
allocations::record(|| {
let mut parser = Parser::new();
@ -789,6 +803,7 @@ fn test_parsing_with_a_timeout_and_implicit_reset() {
}
#[test]
#[retry(10)]
fn test_parsing_with_timeout_and_no_completion() {
allocations::record(|| {
let mut parser = Parser::new();
@ -828,7 +843,7 @@ fn test_parsing_with_one_included_range() {
concat!(
"(program (expression_statement (call_expression ",
"function: (member_expression object: (identifier) property: (property_identifier)) ",
"arguments: (arguments (string)))))",
"arguments: (arguments (string (string_fragment))))))",
)
);
assert_eq!(
@ -1177,7 +1192,7 @@ fn test_parsing_with_a_newly_included_range() {
.set_included_ranges(&[simple_range(range1_start, range1_end)])
.unwrap();
let tree = parser
.parse_with(&mut chunked_input(&source_code, 3), None)
.parse_with(&mut chunked_input(source_code, 3), None)
.unwrap();
assert_eq!(
tree.root_node().to_sexp(),
@ -1196,7 +1211,7 @@ fn test_parsing_with_a_newly_included_range() {
])
.unwrap();
let tree2 = parser
.parse_with(&mut chunked_input(&source_code, 3), Some(&tree))
.parse_with(&mut chunked_input(source_code, 3), Some(&tree))
.unwrap();
assert_eq!(
tree2.root_node().to_sexp(),
@ -1220,7 +1235,7 @@ fn test_parsing_with_a_newly_included_range() {
simple_range(range3_start, range3_end),
])
.unwrap();
let tree3 = parser.parse(&source_code, Some(&tree)).unwrap();
let tree3 = parser.parse(source_code, Some(&tree)).unwrap();
assert_eq!(
tree3.root_node().to_sexp(),
concat!(
@ -1297,6 +1312,85 @@ fn test_parsing_with_included_ranges_and_missing_tokens() {
assert_eq!(root.child(3).unwrap().start_byte(), 4);
}
#[test]
fn test_grammars_that_can_hang_on_eof() {
let (parser_name, parser_code) = generate_parser_for_grammar(
r#"
{
"name": "test_single_null_char_regex",
"rules": {
"source_file": {
"type": "SEQ",
"members": [
{ "type": "STRING", "value": "\"" },
{ "type": "PATTERN", "value": "[\\x00]*" },
{ "type": "STRING", "value": "\"" }
]
}
},
"extras": [ { "type": "PATTERN", "value": "\\s" } ]
}
"#,
)
.unwrap();
let mut parser = Parser::new();
parser
.set_language(get_test_language(&parser_name, &parser_code, None))
.unwrap();
parser.parse("\"", None).unwrap();
let (parser_name, parser_code) = generate_parser_for_grammar(
r#"
{
"name": "test_null_char_with_next_char_regex",
"rules": {
"source_file": {
"type": "SEQ",
"members": [
{ "type": "STRING", "value": "\"" },
{ "type": "PATTERN", "value": "[\\x00-\\x01]*" },
{ "type": "STRING", "value": "\"" }
]
}
},
"extras": [ { "type": "PATTERN", "value": "\\s" } ]
}
"#,
)
.unwrap();
parser
.set_language(get_test_language(&parser_name, &parser_code, None))
.unwrap();
parser.parse("\"", None).unwrap();
let (parser_name, parser_code) = generate_parser_for_grammar(
r#"
{
"name": "test_null_char_with_range_regex",
"rules": {
"source_file": {
"type": "SEQ",
"members": [
{ "type": "STRING", "value": "\"" },
{ "type": "PATTERN", "value": "[\\x00-\\x7F]*" },
{ "type": "STRING", "value": "\"" }
]
}
},
"extras": [ { "type": "PATTERN", "value": "\\s" } ]
}
"#,
)
.unwrap();
parser
.set_language(get_test_language(&parser_name, &parser_code, None))
.unwrap();
parser.parse("\"", None).unwrap();
}
fn simple_range(start: usize, end: usize) -> Range {
Range {
start_byte: start,

View file

@ -0,0 +1,15 @@
[package]
name = "tree-sitter-tests-proc-macro"
version = "0.0.0"
edition = "2021"
publish = false
rust-version.workspace = true
[lib]
proc-macro = true
[dependencies]
proc-macro2 = "1.0.63"
quote = "1"
rand = "0.8.5"
syn = { version = "1", features = ["full"] }

View file

@ -0,0 +1,137 @@
use proc_macro::TokenStream;
use proc_macro2::Span;
use quote::quote;
use syn::{
parse::{Parse, ParseStream},
parse_macro_input, Error, Expr, Ident, ItemFn, LitInt, Token,
};
#[proc_macro_attribute]
pub fn retry(args: TokenStream, input: TokenStream) -> TokenStream {
let count = parse_macro_input!(args as LitInt);
let input = parse_macro_input!(input as ItemFn);
let attrs = input.attrs.clone();
let name = input.sig.ident.clone();
TokenStream::from(quote! {
#(#attrs),*
fn #name() {
#input
for i in 0..=#count {
let result = std::panic::catch_unwind(|| {
#name();
});
if result.is_ok() {
return;
}
if i == #count {
std::panic::resume_unwind(result.unwrap_err());
}
}
}
})
}
#[proc_macro_attribute]
pub fn test_with_seed(args: TokenStream, input: TokenStream) -> TokenStream {
struct Args {
retry: LitInt,
seed: Expr,
seed_fn: Option<Ident>,
}
impl Parse for Args {
fn parse(input: ParseStream) -> syn::Result<Self> {
let mut retry = None;
let mut seed = None;
let mut seed_fn = None;
while !input.is_empty() {
let name = input.parse::<Ident>()?;
match name.to_string().as_str() {
"retry" => {
input.parse::<Token![=]>()?;
retry.replace(input.parse()?);
}
"seed" => {
input.parse::<Token![=]>()?;
seed.replace(input.parse()?);
}
"seed_fn" => {
input.parse::<Token![=]>()?;
seed_fn.replace(input.parse()?);
}
x => {
return Err(Error::new(
name.span(),
format!("Unsupported parameter `{x}`"),
))
}
}
if !input.is_empty() {
input.parse::<Token![,]>()?;
}
}
if retry.is_none() {
retry.replace(LitInt::new("0", Span::mixed_site()));
}
Ok(Args {
retry: retry.expect("`retry` parameter is required"),
seed: seed.expect("`seed` parameter is required"),
seed_fn,
})
}
}
let Args {
retry,
seed,
seed_fn,
} = parse_macro_input!(args as Args);
let seed_fn = seed_fn.iter();
let func = parse_macro_input!(input as ItemFn);
let attrs = func.attrs.clone();
let name = func.sig.ident.clone();
// dbg!(quote::ToTokens::into_token_stream(&func));
TokenStream::from(quote! {
#[test]
#(#attrs),*
fn #name() {
#func
let mut seed = #seed;
for i in 0..=#retry {
let result = std::panic::catch_unwind(|| {
#name(seed);
});
if result.is_ok() {
return;
}
if i == #retry {
std::panic::resume_unwind(result.unwrap_err());
}
#(
seed = #seed_fn();
)*
if i < #retry {
println!("\nRetry {}/{} with a new seed {}", i + 1, #retry, seed);
}
}
}
})
}
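
For reference, a hedged sketch of how these attributes are meant to be applied (`new_seed` and the test bodies are assumptions for illustration):

// `#[retry(2)]` re-runs a flaky test up to two extra times before
// propagating the panic; it sits below `#[test]`, as in the parser tests.
#[test]
#[retry(2)]
fn flaky_test() { /* ... */ }

// `#[test_with_seed]` emits its own `#[test]`, threads `seed` into the
// body, and regenerates it via `seed_fn` between retries.
#[test_with_seed(retry = 3, seed = 42, seed_fn = new_seed)]
fn randomized_test(seed: usize) { /* ... */ }

fn new_seed() -> usize {
    rand::random()
}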

File diff suppressed because it is too large

View file

@ -9,7 +9,7 @@ use std::{
use tree_sitter::Point;
use tree_sitter_tags::{c_lib as c, Error, TagsConfiguration, TagsContext};
const PYTHON_TAG_QUERY: &'static str = r#"
const PYTHON_TAG_QUERY: &str = r#"
(
(function_definition
name: (identifier) @name
@ -39,7 +39,7 @@ const PYTHON_TAG_QUERY: &'static str = r#"
attribute: (identifier) @name)) @reference.call
"#;
const JS_TAG_QUERY: &'static str = r#"
const JS_TAG_QUERY: &str = r#"
(
(comment)* @doc .
(class_declaration
@ -68,7 +68,7 @@ const JS_TAG_QUERY: &'static str = r#"
function: (identifier) @name) @reference.call
"#;
const RUBY_TAG_QUERY: &'static str = r#"
const RUBY_TAG_QUERY: &str = r#"
(method
name: (_) @name) @definition.method
@ -359,25 +359,29 @@ fn test_tags_via_c_api() {
);
let c_scope_name = CString::new(scope_name).unwrap();
let result = c::ts_tagger_add_language(
tagger,
c_scope_name.as_ptr(),
language,
JS_TAG_QUERY.as_ptr(),
ptr::null(),
JS_TAG_QUERY.len() as u32,
0,
);
let result = unsafe {
c::ts_tagger_add_language(
tagger,
c_scope_name.as_ptr(),
language,
JS_TAG_QUERY.as_ptr(),
ptr::null(),
JS_TAG_QUERY.len() as u32,
0,
)
};
assert_eq!(result, c::TSTagsError::Ok);
let result = c::ts_tagger_tag(
tagger,
c_scope_name.as_ptr(),
source_code.as_ptr(),
source_code.len() as u32,
buffer,
ptr::null(),
);
let result = unsafe {
c::ts_tagger_tag(
tagger,
c_scope_name.as_ptr(),
source_code.as_ptr(),
source_code.len() as u32,
buffer,
ptr::null(),
)
};
assert_eq!(result, c::TSTagsError::Ok);
let tags = unsafe {
slice::from_raw_parts(
@ -419,8 +423,10 @@ fn test_tags_via_c_api() {
]
);
c::ts_tags_buffer_delete(buffer);
c::ts_tagger_delete(tagger);
unsafe {
c::ts_tags_buffer_delete(buffer);
c::ts_tagger_delete(tagger);
}
});
}

View file

@ -12,7 +12,7 @@ fn test_highlight_test_with_basic_test() {
Some("injections.scm"),
&[
"function".to_string(),
"variable.parameter".to_string(),
"variable".to_string(),
"keyword".to_string(),
],
);
@ -22,7 +22,8 @@ fn test_highlight_test_with_basic_test() {
" // ^ function",
" // ^ keyword",
" return d + e;",
" // ^ variable.parameter",
" // ^ variable",
" // ^ !variable",
"};",
]
.join("\n");
@ -32,18 +33,10 @@ fn test_highlight_test_with_basic_test() {
assert_eq!(
assertions,
&[
Assertion {
position: Point::new(1, 5),
expected_capture_name: "function".to_string()
},
Assertion {
position: Point::new(1, 11),
expected_capture_name: "keyword".to_string()
},
Assertion {
position: Point::new(4, 9),
expected_capture_name: "variable.parameter".to_string()
},
Assertion::new(1, 5, false, String::from("function")),
Assertion::new(1, 11, false, String::from("keyword")),
Assertion::new(4, 9, false, String::from("variable")),
Assertion::new(4, 11, true, String::from("variable")),
]
);
@ -60,6 +53,7 @@ fn test_highlight_test_with_basic_test() {
(Point::new(1, 19), Point::new(1, 20), Highlight(1)), // "d"
(Point::new(4, 2), Point::new(4, 8), Highlight(2)), // "return"
(Point::new(4, 9), Point::new(4, 10), Highlight(1)), // "d"
(Point::new(4, 13), Point::new(4, 14), Highlight(1)), // "e"
]
);
}

View file

@ -16,6 +16,7 @@ fn test_tags_test_with_basic_test() {
" # ^ reference.call",
" return d(e)",
" # ^ reference.call",
" # ^ !variable.parameter",
"",
]
.join("\n");
@ -26,18 +27,10 @@ fn test_tags_test_with_basic_test() {
assert_eq!(
assertions,
&[
Assertion {
position: Point::new(1, 4),
expected_capture_name: "definition.function".to_string(),
},
Assertion {
position: Point::new(3, 9),
expected_capture_name: "reference.call".to_string(),
},
Assertion {
position: Point::new(5, 11),
expected_capture_name: "reference.call".to_string(),
},
Assertion::new(1, 4, false, String::from("definition.function")),
Assertion::new(3, 9, false, String::from("reference.call")),
Assertion::new(5, 11, false, String::from("reference.call")),
Assertion::new(5, 13, true, String::from("variable.parameter")),
]
);

View file

@ -0,0 +1,173 @@
use std::{iter, sync::Arc};
use crate::tests::helpers::fixtures::get_language;
use tree_sitter::{Language, Node, Parser, Point, Query, QueryCursor, TextProvider, Tree};
fn parse_text(text: impl AsRef<[u8]>) -> (Tree, Language) {
let language = get_language("c");
let mut parser = Parser::new();
parser.set_language(language).unwrap();
(parser.parse(text, None).unwrap(), language)
}
fn parse_text_with<T, F>(callback: &mut F) -> (Tree, Language)
where
T: AsRef<[u8]>,
F: FnMut(usize, Point) -> T,
{
let language = get_language("c");
let mut parser = Parser::new();
parser.set_language(language).unwrap();
let tree = parser.parse_with(callback, None).unwrap();
// eprintln!("{}", tree.clone().root_node().to_sexp());
assert_eq!("comment", tree.clone().root_node().child(0).unwrap().kind());
(tree, language)
}
fn tree_query<I: AsRef<[u8]>>(tree: &Tree, text: impl TextProvider<I>, language: Language) {
let query = Query::new(language, "((comment) @c (#eq? @c \"// comment\"))").unwrap();
let mut cursor = QueryCursor::new();
let mut captures = cursor.captures(&query, tree.root_node(), text);
let (match_, idx) = captures.next().unwrap();
let capture = match_.captures[idx];
assert_eq!(capture.index as usize, idx);
assert_eq!("comment", capture.node.kind());
}
fn check_parsing<I: AsRef<[u8]>>(
parser_text: impl AsRef<[u8]>,
text_provider: impl TextProvider<I>,
) {
let (tree, language) = parse_text(parser_text);
tree_query(&tree, text_provider, language);
}
fn check_parsing_callback<T, F, I: AsRef<[u8]>>(
parser_callback: &mut F,
text_provider: impl TextProvider<I>,
) where
T: AsRef<[u8]>,
F: FnMut(usize, Point) -> T,
{
let (tree, language) = parse_text_with(parser_callback);
tree_query(&tree, text_provider, language);
}
#[test]
fn test_text_provider_for_str_slice() {
let text: &str = "// comment";
check_parsing(text, text.as_bytes());
check_parsing(text.as_bytes(), text.as_bytes());
}
#[test]
fn test_text_provider_for_string() {
let text: String = "// comment".to_owned();
check_parsing(text.clone(), text.as_bytes());
check_parsing(text.as_bytes(), text.as_bytes());
check_parsing(<_ as AsRef<[u8]>>::as_ref(&text), text.as_bytes());
}
#[test]
fn test_text_provider_for_box_of_str_slice() {
let text: Box<str> = "// comment".to_owned().into_boxed_str();
check_parsing(text.as_bytes(), text.as_bytes());
check_parsing(<_ as AsRef<str>>::as_ref(&text), text.as_bytes());
check_parsing(text.as_ref(), text.as_ref().as_bytes());
check_parsing(text.as_ref(), text.as_bytes());
}
#[test]
fn test_text_provider_for_box_of_bytes_slice() {
let text: Box<[u8]> = "// comment".to_owned().into_boxed_str().into_boxed_bytes();
check_parsing(text.as_ref(), text.as_ref());
check_parsing(text.as_ref(), &*text);
check_parsing(&*text, &*text);
}
#[test]
fn test_text_provider_for_vec_of_bytes() {
let text: Vec<u8> = "// comment".to_owned().into_bytes();
check_parsing(&*text, &*text);
}
#[test]
fn test_text_provider_for_arc_of_bytes_slice() {
let text: Vec<u8> = "// comment".to_owned().into_bytes();
let text: Arc<[u8]> = Arc::from(text);
check_parsing(&*text, &*text);
check_parsing(text.as_ref(), text.as_ref());
check_parsing(text.clone(), text.as_ref());
}
#[test]
fn test_text_provider_callback_with_str_slice() {
let text: &str = "// comment";
check_parsing(text, |_node: Node<'_>| iter::once(text));
check_parsing_callback(
&mut |offset, _point| {
(offset < text.len())
.then(|| text.as_bytes())
.unwrap_or_default()
},
|_node: Node<'_>| iter::once(text),
);
}
#[test]
fn test_text_provider_callback_with_owned_string_slice() {
let text: &str = "// comment";
check_parsing_callback(
&mut |offset, _point| {
(offset < text.len())
.then(|| text.as_bytes())
.unwrap_or_default()
},
|_node: Node<'_>| {
let slice: String = text.to_owned();
iter::once(slice)
},
);
}
#[test]
fn test_text_provider_callback_with_owned_bytes_vec_slice() {
let text: &str = "// comment";
check_parsing_callback(
&mut |offset, _point| {
(offset < text.len())
.then(|| text.as_bytes())
.unwrap_or_default()
},
|_node: Node<'_>| {
let slice: Vec<u8> = text.to_owned().into_bytes();
iter::once(slice)
},
);
}
#[test]
fn test_text_provider_callback_with_owned_arc_of_bytes_slice() {
let text: &str = "// comment";
check_parsing_callback(
&mut |offset, _point| {
(offset < text.len())
.then(|| text.as_bytes())
.unwrap_or_default()
},
|_node: Node<'_>| {
let slice: Arc<[u8]> = text.to_owned().into_bytes().into();
iter::once(slice)
},
);
}

View file

@ -306,7 +306,7 @@ fn test_tree_cursor() {
.parse(
"
struct Stuff {
a: A;
a: A,
b: Option<B>,
}
",
@ -331,6 +331,88 @@ fn test_tree_cursor() {
assert!(cursor.goto_next_sibling());
assert_eq!(cursor.node().kind(), "field_declaration_list");
assert_eq!(cursor.node().is_named(), true);
assert!(cursor.goto_last_child());
assert_eq!(cursor.node().kind(), "}");
assert_eq!(cursor.node().is_named(), false);
assert_eq!(cursor.node().start_position(), Point { row: 4, column: 16 });
assert!(cursor.goto_previous_sibling());
assert_eq!(cursor.node().kind(), ",");
assert_eq!(cursor.node().is_named(), false);
assert_eq!(cursor.node().start_position(), Point { row: 3, column: 32 });
assert!(cursor.goto_previous_sibling());
assert_eq!(cursor.node().kind(), "field_declaration");
assert_eq!(cursor.node().is_named(), true);
assert_eq!(cursor.node().start_position(), Point { row: 3, column: 20 });
assert!(cursor.goto_previous_sibling());
assert_eq!(cursor.node().kind(), ",");
assert_eq!(cursor.node().is_named(), false);
assert_eq!(cursor.node().start_position(), Point { row: 2, column: 24 });
assert!(cursor.goto_previous_sibling());
assert_eq!(cursor.node().kind(), "field_declaration");
assert_eq!(cursor.node().is_named(), true);
assert_eq!(cursor.node().start_position(), Point { row: 2, column: 20 });
assert!(cursor.goto_previous_sibling());
assert_eq!(cursor.node().kind(), "{");
assert_eq!(cursor.node().is_named(), false);
assert_eq!(cursor.node().start_position(), Point { row: 1, column: 29 });
let mut copy = tree.walk();
copy.reset_to(cursor);
assert_eq!(copy.node().kind(), "{");
assert_eq!(copy.node().is_named(), false);
assert!(copy.goto_parent());
assert_eq!(copy.node().kind(), "field_declaration_list");
assert_eq!(copy.node().is_named(), true);
assert!(copy.goto_parent());
assert_eq!(copy.node().kind(), "struct_item");
}
#[test]
fn test_tree_cursor_previous_sibling() {
let mut parser = Parser::new();
parser.set_language(get_language("rust")).unwrap();
let text = "
// Hi there
// This is fun!
// Another one!
";
let tree = parser.parse(text, None).unwrap();
let mut cursor = tree.walk();
assert_eq!(cursor.node().kind(), "source_file");
assert!(cursor.goto_last_child());
assert_eq!(cursor.node().kind(), "line_comment");
assert_eq!(
cursor.node().utf8_text(text.as_bytes()).unwrap(),
"// Another one!"
);
assert!(cursor.goto_previous_sibling());
assert_eq!(cursor.node().kind(), "line_comment");
assert_eq!(
cursor.node().utf8_text(text.as_bytes()).unwrap(),
"// This is fun!"
);
assert!(cursor.goto_previous_sibling());
assert_eq!(cursor.node().kind(), "line_comment");
assert_eq!(
cursor.node().utf8_text(text.as_bytes()).unwrap(),
"// Hi there"
);
assert!(!cursor.goto_previous_sibling());
}
#[test]
@ -620,7 +702,7 @@ fn get_changed_ranges(
source_code: &mut Vec<u8>,
edit: Edit,
) -> Vec<Range> {
perform_edit(tree, source_code, &edit);
perform_edit(tree, source_code, &edit).unwrap();
let new_tree = parser.parse(&source_code, Some(tree)).unwrap();
let result = tree.changed_ranges(&new_tree).collect();
*tree = new_tree;

View file

@ -1,9 +1,7 @@
use anyhow::Result;
use std::io;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use std::thread;
use tree_sitter::Parser;
use tree_sitter::{Parser, Tree};
#[cfg(unix)]
use anyhow::{anyhow, Context};
@ -13,55 +11,86 @@ use std::path::PathBuf;
use std::process::{Child, ChildStdin, Command, Stdio};
#[cfg(unix)]
const HTML_HEADER: &[u8] = b"<!DOCTYPE html>\n<style>svg { width: 100%; }</style>\n\n";
const HTML_HEADER: &[u8] = b"
<!DOCTYPE html>
pub fn cancel_on_stdin() -> Arc<AtomicUsize> {
<style>
svg { width: 100%; }
</style>
";
pub fn cancel_on_signal() -> Arc<AtomicUsize> {
let result = Arc::new(AtomicUsize::new(0));
if atty::is(atty::Stream::Stdin) {
thread::spawn({
let flag = result.clone();
move || {
let mut line = String::new();
io::stdin().read_line(&mut line).unwrap();
flag.store(1, Ordering::Relaxed);
}
});
}
ctrlc::set_handler({
let flag = result.clone();
move || {
flag.store(1, Ordering::Relaxed);
}
})
.expect("Error setting Ctrl-C handler");
result
}
#[cfg(windows)]
pub struct LogSession();
pub struct LogSession;
#[cfg(unix)]
pub struct LogSession(PathBuf, Option<Child>, Option<ChildStdin>);
pub struct LogSession {
path: PathBuf,
dot_process: Option<Child>,
dot_process_stdin: Option<ChildStdin>,
}
#[cfg(windows)]
pub fn print_tree_graph(_tree: &Tree, _path: &str) -> Result<()> {
Ok(())
}
#[cfg(windows)]
pub fn log_graphs(_parser: &mut Parser, _path: &str) -> Result<LogSession> {
Ok(LogSession())
Ok(LogSession)
}
#[cfg(unix)]
pub fn print_tree_graph(tree: &Tree, path: &str) -> Result<()> {
let session = LogSession::new(path)?;
tree.print_dot_graph(session.dot_process_stdin.as_ref().unwrap());
Ok(())
}
#[cfg(unix)]
pub fn log_graphs(parser: &mut Parser, path: &str) -> Result<LogSession> {
use std::io::Write;
let session = LogSession::new(path)?;
parser.print_dot_graphs(session.dot_process_stdin.as_ref().unwrap());
Ok(session)
}
let mut dot_file = std::fs::File::create(path)?;
dot_file.write(HTML_HEADER)?;
let mut dot_process = Command::new("dot")
.arg("-Tsvg")
.stdin(Stdio::piped())
.stdout(dot_file)
.spawn()
.with_context(|| "Failed to run the `dot` command. Check that graphviz is installed.")?;
let dot_stdin = dot_process
.stdin
.take()
.ok_or_else(|| anyhow!("Failed to open stdin for `dot` process."))?;
parser.print_dot_graphs(&dot_stdin);
Ok(LogSession(
PathBuf::from(path),
Some(dot_process),
Some(dot_stdin),
))
#[cfg(unix)]
impl LogSession {
fn new(path: &str) -> Result<Self> {
use std::io::Write;
let mut dot_file = std::fs::File::create(path)?;
dot_file.write(HTML_HEADER)?;
let mut dot_process = Command::new("dot")
.arg("-Tsvg")
.stdin(Stdio::piped())
.stdout(dot_file)
.spawn()
.with_context(|| {
"Failed to run the `dot` command. Check that graphviz is installed."
})?;
let dot_stdin = dot_process
.stdin
.take()
.ok_or_else(|| anyhow!("Failed to open stdin for `dot` process."))?;
Ok(Self {
path: PathBuf::from(path),
dot_process: Some(dot_process),
dot_process_stdin: Some(dot_stdin),
})
}
}
#[cfg(unix)]
@ -69,13 +98,13 @@ impl Drop for LogSession {
fn drop(&mut self) {
use std::fs;
drop(self.2.take().unwrap());
let output = self.1.take().unwrap().wait_with_output().unwrap();
drop(self.dot_process_stdin.take().unwrap());
let output = self.dot_process.take().unwrap().wait_with_output().unwrap();
if output.status.success() {
if cfg!(target_os = "macos")
&& fs::metadata(&self.0).unwrap().len() > HTML_HEADER.len() as u64
&& fs::metadata(&self.path).unwrap().len() > HTML_HEADER.len() as u64
{
Command::new("open").arg(&self.0).output().unwrap();
Command::new("open").arg(&self.path).output().unwrap();
}
} else {
eprintln!(

View file

@ -1,5 +1,6 @@
use super::generate::parse_grammar::GrammarJSON;
use anyhow::{anyhow, Context, Result};
use path_slash::PathExt as _;
use std::{
ffi::{OsStr, OsString},
fs,
@ -60,7 +61,7 @@ pub fn compile_language_to_wasm(
volume_string = OsString::from(parent);
volume_string.push(":/src:Z");
command.arg("--workdir");
command.arg(&Path::new("/src").join(filename));
command.arg(Path::new("/src").join(filename).to_slash_lossy().as_ref());
} else {
volume_string = OsString::from(language_dir);
volume_string.push(":/src:Z");
@ -84,6 +85,11 @@ pub fn compile_language_to_wasm(
// Run `emcc` in a container using the `emscripten-slim` image
command.args(&[EMSCRIPTEN_TAG, "emcc"]);
} else {
if force_docker {
return Err(anyhow!(
"You must have docker on your PATH to run this command with --docker"
));
}
return Err(anyhow!(
"You must have either emcc or docker on your PATH to run this command"
));
@ -116,14 +122,18 @@ pub fn compile_language_to_wasm(
let scanner_cpp_path = src.join("scanner.cpp");
if language_dir.join(&scanner_cc_path).exists() {
command.arg("-xc++").arg(&scanner_cc_path);
command
.arg("-xc++")
.arg(scanner_cc_path.to_slash_lossy().as_ref());
} else if language_dir.join(&scanner_cpp_path).exists() {
command.arg("-xc++").arg(&scanner_cpp_path);
command
.arg("-xc++")
.arg(scanner_cpp_path.to_slash_lossy().as_ref());
} else if language_dir.join(&scanner_c_path).exists() {
command.arg(&scanner_c_path);
command.arg(scanner_c_path.to_slash_lossy().as_ref());
}
command.arg(&parser_c_path);
command.arg(parser_c_path.to_slash_lossy().as_ref());
let output = command
.output()