diff --git a/Cargo.lock b/Cargo.lock index cd411095..c13deb13 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -123,9 +123,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.0.25" +version = "1.0.66" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f159dfd43363c4d08055a07703eb7a3406b0dac4d0584d96965a3262db3c9d16" +checksum = "4c0496836a84f8d0495758516b8621a622beb77c0fed418570e50764093ced48" [[package]] name = "cfg-if" @@ -272,6 +272,15 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" +[[package]] +name = "html-escape" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d348900ce941b7474395ba922ed3735a517df4546a2939ddb416ce85eeaa988e" +dependencies = [ + "utf8-width", +] + [[package]] name = "idna" version = "0.1.5" @@ -832,7 +841,7 @@ dependencies = [ [[package]] name = "tree-sitter-cli" -version = "0.17.3" +version = "0.18.0" dependencies = [ "ansi_term", "atty", @@ -841,6 +850,7 @@ dependencies = [ "difference", "dirs", "glob", + "html-escape", "lazy_static", "libloading", "log", @@ -940,6 +950,12 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "796f7e48bef87609f7ade7e06495a87d5cd06c7866e6a5cbfceffc558a243737" +[[package]] +name = "utf8-width" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9071ac216321a4470a69fb2b28cfc68dcd1a39acd877c8be8e014df6772d8efa" + [[package]] name = "vec_map" version = "0.8.1" diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 1748516d..b3029c6a 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "tree-sitter-cli" description = "CLI tool for developing, testing, and using Tree-sitter parsers" -version = "0.17.3" +version = "0.18.0" authors = ["Max Brunsfeld "] edition = "2018" license = "MIT" @@ -20,7 +20,7 @@ harness = false [dependencies] ansi_term = "0.11" -cc = "1.0" +cc = "^1.0.58" atty = "0.2" clap = "2.32" difference = "2.0" @@ -36,6 +36,7 @@ serde_derive = "1.0" smallbitvec = "2.3.0" tiny_http = "0.6" webbrowser = "0.5.1" +html-escape = "0.2.6" [dependencies.tree-sitter] version = ">= 0.17.0" diff --git a/cli/npm/package.json b/cli/npm/package.json index 4c6dfe90..85cf5da6 100644 --- a/cli/npm/package.json +++ b/cli/npm/package.json @@ -1,6 +1,6 @@ { "name": "tree-sitter-cli", - "version": "0.17.3", + "version": "0.18.0", "author": "Max Brunsfeld", "license": "MIT", "repository": { diff --git a/cli/src/generate/build_tables/build_parse_table.rs b/cli/src/generate/build_tables/build_parse_table.rs index c63701ee..bfb01736 100644 --- a/cli/src/generate/build_tables/build_parse_table.rs +++ b/cli/src/generate/build_tables/build_parse_table.rs @@ -577,7 +577,7 @@ impl<'a> ParseTableBuilder<'a> { "(precedence: {}, associativity: {:?})", precedence, associativity )) - } else if precedence > 0 { + } else if precedence != 0 { Some(format!("(precedence: {})", precedence)) } else { None @@ -619,6 +619,28 @@ impl<'a> ParseTableBuilder<'a> { } shift_items.sort_unstable(); reduce_items.sort_unstable(); + + let list_rule_names = |mut msg: &mut String, items: &[&ParseItem]| { + let mut last_rule_id = None; + for item in items { + if last_rule_id == Some(item.variable_index) { + continue; + } + + if last_rule_id.is_some() { + write!(&mut msg, " and").unwrap(); + } + + last_rule_id = Some(item.variable_index); + write!( + msg, + " `{}`", + self.symbol_name(&Symbol::non_terminal(item.variable_index as usize)) + ) + .unwrap(); + } + }; + if actual_conflict.len() > 1 { if shift_items.len() > 0 { resolution_count += 1; @@ -628,17 +650,7 @@ impl<'a> ParseTableBuilder<'a> { resolution_count ) .unwrap(); - for (i, item) in shift_items.iter().enumerate() { - if i > 0 { - write!(&mut msg, " and").unwrap(); - } - write!( - &mut msg, - " `{}`", - self.symbol_name(&Symbol::non_terminal(item.variable_index as usize)) - ) - .unwrap(); - } + list_rule_names(&mut msg, &shift_items); write!(&mut msg, " than in the other rules.\n").unwrap(); } @@ -658,21 +670,11 @@ impl<'a> ParseTableBuilder<'a> { resolution_count += 1; write!( &mut msg, - " {}: Specify a left or right associativity in ", + " {}: Specify a left or right associativity in", resolution_count ) .unwrap(); - for (i, item) in reduce_items.iter().enumerate() { - if i > 0 { - write!(&mut msg, " and ").unwrap(); - } - write!( - &mut msg, - "`{}`", - self.symbol_name(&Symbol::non_terminal(item.variable_index as usize)) - ) - .unwrap(); - } + list_rule_names(&mut msg, &reduce_items); write!(&mut msg, "\n").unwrap(); } diff --git a/cli/src/generate/build_tables/mod.rs b/cli/src/generate/build_tables/mod.rs index 2e5d2f57..fba0fef3 100644 --- a/cli/src/generate/build_tables/mod.rs +++ b/cli/src/generate/build_tables/mod.rs @@ -13,7 +13,7 @@ use self::minimize_parse_table::minimize_parse_table; use self::token_conflicts::TokenConflictMap; use crate::error::Result; use crate::generate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar}; -use crate::generate::nfa::{CharacterSet, NfaCursor}; +use crate::generate::nfa::NfaCursor; use crate::generate::node_types::VariableInfo; use crate::generate::rules::{AliasMap, Symbol, SymbolType, TokenSet}; use crate::generate::tables::{LexTable, ParseAction, ParseTable, ParseTableEntry}; @@ -472,10 +472,8 @@ fn all_chars_are_alphabetical(cursor: &NfaCursor) -> bool { cursor.transition_chars().all(|(chars, is_sep)| { if is_sep { true - } else if let CharacterSet::Include(chars) = chars { - chars.iter().all(|c| c.is_alphabetic() || *c == '_') } else { - false + chars.chars().all(|c| c.is_alphabetic() || c == '_') } }) } diff --git a/cli/src/generate/nfa.rs b/cli/src/generate/nfa.rs index 4cbfaaa3..99f595d0 100644 --- a/cli/src/generate/nfa.rs +++ b/cli/src/generate/nfa.rs @@ -6,10 +6,9 @@ use std::fmt; use std::mem::swap; use std::ops::Range; -#[derive(Clone, Debug, PartialEq, Eq, Hash)] -pub enum CharacterSet { - Include(Vec), - Exclude(Vec), +#[derive(Clone, PartialEq, Eq, Hash)] +pub struct CharacterSet { + ranges: Vec>, } #[derive(Debug, PartialEq, Eq)] @@ -52,142 +51,233 @@ impl Default for Nfa { } } +const END: u32 = char::MAX as u32 + 1; + impl CharacterSet { pub fn empty() -> Self { - CharacterSet::Include(Vec::new()) + CharacterSet { ranges: Vec::new() } } - pub fn negate(self) -> CharacterSet { - match self { - CharacterSet::Include(chars) => CharacterSet::Exclude(chars), - CharacterSet::Exclude(chars) => CharacterSet::Include(chars), + pub fn from_range(mut first: char, mut last: char) -> Self { + if first > last { + swap(&mut first, &mut last); + } + CharacterSet { + ranges: vec![(first as u32)..(last as u32 + 1)], } } - pub fn add_char(self, c: char) -> Self { - if let CharacterSet::Include(mut chars) = self { - if let Err(i) = chars.binary_search(&c) { - chars.insert(i, c); + pub fn from_char(c: char) -> Self { + CharacterSet { + ranges: vec![(c as u32)..(c as u32 + 1)], + } + } + + pub fn negate(mut self) -> CharacterSet { + let mut i = 0; + let mut previous_end = 0; + while i < self.ranges.len() { + let range = &mut self.ranges[i]; + let start = previous_end; + previous_end = range.end; + if start < range.start { + self.ranges[i] = start..range.start; + i += 1; + } else { + self.ranges.remove(i); } - CharacterSet::Include(chars) - } else { - panic!("Called add with a negated character set"); } + if previous_end < END { + self.ranges.push(previous_end..END); + } + self } - pub fn add_range(self, start: char, end: char) -> Self { - if let CharacterSet::Include(mut chars) = self { - let mut c = start as u32; - while c <= end as u32 { - chars.push(char::from_u32(c).unwrap()); - c += 1; + pub fn add_char(mut self, c: char) -> Self { + self.add_int_range(0, c as u32, c as u32 + 1); + self + } + + pub fn add_range(mut self, start: char, end: char) -> Self { + self.add_int_range(0, start as u32, end as u32 + 1); + self + } + + pub fn add(mut self, other: &CharacterSet) -> Self { + let mut index = 0; + for range in &other.ranges { + index = self.add_int_range(index, range.start as u32, range.end as u32); + } + self + } + + fn add_int_range(&mut self, mut i: usize, start: u32, end: u32) -> usize { + while i < self.ranges.len() { + let range = &mut self.ranges[i]; + if range.start > end { + self.ranges.insert(i, start..end); + return i; } - chars.sort_unstable(); - chars.dedup(); - CharacterSet::Include(chars) - } else { - panic!("Called add with a negated character set"); - } - } - - pub fn add(self, other: &CharacterSet) -> Self { - match self { - CharacterSet::Include(mut chars) => match other { - CharacterSet::Include(other_chars) => { - chars.extend(other_chars); - chars.sort_unstable(); - chars.dedup(); - CharacterSet::Include(chars) - } - CharacterSet::Exclude(other_chars) => { - let excluded_chars = other_chars - .iter() - .cloned() - .filter(|c| !chars.contains(&c)) - .collect(); - CharacterSet::Exclude(excluded_chars) - } - }, - CharacterSet::Exclude(mut chars) => match other { - CharacterSet::Include(other_chars) => { - chars.retain(|c| !other_chars.contains(&c)); - CharacterSet::Exclude(chars) - } - CharacterSet::Exclude(other_chars) => { - chars.retain(|c| other_chars.contains(&c)); - CharacterSet::Exclude(chars) - } - }, + if range.end >= start { + range.end = range.end.max(end); + range.start = range.start.min(start); + return i; + } + i += 1; } + self.ranges.push(start..end); + i } pub fn does_intersect(&self, other: &CharacterSet) -> bool { - match self { - CharacterSet::Include(chars) => match other { - CharacterSet::Include(other_chars) => compare_chars(chars, other_chars).common, - CharacterSet::Exclude(other_chars) => compare_chars(chars, other_chars).left_only, - }, - CharacterSet::Exclude(chars) => match other { - CharacterSet::Include(other_chars) => compare_chars(chars, other_chars).right_only, - CharacterSet::Exclude(_) => true, - }, + let mut left_ranges = self.ranges.iter(); + let mut right_ranges = other.ranges.iter(); + let mut left_range = left_ranges.next(); + let mut right_range = right_ranges.next(); + while let (Some(left), Some(right)) = (&left_range, &right_range) { + if left.end <= right.start { + left_range = left_ranges.next(); + } else if left.start >= right.end { + right_range = right_ranges.next(); + } else { + return true; + } } + false } pub fn remove_intersection(&mut self, other: &mut CharacterSet) -> CharacterSet { - match self { - CharacterSet::Include(chars) => match other { - CharacterSet::Include(other_chars) => { - CharacterSet::Include(remove_chars(chars, other_chars, true)) + let mut intersection = Vec::new(); + let mut left_i = 0; + let mut right_i = 0; + while left_i < self.ranges.len() && right_i < other.ranges.len() { + let left = &mut self.ranges[left_i]; + let right = &mut other.ranges[right_i]; + + match left.start.cmp(&right.start) { + Ordering::Less => { + // [ L ] + // [ R ] + if left.end <= right.start { + left_i += 1; + continue; + } + + match left.end.cmp(&right.end) { + // [ L ] + // [ R ] + Ordering::Less => { + intersection.push(right.start..left.end); + swap(&mut left.end, &mut right.start); + left_i += 1; + } + + // [ L ] + // [ R ] + Ordering::Equal => { + intersection.push(right.clone()); + left.end = right.start; + other.ranges.remove(right_i); + } + + // [ L ] + // [ R ] + Ordering::Greater => { + intersection.push(right.clone()); + let new_range = left.start..right.start; + left.start = right.end; + self.ranges.insert(left_i, new_range); + other.ranges.remove(right_i); + left_i += 1; + } + } } - CharacterSet::Exclude(other_chars) => { - let mut removed = remove_chars(chars, other_chars, false); - add_chars(other_chars, chars); - swap(&mut removed, chars); - CharacterSet::Include(removed) + Ordering::Equal => { + // [ L ] + // [ R ] + if left.end < right.end { + intersection.push(left.start..left.end); + right.start = left.end; + self.ranges.remove(left_i); + } + // [ L ] + // [ R ] + else if left.end == right.end { + intersection.push(left.clone()); + self.ranges.remove(left_i); + other.ranges.remove(right_i); + } + // [ L ] + // [ R ] + else if left.end > right.end { + intersection.push(right.clone()); + left.start = right.end; + other.ranges.remove(right_i); + } } - }, - CharacterSet::Exclude(chars) => match other { - CharacterSet::Include(other_chars) => { - let mut removed = remove_chars(other_chars, chars, false); - add_chars(chars, other_chars); - swap(&mut removed, other_chars); - CharacterSet::Include(removed) + Ordering::Greater => { + // [ L ] + // [ R ] + if left.start >= right.end { + right_i += 1; + continue; + } + + match left.end.cmp(&right.end) { + // [ L ] + // [ R ] + Ordering::Less => { + intersection.push(left.clone()); + let new_range = right.start..left.start; + right.start = left.end; + other.ranges.insert(right_i, new_range); + self.ranges.remove(left_i); + right_i += 1; + } + + // [ L ] + // [ R ] + Ordering::Equal => { + intersection.push(left.clone()); + right.end = left.start; + self.ranges.remove(left_i); + } + + // [ L ] + // [ R ] + Ordering::Greater => { + intersection.push(left.start..right.end); + swap(&mut left.start, &mut right.end); + right_i += 1; + } + } } - CharacterSet::Exclude(other_chars) => { - let mut result_exclusion = chars.clone(); - result_exclusion.extend(other_chars.iter().cloned()); - result_exclusion.sort_unstable(); - result_exclusion.dedup(); - remove_chars(chars, other_chars, true); - let mut included_characters = Vec::new(); - let mut other_included_characters = Vec::new(); - swap(&mut included_characters, other_chars); - swap(&mut other_included_characters, chars); - *self = CharacterSet::Include(included_characters); - *other = CharacterSet::Include(other_included_characters); - CharacterSet::Exclude(result_exclusion) - } - }, + } } + CharacterSet { + ranges: intersection, + } + } + + pub fn iter<'a>(&'a self) -> impl Iterator + 'a { + self.ranges.iter().flat_map(|r| r.clone()) + } + + pub fn chars<'a>(&'a self) -> impl Iterator + 'a { + self.iter().filter_map(char::from_u32) } pub fn is_empty(&self) -> bool { - if let CharacterSet::Include(c) = self { - c.is_empty() - } else { - false - } + self.ranges.is_empty() } - pub fn ranges<'a>( - chars: &'a Vec, + pub fn simplify_ignoring<'a>( + &'a self, ruled_out_characters: &'a HashSet, - ) -> impl Iterator> + 'a { + ) -> Vec> { let mut prev_range: Option> = None; - chars - .iter() - .map(|c| (*c, false)) + self.chars() + .map(|c| (c, false)) .chain(Some(('\0', true))) .filter_map(move |(c, done)| { if done { @@ -212,35 +302,40 @@ impl CharacterSet { None } }) + .collect() } - #[cfg(test)] pub fn contains(&self, c: char) -> bool { - match self { - CharacterSet::Include(chars) => chars.contains(&c), - CharacterSet::Exclude(chars) => !chars.contains(&c), - } + self.ranges.iter().any(|r| r.contains(&(c as u32))) } } impl Ord for CharacterSet { fn cmp(&self, other: &CharacterSet) -> Ordering { - match self { - CharacterSet::Include(chars) => { - if let CharacterSet::Include(other_chars) = other { - order_chars(chars, other_chars) - } else { - Ordering::Less - } + let count_cmp = self + .ranges + .iter() + .map(|r| r.len()) + .sum::() + .cmp(&other.ranges.iter().map(|r| r.len()).sum()); + if count_cmp != Ordering::Equal { + return count_cmp; + } + + for (left_range, right_range) in self.ranges.iter().zip(other.ranges.iter()) { + let cmp = left_range.len().cmp(&right_range.len()); + if cmp != Ordering::Equal { + return cmp; } - CharacterSet::Exclude(chars) => { - if let CharacterSet::Exclude(other_chars) = other { - order_chars(chars, other_chars) - } else { - Ordering::Greater + + for (left, right) in left_range.clone().zip(right_range.clone()) { + let cmp = left.cmp(&right); + if cmp != Ordering::Equal { + return cmp; } } } + return Ordering::Equal; } } @@ -250,89 +345,22 @@ impl PartialOrd for CharacterSet { } } -fn add_chars(left: &mut Vec, right: &Vec) { - for c in right { - match left.binary_search(c) { - Err(i) => left.insert(i, *c), - _ => {} +impl fmt::Debug for CharacterSet { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "CharacterSet [")?; + let mut set = self.clone(); + if self.contains(char::MAX) { + write!(f, "^ ")?; + set = set.negate(); } - } -} - -fn remove_chars(left: &mut Vec, right: &mut Vec, mutate_right: bool) -> Vec { - let mut result = Vec::new(); - right.retain(|right_char| { - if let Some(index) = left.iter().position(|left_char| *left_char == *right_char) { - left.remove(index); - result.push(*right_char); - false || !mutate_right - } else { - true - } - }); - result -} - -struct SetComparision { - left_only: bool, - common: bool, - right_only: bool, -} - -fn compare_chars(left: &Vec, right: &Vec) -> SetComparision { - let mut result = SetComparision { - left_only: false, - common: false, - right_only: false, - }; - let mut left = left.iter().cloned(); - let mut right = right.iter().cloned(); - let mut i = left.next(); - let mut j = right.next(); - while let (Some(left_char), Some(right_char)) = (i, j) { - if left_char < right_char { - i = left.next(); - result.left_only = true; - } else if left_char > right_char { - j = right.next(); - result.right_only = true; - } else { - i = left.next(); - j = right.next(); - result.common = true; - } - } - - match (i, j) { - (Some(_), _) => result.left_only = true, - (_, Some(_)) => result.right_only = true, - _ => {} - } - - result -} - -fn order_chars(chars: &Vec, other_chars: &Vec) -> Ordering { - if chars.is_empty() { - if other_chars.is_empty() { - Ordering::Equal - } else { - Ordering::Less - } - } else if other_chars.is_empty() { - Ordering::Greater - } else { - let cmp = chars.len().cmp(&other_chars.len()); - if cmp != Ordering::Equal { - return cmp; - } - for (c, other_c) in chars.iter().zip(other_chars.iter()) { - let cmp = c.cmp(other_c); - if cmp != Ordering::Equal { - return cmp; + for (i, c) in set.chars().enumerate() { + if i > 0 { + write!(f, ", ")?; } + write!(f, "{:?}", c)?; } - Ordering::Equal + write!(f, "]")?; + Ok(()) } } @@ -624,48 +652,46 @@ mod tests { // multiple negated character classes ( vec![ - (CharacterSet::Include(vec!['a']), false, 0, 1), - (CharacterSet::Exclude(vec!['a', 'b', 'c']), false, 0, 2), - (CharacterSet::Include(vec!['g']), false, 0, 6), - (CharacterSet::Exclude(vec!['d', 'e', 'f']), false, 0, 3), - (CharacterSet::Exclude(vec!['g', 'h', 'i']), false, 0, 4), - (CharacterSet::Include(vec!['g']), false, 0, 5), + (CharacterSet::from_char('a'), false, 0, 1), + (CharacterSet::from_range('a', 'c').negate(), false, 0, 2), + (CharacterSet::from_char('g'), false, 0, 6), + (CharacterSet::from_range('d', 'f').negate(), false, 0, 3), + (CharacterSet::from_range('g', 'i').negate(), false, 0, 4), + (CharacterSet::from_char('g'), false, 0, 5), ], vec![ NfaTransition { - characters: CharacterSet::Include(vec!['a']), + characters: CharacterSet::from_char('a'), precedence: 0, states: vec![1, 3, 4], is_separator: false, }, NfaTransition { - characters: CharacterSet::Include(vec!['g']), + characters: CharacterSet::from_char('g'), precedence: 0, states: vec![2, 3, 5, 6], is_separator: false, }, NfaTransition { - characters: CharacterSet::Include(vec!['b', 'c']), + characters: CharacterSet::from_range('b', 'c'), precedence: 0, states: vec![3, 4], is_separator: false, }, NfaTransition { - characters: CharacterSet::Include(vec!['h', 'i']), + characters: CharacterSet::from_range('h', 'i'), precedence: 0, states: vec![2, 3], is_separator: false, }, NfaTransition { - characters: CharacterSet::Include(vec!['d', 'e', 'f']), + characters: CharacterSet::from_range('d', 'f'), precedence: 0, states: vec![2, 4], is_separator: false, }, NfaTransition { - characters: CharacterSet::Exclude(vec![ - 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', - ]), + characters: CharacterSet::from_range('a', 'i').negate(), precedence: 0, states: vec![2, 3, 4], is_separator: false, @@ -675,21 +701,21 @@ mod tests { // disjoint characters with same state ( vec![ - (CharacterSet::Include(vec!['a']), false, 0, 1), - (CharacterSet::Include(vec!['b']), false, 0, 2), - (CharacterSet::Include(vec!['c']), false, 0, 1), - (CharacterSet::Include(vec!['d']), false, 0, 1), - (CharacterSet::Include(vec!['e']), false, 0, 2), + (CharacterSet::from_char('a'), false, 0, 1), + (CharacterSet::from_char('b'), false, 0, 2), + (CharacterSet::from_char('c'), false, 0, 1), + (CharacterSet::from_char('d'), false, 0, 1), + (CharacterSet::from_char('e'), false, 0, 2), ], vec![ NfaTransition { - characters: CharacterSet::Include(vec!['a', 'c', 'd']), + characters: CharacterSet::empty().add_char('a').add_range('c', 'd'), precedence: 0, states: vec![1], is_separator: false, }, NfaTransition { - characters: CharacterSet::Include(vec!['b', 'e']), + characters: CharacterSet::empty().add_char('b').add_char('e'), precedence: 0, states: vec![2], is_separator: false, @@ -698,119 +724,129 @@ mod tests { ), ]; - for row in table.iter() { + for (i, row) in table.iter().enumerate() { assert_eq!( NfaCursor::group_transitions( row.0 .iter() .map(|(chars, is_sep, prec, state)| (chars, *is_sep, *prec, *state)) ), - row.1 + row.1, + "row {}", + i ); } } #[test] fn test_character_set_remove_intersection() { - // A whitelist and an overlapping whitelist. - // Both sets contain 'c', 'd', and 'f' - let mut a = CharacterSet::empty().add_range('a', 'f'); - let mut b = CharacterSet::empty().add_range('c', 'h'); - assert_eq!( - a.remove_intersection(&mut b), - CharacterSet::empty().add_range('c', 'f') - ); - assert_eq!(a, CharacterSet::empty().add_range('a', 'b')); - assert_eq!(b, CharacterSet::empty().add_range('g', 'h')); + struct Row { + left: CharacterSet, + right: CharacterSet, + left_only: CharacterSet, + right_only: CharacterSet, + intersection: CharacterSet, + } - let mut a = CharacterSet::empty().add_range('a', 'f'); - let mut b = CharacterSet::empty().add_range('c', 'h'); - assert_eq!( - b.remove_intersection(&mut a), - CharacterSet::empty().add_range('c', 'f') - ); - assert_eq!(a, CharacterSet::empty().add_range('a', 'b')); - assert_eq!(b, CharacterSet::empty().add_range('g', 'h')); + let rows = [ + // [ L ] + // [ R ] + Row { + left: CharacterSet::from_range('a', 'f'), + right: CharacterSet::from_range('g', 'm'), + left_only: CharacterSet::from_range('a', 'f'), + right_only: CharacterSet::from_range('g', 'm'), + intersection: CharacterSet::empty(), + }, + // [ L ] + // [ R ] + Row { + left: CharacterSet::from_range('a', 'f'), + right: CharacterSet::from_range('c', 'i'), + left_only: CharacterSet::from_range('a', 'b'), + right_only: CharacterSet::from_range('g', 'i'), + intersection: CharacterSet::from_range('c', 'f'), + }, + // [ L ] + // [ R ] + Row { + left: CharacterSet::from_range('a', 'f'), + right: CharacterSet::from_range('d', 'f'), + left_only: CharacterSet::from_range('a', 'c'), + right_only: CharacterSet::empty(), + intersection: CharacterSet::from_range('d', 'f'), + }, + // [ L ] + // [ R ] + Row { + left: CharacterSet::from_range('a', 'm'), + right: CharacterSet::from_range('d', 'f'), + left_only: CharacterSet::empty() + .add_range('a', 'c') + .add_range('g', 'm'), + right_only: CharacterSet::empty(), + intersection: CharacterSet::from_range('d', 'f'), + }, + // [ L1 ] [ L2 ] + // [ R ] + Row { + left: CharacterSet::empty() + .add_range('a', 'e') + .add_range('h', 'l'), + right: CharacterSet::from_range('c', 'i'), + left_only: CharacterSet::empty() + .add_range('a', 'b') + .add_range('j', 'l'), + right_only: CharacterSet::from_range('f', 'g'), + intersection: CharacterSet::empty() + .add_range('c', 'e') + .add_range('h', 'i'), + }, + ]; - // A whitelist and a larger whitelist. - let mut a = CharacterSet::empty().add_char('c'); - let mut b = CharacterSet::empty().add_range('a', 'e'); - assert_eq!( - a.remove_intersection(&mut b), - CharacterSet::empty().add_char('c') - ); - assert_eq!(a, CharacterSet::empty()); - assert_eq!( - b, - CharacterSet::empty() - .add_range('a', 'b') - .add_range('d', 'e') - ); + for (i, row) in rows.iter().enumerate() { + let mut left = row.left.clone(); + let mut right = row.right.clone(); + assert_eq!( + left.remove_intersection(&mut right), + row.intersection, + "row {}a: {:?} && {:?}", + i, + row.left, + row.right + ); + assert_eq!( + left, row.left_only, + "row {}a: {:?} - {:?}", + i, row.left, row.right + ); + assert_eq!( + right, row.right_only, + "row {}a: {:?} - {:?}", + i, row.right, row.left + ); - let mut a = CharacterSet::empty().add_char('c'); - let mut b = CharacterSet::empty().add_range('a', 'e'); - assert_eq!( - b.remove_intersection(&mut a), - CharacterSet::empty().add_char('c') - ); - assert_eq!(a, CharacterSet::empty()); - assert_eq!( - b, - CharacterSet::empty() - .add_range('a', 'b') - .add_range('d', 'e') - ); - - // An inclusion and an intersecting exclusion. - // Both sets contain 'e', 'f', and 'm' - let mut a = CharacterSet::empty() - .add_range('c', 'h') - .add_range('k', 'm'); - let mut b = CharacterSet::empty() - .add_range('a', 'd') - .add_range('g', 'l') - .negate(); - assert_eq!( - a.remove_intersection(&mut b), - CharacterSet::Include(vec!['e', 'f', 'm']) - ); - assert_eq!(a, CharacterSet::Include(vec!['c', 'd', 'g', 'h', 'k', 'l'])); - assert_eq!(b, CharacterSet::empty().add_range('a', 'm').negate()); - - let mut a = CharacterSet::empty() - .add_range('c', 'h') - .add_range('k', 'm'); - let mut b = CharacterSet::empty() - .add_range('a', 'd') - .add_range('g', 'l') - .negate(); - assert_eq!( - b.remove_intersection(&mut a), - CharacterSet::Include(vec!['e', 'f', 'm']) - ); - assert_eq!(a, CharacterSet::Include(vec!['c', 'd', 'g', 'h', 'k', 'l'])); - assert_eq!(b, CharacterSet::empty().add_range('a', 'm').negate()); - - // An exclusion and an overlapping inclusion. - // Both sets exclude 'c', 'd', and 'e' - let mut a = CharacterSet::empty().add_range('a', 'e').negate(); - let mut b = CharacterSet::empty().add_range('c', 'h').negate(); - assert_eq!( - a.remove_intersection(&mut b), - CharacterSet::empty().add_range('a', 'h').negate(), - ); - assert_eq!(a, CharacterSet::Include(vec!['f', 'g', 'h'])); - assert_eq!(b, CharacterSet::Include(vec!['a', 'b'])); - - // An exclusion and a larger exclusion. - let mut a = CharacterSet::empty().add_range('b', 'c').negate(); - let mut b = CharacterSet::empty().add_range('a', 'd').negate(); - assert_eq!( - a.remove_intersection(&mut b), - CharacterSet::empty().add_range('a', 'd').negate(), - ); - assert_eq!(a, CharacterSet::empty().add_char('a').add_char('d')); - assert_eq!(b, CharacterSet::empty()); + let mut left = row.left.clone(); + let mut right = row.right.clone(); + assert_eq!( + right.remove_intersection(&mut left), + row.intersection, + "row {}b: {:?} && {:?}", + i, + row.left, + row.right + ); + assert_eq!( + left, row.left_only, + "row {}b: {:?} - {:?}", + i, row.left, row.right + ); + assert_eq!( + right, row.right_only, + "row {}b: {:?} - {:?}", + i, row.right, row.left + ); + } } #[test] @@ -834,29 +870,29 @@ mod tests { assert!(!b.does_intersect(&a)); let (a, b) = ( - CharacterSet::Include(vec!['b']), - CharacterSet::Exclude(vec!['a', 'b', 'c']), + CharacterSet::from_char('b'), + CharacterSet::from_range('a', 'c'), + ); + assert!(a.does_intersect(&b)); + assert!(b.does_intersect(&a)); + + let (a, b) = ( + CharacterSet::from_char('b'), + CharacterSet::from_range('a', 'c').negate(), ); assert!(!a.does_intersect(&b)); assert!(!b.does_intersect(&a)); let (a, b) = ( - CharacterSet::Include(vec!['b']), - CharacterSet::Exclude(vec!['a', 'c']), + CharacterSet::from_char('a').negate(), + CharacterSet::from_char('a').negate(), ); assert!(a.does_intersect(&b)); assert!(b.does_intersect(&a)); let (a, b) = ( - CharacterSet::Exclude(vec!['a']), - CharacterSet::Exclude(vec!['a']), - ); - assert!(a.does_intersect(&b)); - assert!(b.does_intersect(&a)); - - let (a, b) = ( - CharacterSet::Include(vec!['c']), - CharacterSet::Exclude(vec!['a']), + CharacterSet::from_char('c'), + CharacterSet::from_char('a').negate(), ); assert!(a.does_intersect(&b)); assert!(b.does_intersect(&a)); @@ -898,7 +934,11 @@ mod tests { .into_iter() .map(|c: &char| *c as u32) .collect(); - let ranges = CharacterSet::ranges(chars, &ruled_out_chars).collect::>(); + let mut set = CharacterSet::empty(); + for c in chars { + set = set.add_char(*c); + } + let ranges = set.simplify_ignoring(&ruled_out_chars); assert_eq!(ranges, *expected_ranges); } } diff --git a/cli/src/generate/prepare_grammar/expand_tokens.rs b/cli/src/generate/prepare_grammar/expand_tokens.rs index 2b88762b..92c54b71 100644 --- a/cli/src/generate/prepare_grammar/expand_tokens.rs +++ b/cli/src/generate/prepare_grammar/expand_tokens.rs @@ -12,7 +12,7 @@ use std::i32; lazy_static! { static ref CURLY_BRACE_REGEX: Regex = - Regex::new(r#"(^|[^\\])\{([^}]*[^0-9A-F,}][^}]*)\}"#).unwrap(); + Regex::new(r#"(^|[^\\])\{([^}]*[^0-9A-Fa-f,}][^}]*)\}"#).unwrap(); } const ALLOWED_REDUNDANT_ESCAPED_CHARS: [char; 4] = ['!', '\'', '"', '/']; @@ -198,11 +198,11 @@ impl NfaBuilder { Ast::Empty(_) => Ok(false), Ast::Flags(_) => Err(Error::regex("Flags are not supported")), Ast::Literal(literal) => { - self.push_advance(CharacterSet::Include(vec![literal.c]), next_state_id); + self.push_advance(CharacterSet::from_char(literal.c), next_state_id); Ok(true) } Ast::Dot(_) => { - self.push_advance(CharacterSet::Exclude(vec!['\n']), next_state_id); + self.push_advance(CharacterSet::from_char('\n').negate(), next_state_id); Ok(true) } Ast::Assertion(_) => Err(Error::regex("Assertions are not supported")), @@ -344,11 +344,9 @@ impl NfaBuilder { fn expand_character_class(&self, item: &ClassSetItem) -> Result { match item { - ClassSetItem::Empty(_) => Ok(CharacterSet::Include(Vec::new())), - ClassSetItem::Literal(literal) => Ok(CharacterSet::Include(vec![literal.c])), - ClassSetItem::Range(range) => { - Ok(CharacterSet::empty().add_range(range.start.c, range.end.c)) - } + ClassSetItem::Empty(_) => Ok(CharacterSet::empty()), + ClassSetItem::Literal(literal) => Ok(CharacterSet::from_char(literal.c)), + ClassSetItem::Range(range) => Ok(CharacterSet::from_range(range.start.c, range.end.c)), ClassSetItem::Union(union) => { let mut result = CharacterSet::empty(); for item in &union.items { @@ -366,7 +364,7 @@ impl NfaBuilder { fn expand_perl_character_class(&self, item: &ClassPerlKind) -> CharacterSet { match item { - ClassPerlKind::Digit => CharacterSet::empty().add_range('0', '9'), + ClassPerlKind::Digit => CharacterSet::from_range('0', '9'), ClassPerlKind::Space => CharacterSet::empty() .add_char(' ') .add_char('\t') @@ -653,12 +651,15 @@ mod tests { Rule::pattern(r#"\{[ab]{3}\}"#), // Unicode codepoints Rule::pattern(r#"\u{1000A}"#), + // Unicode codepoints (lowercase) + Rule::pattern(r#"\u{1000b}"#), ], separators: vec![], examples: vec![ ("u{1234} ok", Some((0, "u{1234}"))), ("{aba}}", Some((1, "{aba}"))), ("\u{1000A}", Some((2, "\u{1000A}"))), + ("\u{1000b}", Some((3, "\u{1000b}"))), ], }, ]; diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index 58d99cc4..362f357c 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -1,5 +1,4 @@ use super::grammars::{ExternalToken, LexicalGrammar, SyntaxGrammar, VariableType}; -use super::nfa::CharacterSet; use super::rules::{Alias, AliasMap, Symbol, SymbolType}; use super::tables::{ AdvanceAction, FieldLocation, GotoAction, LexState, LexTable, ParseAction, ParseTable, @@ -659,21 +658,19 @@ impl Generator { .advance_actions .iter() .map(|(chars, action)| { - let (chars, is_included) = match chars { - CharacterSet::Include(c) => (c, true), - CharacterSet::Exclude(c) => (c, false), - }; - let mut call_id = None; - let mut ranges = - CharacterSet::ranges(chars, &ruled_out_chars).collect::>(); + let is_included = !chars.contains(std::char::MAX); + let mut ranges; if is_included { - ruled_out_chars.extend(chars.iter().map(|c| *c as u32)); + ranges = chars.simplify_ignoring(&ruled_out_chars); + ruled_out_chars.extend(chars.iter()); } else { + ranges = chars.clone().negate().simplify_ignoring(&ruled_out_chars); ranges.insert(0, '\0'..'\0') } // Record any large character sets so that they can be extracted // into helper functions, reducing code duplication. + let mut call_id = None; if extract_helper_functions && ranges.len() > LARGE_CHARACTER_RANGE_COUNT { let char_set_symbol = self .symbol_for_advance_action(action, &lex_table) @@ -887,11 +884,16 @@ impl Generator { add!(self, " &&{}lookahead != ", line_break); self.add_character(range.end); } else { - add!(self, "(lookahead < "); - self.add_character(range.start); - add!(self, " || "); - self.add_character(range.end); - add!(self, " < lookahead)"); + if range.start != '\0' { + add!(self, "(lookahead < "); + self.add_character(range.start); + add!(self, " || "); + self.add_character(range.end); + add!(self, " < lookahead)"); + } else { + add!(self, "lookahead > "); + self.add_character(range.end); + } } } did_add = true; diff --git a/cli/src/generate/templates/index.js b/cli/src/generate/templates/index.js index 8f342b15..e6746235 100644 --- a/cli/src/generate/templates/index.js +++ b/cli/src/generate/templates/index.js @@ -1,10 +1,14 @@ try { module.exports = require("./build/Release/tree_sitter_PARSER_NAME_binding"); } catch (error) { - try { - module.exports = require("./build/Debug/tree_sitter_PARSER_NAME_binding"); - } catch (_) { + if (error.code !== 'MODULE_NOT_FOUND') throw error + else try { + module.exports = require("./build/Debug/tree_sitter_PARSER_NAME_binding"); + } catch (error2) { + if (error2.code === 'MODULE_NOT_FOUND') + throw error + throw error2 } } diff --git a/cli/src/main.rs b/cli/src/main.rs index 64ec7253..1cf90c67 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -63,6 +63,7 @@ fn run() -> error::Result<()> { .arg(Arg::with_name("scope").long("scope").takes_value(true)) .arg(Arg::with_name("debug").long("debug").short("d")) .arg(Arg::with_name("debug-graph").long("debug-graph").short("D")) + .arg(Arg::with_name("debug-xml").long("xml").short("x")) .arg(Arg::with_name("quiet").long("quiet").short("q")) .arg(Arg::with_name("stat").long("stat").short("s")) .arg(Arg::with_name("time").long("time").short("t")) @@ -119,6 +120,7 @@ fn run() -> error::Result<()> { .short("f") .takes_value(true), ) + .arg(Arg::with_name("update").long("update").short("u")) .arg(Arg::with_name("debug").long("debug").short("d")) .arg(Arg::with_name("debug-graph").long("debug-graph").short("D")), ) @@ -193,6 +195,7 @@ fn run() -> error::Result<()> { } else if let Some(matches) = matches.subcommand_matches("test") { let debug = matches.is_present("debug"); let debug_graph = matches.is_present("debug-graph"); + let update = matches.is_present("update"); let filter = matches.value_of("filter"); let languages = loader.languages_at_path(¤t_dir)?; let language = languages @@ -206,7 +209,7 @@ fn run() -> error::Result<()> { test_corpus_dir = current_dir.join("corpus"); } if test_corpus_dir.is_dir() { - test::run_tests_at_path(*language, &test_corpus_dir, debug, debug_graph, filter)?; + test::run_tests_at_path(*language, &test_corpus_dir, debug, debug_graph, filter, update)?; } // Check that all of the queries are valid. @@ -220,6 +223,7 @@ fn run() -> error::Result<()> { } else if let Some(matches) = matches.subcommand_matches("parse") { let debug = matches.is_present("debug"); let debug_graph = matches.is_present("debug-graph"); + let debug_xml = matches.is_present("debug-xml"); let quiet = matches.is_present("quiet"); let time = matches.is_present("time"); let edits = matches @@ -255,6 +259,7 @@ fn run() -> error::Result<()> { timeout, debug, debug_graph, + debug_xml, Some(&cancellation_flag), )?; diff --git a/cli/src/parse.rs b/cli/src/parse.rs index 4d66df1d..5266b19f 100644 --- a/cli/src/parse.rs +++ b/cli/src/parse.rs @@ -40,6 +40,7 @@ pub fn parse_file_at_path( timeout: u64, debug: bool, debug_graph: bool, + debug_xml: bool, cancellation_flag: Option<&AtomicUsize>, ) -> Result { let mut _log_session = None; @@ -151,6 +152,60 @@ pub fn parse_file_at_path( println!(""); } + if debug_xml { + let mut needs_newline = false; + let mut indent_level = 0; + let mut did_visit_children = false; + let mut tags: Vec<&str> = Vec::new(); + loop { + let node = cursor.node(); + let is_named = node.is_named(); + if did_visit_children { + if is_named { + let tag = tags.pop(); + write!(&mut stdout, "\n", tag.expect("there is a tag"))?; + needs_newline = true; + } + if cursor.goto_next_sibling() { + did_visit_children = false; + } else if cursor.goto_parent() { + did_visit_children = true; + indent_level -= 1; + } else { + break; + } + } else { + if is_named { + if needs_newline { + stdout.write(b"\n")?; + } + for _ in 0..indent_level { + stdout.write(b" ")?; + } + write!(&mut stdout, "<{}", node.kind())?; + if let Some(field_name) = cursor.field_name() { + write!(&mut stdout, " type=\"{}\"", field_name)?; + } + write!(&mut stdout, ">")?; + tags.push(node.kind()); + needs_newline = true; + } + if cursor.goto_first_child() { + did_visit_children = false; + indent_level += 1; + } else { + did_visit_children = true; + let start = node.start_byte(); + let end = node.end_byte(); + let value = std::str::from_utf8(&source_code[start..end]).expect("has a string"); + write!(&mut stdout, "{}", html_escape::encode_text(value))?; + } + } + } + cursor.reset(tree.root_node()); + println!(""); + } + let mut first_error = None; loop { let node = cursor.node(); diff --git a/cli/src/test.rs b/cli/src/test.rs index 7c143ecd..c8cfe89f 100644 --- a/cli/src/test.rs +++ b/cli/src/test.rs @@ -6,9 +6,10 @@ use lazy_static::lazy_static; use regex::bytes::{Regex as ByteRegex, RegexBuilder as ByteRegexBuilder}; use regex::Regex; use std::char; +use std::fmt::Write as FmtWrite; use std::fs; use std::io::{self, Write}; -use std::path::Path; +use std::path::{Path, PathBuf}; use std::str; use tree_sitter::{Language, LogType, Parser, Query}; @@ -30,6 +31,7 @@ pub enum TestEntry { Group { name: String, children: Vec, + file_path: Option, }, Example { name: String, @@ -44,6 +46,7 @@ impl Default for TestEntry { TestEntry::Group { name: String::new(), children: Vec::new(), + file_path: None, } } } @@ -54,6 +57,7 @@ pub fn run_tests_at_path( debug: bool, debug_graph: bool, filter: Option<&str>, + update: bool, ) -> Result<()> { let test_entry = parse_tests(path)?; let mut _log_session = None; @@ -72,27 +76,45 @@ pub fn run_tests_at_path( } let mut failures = Vec::new(); - if let TestEntry::Group { children, .. } = test_entry { - for child in children { - run_tests(&mut parser, child, filter, 0, &mut failures)?; - } - } + let mut corrected_entries = Vec::new(); + run_tests( + &mut parser, + test_entry, + filter, + 0, + &mut failures, + update, + &mut corrected_entries, + )?; if failures.len() > 0 { println!(""); - if failures.len() == 1 { - println!("1 failure:") - } else { - println!("{} failures:", failures.len()) - } + if update { + if failures.len() == 1 { + println!("1 update:\n") + } else { + println!("{} updates:\n", failures.len()) + } - print_diff_key(); - for (i, (name, actual, expected)) in failures.iter().enumerate() { - println!("\n {}. {}:", i + 1, name); - print_diff(actual, expected); + for (i, (name, ..)) in failures.iter().enumerate() { + println!(" {}. {}", i + 1, name); + } + Ok(()) + } else { + if failures.len() == 1 { + println!("1 failure:") + } else { + println!("{} failures:", failures.len()) + } + + print_diff_key(); + for (i, (name, actual, expected)) in failures.iter().enumerate() { + println!("\n {}. {}:", i + 1, name); + print_diff(actual, expected); + } + Error::err(String::new()) } - Error::err(String::new()) } else { Ok(()) } @@ -149,6 +171,8 @@ fn run_tests( filter: Option<&str>, mut indent_level: i32, failures: &mut Vec<(String, String, String)>, + update: bool, + corrected_entries: &mut Vec<(String, String, String)>, ) -> Result<()> { match test_entry { TestEntry::Example { @@ -159,6 +183,11 @@ fn run_tests( } => { if let Some(filter) = filter { if !name.contains(filter) { + if update { + let input = String::from_utf8(input).unwrap(); + let output = format_sexp(&output); + corrected_entries.push((name, input, output)); + } return Ok(()); } } @@ -172,25 +201,138 @@ fn run_tests( } if actual == output { println!("✓ {}", Colour::Green.paint(&name)); + if update { + let input = String::from_utf8(input).unwrap(); + let output = format_sexp(&output); + corrected_entries.push((name, input, output)); + } } else { - println!("✗ {}", Colour::Red.paint(&name)); + if update { + let input = String::from_utf8(input).unwrap(); + let output = format_sexp(&actual); + corrected_entries.push((name.clone(), input, output)); + println!("✓ {}", Colour::Blue.paint(&name)); + } else { + println!("✗ {}", Colour::Red.paint(&name)); + } failures.push((name, actual, output)); } } - TestEntry::Group { name, children } => { - for _ in 0..indent_level { - print!(" "); + TestEntry::Group { + name, + children, + file_path, + } => { + if indent_level > 0 { + for _ in 0..indent_level { + print!(" "); + } + println!("{}:", name); } - println!("{}:", name); + + let failure_count = failures.len(); + indent_level += 1; for child in children { - run_tests(parser, child, filter, indent_level, failures)?; + run_tests( + parser, + child, + filter, + indent_level, + failures, + update, + corrected_entries, + )?; + } + + if let Some(file_path) = file_path { + if update && failures.len() - failure_count > 0 { + write_tests(&file_path, corrected_entries)?; + } + corrected_entries.clear(); } } } Ok(()) } +fn format_sexp(sexp: &String) -> String { + let mut formatted = String::new(); + + let mut indent_level = 0; + let mut has_field = false; + let mut s_iter = sexp.split(|c| c == ' ' || c == ')'); + while let Some(s) = s_iter.next() { + if s.is_empty() { + // ")" + indent_level -= 1; + write!(formatted, ")").unwrap(); + } else if s.starts_with('(') { + if has_field { + has_field = false; + } else { + if indent_level > 0 { + writeln!(formatted, "").unwrap(); + for _ in 0..indent_level { + write!(formatted, " ").unwrap(); + } + } + indent_level += 1; + } + + // "(node_name" + write!(formatted, "{}", s).unwrap(); + + let mut c_iter = s.chars(); + c_iter.next(); + let second_char = c_iter.next().unwrap(); + if second_char == 'M' || second_char == 'U' { + // "(MISSING node_name" or "(UNEXPECTED 'x'" + let s = s_iter.next().unwrap(); + write!(formatted, " {}", s).unwrap(); + } + } else if s.ends_with(':') { + // "field:" + writeln!(formatted, "").unwrap(); + for _ in 0..indent_level { + write!(formatted, " ").unwrap(); + } + write!(formatted, "{} ", s).unwrap(); + has_field = true; + indent_level += 1; + } + } + + formatted +} + +fn write_tests(file_path: &Path, corrected_entries: &Vec<(String, String, String)>) -> Result<()> { + let mut buffer = fs::File::create(file_path)?; + write_tests_to_buffer(&mut buffer, corrected_entries) +} + +fn write_tests_to_buffer( + buffer: &mut impl Write, + corrected_entries: &Vec<(String, String, String)>, +) -> Result<()> { + for (i, (name, input, output)) in corrected_entries.iter().enumerate() { + if i > 0 { + write!(buffer, "\n")?; + } + write!( + buffer, + "{}\n{}\n{}\n{}\n{}\n\n{}\n", + "=".repeat(80), + name, + "=".repeat(80), + input, + "-".repeat(80), + output.trim() + )?; + } + Ok(()) +} + pub fn parse_tests(path: &Path) -> io::Result { let name = path .file_stem() @@ -206,10 +348,14 @@ pub fn parse_tests(path: &Path) -> io::Result { children.push(parse_tests(&entry.path())?); } } - Ok(TestEntry::Group { name, children }) + Ok(TestEntry::Group { + name, + children, + file_path: None, + }) } else { let content = fs::read_to_string(path)?; - Ok(parse_test_content(name, content)) + Ok(parse_test_content(name, content, Some(path.to_path_buf()))) } } @@ -217,7 +363,7 @@ pub fn strip_sexp_fields(sexp: String) -> String { SEXP_FIELD_REGEX.replace_all(&sexp, " (").to_string() } -fn parse_test_content(name: String, content: String) -> TestEntry { +fn parse_test_content(name: String, content: String, file_path: Option) -> TestEntry { let mut children = Vec::new(); let bytes = content.as_bytes(); let mut prev_name = String::new(); @@ -268,7 +414,11 @@ fn parse_test_content(name: String, content: String) -> TestEntry { .to_string(); prev_header_end = header_end; } - TestEntry::Group { name, children } + TestEntry::Group { + name, + children, + file_path, + } } #[cfg(test)] @@ -300,6 +450,7 @@ d "# .trim() .to_string(), + None, ); assert_eq!( @@ -319,7 +470,8 @@ d output: "(d)".to_string(), has_fields: false, }, - ] + ], + file_path: None, } ); } @@ -352,6 +504,7 @@ abc "# .trim() .to_string(), + None, ); assert_eq!( @@ -371,8 +524,67 @@ abc output: "(c (d))".to_string(), has_fields: false, }, - ] + ], + file_path: None, } ); } + + #[test] + fn test_format_sexp() { + assert_eq!( + format_sexp(&"(a b: (c) (d) e: (f (g (h (MISSING i)))))".to_string()), + r#" +(a + b: (c) + (d) + e: (f + (g + (h + (MISSING i))))) +"# + .trim() + .to_string() + ); + } + + #[test] + fn test_write_tests_to_buffer() { + let mut buffer = Vec::new(); + let corrected_entries = vec![ + ( + "title 1".to_string(), + "input 1".to_string(), + "output 1".to_string(), + ), + ( + "title 2".to_string(), + "input 2".to_string(), + "output 2".to_string(), + ), + ]; + write_tests_to_buffer(&mut buffer, &corrected_entries).unwrap(); + assert_eq!( + String::from_utf8(buffer).unwrap(), + r#" +================================================================================ +title 1 +================================================================================ +input 1 +-------------------------------------------------------------------------------- + +output 1 + +================================================================================ +title 2 +================================================================================ +input 2 +-------------------------------------------------------------------------------- + +output 2 +"# + .trim_start() + .to_string() + ); + } } diff --git a/cli/src/tests/corpus_test.rs b/cli/src/tests/corpus_test.rs index 732ff9ad..202dcd70 100644 --- a/cli/src/tests/corpus_test.rs +++ b/cli/src/tests/corpus_test.rs @@ -390,7 +390,7 @@ fn flatten_tests(test: TestEntry) -> Vec<(String, Vec, String, bool)> { } result.push((name, input, output, has_fields)); } - TestEntry::Group { mut name, children } => { + TestEntry::Group { mut name, children, .. } => { if !prefix.is_empty() { name.insert_str(0, " - "); name.insert_str(0, prefix); diff --git a/cli/src/tests/tags_test.rs b/cli/src/tests/tags_test.rs index 2b058c0b..628c0bf6 100644 --- a/cli/src/tests/tags_test.rs +++ b/cli/src/tests/tags_test.rs @@ -70,7 +70,7 @@ const RUBY_TAG_QUERY: &'static str = r#" (method name: (_) @name) @definition.method -(method_call +(call method: (identifier) @name) @reference.call (setter (identifier) @ignore) @@ -317,19 +317,17 @@ fn test_tags_with_parse_error() { assert!(failed, "syntax error should have been detected"); assert_eq!( - newtags.iter() + newtags + .iter() .map(|t| ( substr(source, &t.name_range), tags_config.syntax_type_name(t.syntax_type_id) )) .collect::>(), - &[ - ("Fine", "class"), - ] + &[("Fine", "class"),] ); } - #[test] fn test_tags_via_c_api() { allocations::record(|| { diff --git a/docs/index.md b/docs/index.md index eca3f1a9..d5c0965b 100644 --- a/docs/index.md +++ b/docs/index.md @@ -31,7 +31,7 @@ Parsers for these languages are fairly complete: * [C#](https://github.com/tree-sitter/tree-sitter-c-sharp) * [C++](https://github.com/tree-sitter/tree-sitter-cpp) * [CSS](https://github.com/tree-sitter/tree-sitter-css) -* [Elm](https://github.com/razzeee/tree-sitter-elm) +* [Elm](https://github.com/elm-tooling/tree-sitter-elm) * [Eno](https://github.com/eno-lang/tree-sitter-eno) * [ERB / EJS](https://github.com/tree-sitter/tree-sitter-embedded-template) - [Fennel](https://github.com/travonted/tree-sitter-fennel) @@ -46,6 +46,7 @@ Parsers for these languages are fairly complete: * [Python](https://github.com/tree-sitter/tree-sitter-python) * [Ruby](https://github.com/tree-sitter/tree-sitter-ruby) * [Rust](https://github.com/tree-sitter/tree-sitter-rust) +* [R](https://github.com/r-lib/tree-sitter-r) * [SystemRDL](https://github.com/SystemRDL/tree-sitter-systemrdl) * [TOML](https://github.com/ikatyang/tree-sitter-toml) * [TypeScript](https://github.com/tree-sitter/tree-sitter-typescript) diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 071846a4..41349738 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -24,7 +24,7 @@ include = [ regex = "1" [build-dependencies] -cc = "1.0" +cc = "^1.0.58" [lib] path = "binding_rust/lib.rs" diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index 0b0097f9..1c2e79dd 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -1147,6 +1147,12 @@ impl<'a> TreeCursor<'a> { } } +impl<'a> Clone for TreeCursor<'a> { + fn clone(&self) -> Self { + TreeCursor(unsafe { ffi::ts_tree_cursor_copy(&self.0) }, PhantomData) + } +} + impl<'a> Drop for TreeCursor<'a> { fn drop(&mut self) { unsafe { ffi::ts_tree_cursor_delete(&mut self.0) } diff --git a/lib/binding_web/README.md b/lib/binding_web/README.md index ba1b4cb6..c02d0336 100644 --- a/lib/binding_web/README.md +++ b/lib/binding_web/README.md @@ -7,7 +7,7 @@ WebAssembly bindings to the [Tree-sitter](https://github.com/tree-sitter/tree-si ### Setup -You can download the the `tree-sitter.js` and `tree-sitter.wasm` files from [the latest GitHub release](https://github.com/tree-sitter/tree-sitter/releases/tag/0.14.7) and load them using a standalone script: +You can download the the `tree-sitter.js` and `tree-sitter.wasm` files from [the latest GitHub release](https://github.com/tree-sitter/tree-sitter/releases/latest) and load them using a standalone script: ```html