Merge branch 'master' into actions-ci
This commit is contained in:
commit
6dbe6a3a90
27 changed files with 932 additions and 460 deletions
22
Cargo.lock
generated
22
Cargo.lock
generated
|
|
@ -123,9 +123,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.0.25"
|
||||
version = "1.0.66"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f159dfd43363c4d08055a07703eb7a3406b0dac4d0584d96965a3262db3c9d16"
|
||||
checksum = "4c0496836a84f8d0495758516b8621a622beb77c0fed418570e50764093ced48"
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
|
|
@ -272,6 +272,15 @@ version = "0.3.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574"
|
||||
|
||||
[[package]]
|
||||
name = "html-escape"
|
||||
version = "0.2.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d348900ce941b7474395ba922ed3735a517df4546a2939ddb416ce85eeaa988e"
|
||||
dependencies = [
|
||||
"utf8-width",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "idna"
|
||||
version = "0.1.5"
|
||||
|
|
@ -832,7 +841,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "tree-sitter-cli"
|
||||
version = "0.17.3"
|
||||
version = "0.18.0"
|
||||
dependencies = [
|
||||
"ansi_term",
|
||||
"atty",
|
||||
|
|
@ -841,6 +850,7 @@ dependencies = [
|
|||
"difference",
|
||||
"dirs",
|
||||
"glob",
|
||||
"html-escape",
|
||||
"lazy_static",
|
||||
"libloading",
|
||||
"log",
|
||||
|
|
@ -940,6 +950,12 @@ version = "1.0.2"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "796f7e48bef87609f7ade7e06495a87d5cd06c7866e6a5cbfceffc558a243737"
|
||||
|
||||
[[package]]
|
||||
name = "utf8-width"
|
||||
version = "0.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9071ac216321a4470a69fb2b28cfc68dcd1a39acd877c8be8e014df6772d8efa"
|
||||
|
||||
[[package]]
|
||||
name = "vec_map"
|
||||
version = "0.8.1"
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
[package]
|
||||
name = "tree-sitter-cli"
|
||||
description = "CLI tool for developing, testing, and using Tree-sitter parsers"
|
||||
version = "0.17.3"
|
||||
version = "0.18.0"
|
||||
authors = ["Max Brunsfeld <maxbrunsfeld@gmail.com>"]
|
||||
edition = "2018"
|
||||
license = "MIT"
|
||||
|
|
@ -20,7 +20,7 @@ harness = false
|
|||
|
||||
[dependencies]
|
||||
ansi_term = "0.11"
|
||||
cc = "1.0"
|
||||
cc = "^1.0.58"
|
||||
atty = "0.2"
|
||||
clap = "2.32"
|
||||
difference = "2.0"
|
||||
|
|
@ -36,6 +36,7 @@ serde_derive = "1.0"
|
|||
smallbitvec = "2.3.0"
|
||||
tiny_http = "0.6"
|
||||
webbrowser = "0.5.1"
|
||||
html-escape = "0.2.6"
|
||||
|
||||
[dependencies.tree-sitter]
|
||||
version = ">= 0.17.0"
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "tree-sitter-cli",
|
||||
"version": "0.17.3",
|
||||
"version": "0.18.0",
|
||||
"author": "Max Brunsfeld",
|
||||
"license": "MIT",
|
||||
"repository": {
|
||||
|
|
|
|||
|
|
@ -577,7 +577,7 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
"(precedence: {}, associativity: {:?})",
|
||||
precedence, associativity
|
||||
))
|
||||
} else if precedence > 0 {
|
||||
} else if precedence != 0 {
|
||||
Some(format!("(precedence: {})", precedence))
|
||||
} else {
|
||||
None
|
||||
|
|
@ -619,6 +619,28 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
}
|
||||
shift_items.sort_unstable();
|
||||
reduce_items.sort_unstable();
|
||||
|
||||
let list_rule_names = |mut msg: &mut String, items: &[&ParseItem]| {
|
||||
let mut last_rule_id = None;
|
||||
for item in items {
|
||||
if last_rule_id == Some(item.variable_index) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if last_rule_id.is_some() {
|
||||
write!(&mut msg, " and").unwrap();
|
||||
}
|
||||
|
||||
last_rule_id = Some(item.variable_index);
|
||||
write!(
|
||||
msg,
|
||||
" `{}`",
|
||||
self.symbol_name(&Symbol::non_terminal(item.variable_index as usize))
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
};
|
||||
|
||||
if actual_conflict.len() > 1 {
|
||||
if shift_items.len() > 0 {
|
||||
resolution_count += 1;
|
||||
|
|
@ -628,17 +650,7 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
resolution_count
|
||||
)
|
||||
.unwrap();
|
||||
for (i, item) in shift_items.iter().enumerate() {
|
||||
if i > 0 {
|
||||
write!(&mut msg, " and").unwrap();
|
||||
}
|
||||
write!(
|
||||
&mut msg,
|
||||
" `{}`",
|
||||
self.symbol_name(&Symbol::non_terminal(item.variable_index as usize))
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
list_rule_names(&mut msg, &shift_items);
|
||||
write!(&mut msg, " than in the other rules.\n").unwrap();
|
||||
}
|
||||
|
||||
|
|
@ -658,21 +670,11 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
resolution_count += 1;
|
||||
write!(
|
||||
&mut msg,
|
||||
" {}: Specify a left or right associativity in ",
|
||||
" {}: Specify a left or right associativity in",
|
||||
resolution_count
|
||||
)
|
||||
.unwrap();
|
||||
for (i, item) in reduce_items.iter().enumerate() {
|
||||
if i > 0 {
|
||||
write!(&mut msg, " and ").unwrap();
|
||||
}
|
||||
write!(
|
||||
&mut msg,
|
||||
"`{}`",
|
||||
self.symbol_name(&Symbol::non_terminal(item.variable_index as usize))
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
list_rule_names(&mut msg, &reduce_items);
|
||||
write!(&mut msg, "\n").unwrap();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ use self::minimize_parse_table::minimize_parse_table;
|
|||
use self::token_conflicts::TokenConflictMap;
|
||||
use crate::error::Result;
|
||||
use crate::generate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar};
|
||||
use crate::generate::nfa::{CharacterSet, NfaCursor};
|
||||
use crate::generate::nfa::NfaCursor;
|
||||
use crate::generate::node_types::VariableInfo;
|
||||
use crate::generate::rules::{AliasMap, Symbol, SymbolType, TokenSet};
|
||||
use crate::generate::tables::{LexTable, ParseAction, ParseTable, ParseTableEntry};
|
||||
|
|
@ -472,10 +472,8 @@ fn all_chars_are_alphabetical(cursor: &NfaCursor) -> bool {
|
|||
cursor.transition_chars().all(|(chars, is_sep)| {
|
||||
if is_sep {
|
||||
true
|
||||
} else if let CharacterSet::Include(chars) = chars {
|
||||
chars.iter().all(|c| c.is_alphabetic() || *c == '_')
|
||||
} else {
|
||||
false
|
||||
chars.chars().all(|c| c.is_alphabetic() || c == '_')
|
||||
}
|
||||
})
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,10 +6,9 @@ use std::fmt;
|
|||
use std::mem::swap;
|
||||
use std::ops::Range;
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
|
||||
pub enum CharacterSet {
|
||||
Include(Vec<char>),
|
||||
Exclude(Vec<char>),
|
||||
#[derive(Clone, PartialEq, Eq, Hash)]
|
||||
pub struct CharacterSet {
|
||||
ranges: Vec<Range<u32>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
|
|
@ -52,142 +51,233 @@ impl Default for Nfa {
|
|||
}
|
||||
}
|
||||
|
||||
const END: u32 = char::MAX as u32 + 1;
|
||||
|
||||
impl CharacterSet {
|
||||
pub fn empty() -> Self {
|
||||
CharacterSet::Include(Vec::new())
|
||||
CharacterSet { ranges: Vec::new() }
|
||||
}
|
||||
|
||||
pub fn negate(self) -> CharacterSet {
|
||||
match self {
|
||||
CharacterSet::Include(chars) => CharacterSet::Exclude(chars),
|
||||
CharacterSet::Exclude(chars) => CharacterSet::Include(chars),
|
||||
pub fn from_range(mut first: char, mut last: char) -> Self {
|
||||
if first > last {
|
||||
swap(&mut first, &mut last);
|
||||
}
|
||||
CharacterSet {
|
||||
ranges: vec![(first as u32)..(last as u32 + 1)],
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add_char(self, c: char) -> Self {
|
||||
if let CharacterSet::Include(mut chars) = self {
|
||||
if let Err(i) = chars.binary_search(&c) {
|
||||
chars.insert(i, c);
|
||||
pub fn from_char(c: char) -> Self {
|
||||
CharacterSet {
|
||||
ranges: vec![(c as u32)..(c as u32 + 1)],
|
||||
}
|
||||
}
|
||||
|
||||
pub fn negate(mut self) -> CharacterSet {
|
||||
let mut i = 0;
|
||||
let mut previous_end = 0;
|
||||
while i < self.ranges.len() {
|
||||
let range = &mut self.ranges[i];
|
||||
let start = previous_end;
|
||||
previous_end = range.end;
|
||||
if start < range.start {
|
||||
self.ranges[i] = start..range.start;
|
||||
i += 1;
|
||||
} else {
|
||||
self.ranges.remove(i);
|
||||
}
|
||||
CharacterSet::Include(chars)
|
||||
} else {
|
||||
panic!("Called add with a negated character set");
|
||||
}
|
||||
if previous_end < END {
|
||||
self.ranges.push(previous_end..END);
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
pub fn add_range(self, start: char, end: char) -> Self {
|
||||
if let CharacterSet::Include(mut chars) = self {
|
||||
let mut c = start as u32;
|
||||
while c <= end as u32 {
|
||||
chars.push(char::from_u32(c).unwrap());
|
||||
c += 1;
|
||||
pub fn add_char(mut self, c: char) -> Self {
|
||||
self.add_int_range(0, c as u32, c as u32 + 1);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn add_range(mut self, start: char, end: char) -> Self {
|
||||
self.add_int_range(0, start as u32, end as u32 + 1);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn add(mut self, other: &CharacterSet) -> Self {
|
||||
let mut index = 0;
|
||||
for range in &other.ranges {
|
||||
index = self.add_int_range(index, range.start as u32, range.end as u32);
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
fn add_int_range(&mut self, mut i: usize, start: u32, end: u32) -> usize {
|
||||
while i < self.ranges.len() {
|
||||
let range = &mut self.ranges[i];
|
||||
if range.start > end {
|
||||
self.ranges.insert(i, start..end);
|
||||
return i;
|
||||
}
|
||||
chars.sort_unstable();
|
||||
chars.dedup();
|
||||
CharacterSet::Include(chars)
|
||||
} else {
|
||||
panic!("Called add with a negated character set");
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add(self, other: &CharacterSet) -> Self {
|
||||
match self {
|
||||
CharacterSet::Include(mut chars) => match other {
|
||||
CharacterSet::Include(other_chars) => {
|
||||
chars.extend(other_chars);
|
||||
chars.sort_unstable();
|
||||
chars.dedup();
|
||||
CharacterSet::Include(chars)
|
||||
}
|
||||
CharacterSet::Exclude(other_chars) => {
|
||||
let excluded_chars = other_chars
|
||||
.iter()
|
||||
.cloned()
|
||||
.filter(|c| !chars.contains(&c))
|
||||
.collect();
|
||||
CharacterSet::Exclude(excluded_chars)
|
||||
}
|
||||
},
|
||||
CharacterSet::Exclude(mut chars) => match other {
|
||||
CharacterSet::Include(other_chars) => {
|
||||
chars.retain(|c| !other_chars.contains(&c));
|
||||
CharacterSet::Exclude(chars)
|
||||
}
|
||||
CharacterSet::Exclude(other_chars) => {
|
||||
chars.retain(|c| other_chars.contains(&c));
|
||||
CharacterSet::Exclude(chars)
|
||||
}
|
||||
},
|
||||
if range.end >= start {
|
||||
range.end = range.end.max(end);
|
||||
range.start = range.start.min(start);
|
||||
return i;
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
self.ranges.push(start..end);
|
||||
i
|
||||
}
|
||||
|
||||
pub fn does_intersect(&self, other: &CharacterSet) -> bool {
|
||||
match self {
|
||||
CharacterSet::Include(chars) => match other {
|
||||
CharacterSet::Include(other_chars) => compare_chars(chars, other_chars).common,
|
||||
CharacterSet::Exclude(other_chars) => compare_chars(chars, other_chars).left_only,
|
||||
},
|
||||
CharacterSet::Exclude(chars) => match other {
|
||||
CharacterSet::Include(other_chars) => compare_chars(chars, other_chars).right_only,
|
||||
CharacterSet::Exclude(_) => true,
|
||||
},
|
||||
let mut left_ranges = self.ranges.iter();
|
||||
let mut right_ranges = other.ranges.iter();
|
||||
let mut left_range = left_ranges.next();
|
||||
let mut right_range = right_ranges.next();
|
||||
while let (Some(left), Some(right)) = (&left_range, &right_range) {
|
||||
if left.end <= right.start {
|
||||
left_range = left_ranges.next();
|
||||
} else if left.start >= right.end {
|
||||
right_range = right_ranges.next();
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
pub fn remove_intersection(&mut self, other: &mut CharacterSet) -> CharacterSet {
|
||||
match self {
|
||||
CharacterSet::Include(chars) => match other {
|
||||
CharacterSet::Include(other_chars) => {
|
||||
CharacterSet::Include(remove_chars(chars, other_chars, true))
|
||||
let mut intersection = Vec::new();
|
||||
let mut left_i = 0;
|
||||
let mut right_i = 0;
|
||||
while left_i < self.ranges.len() && right_i < other.ranges.len() {
|
||||
let left = &mut self.ranges[left_i];
|
||||
let right = &mut other.ranges[right_i];
|
||||
|
||||
match left.start.cmp(&right.start) {
|
||||
Ordering::Less => {
|
||||
// [ L ]
|
||||
// [ R ]
|
||||
if left.end <= right.start {
|
||||
left_i += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
match left.end.cmp(&right.end) {
|
||||
// [ L ]
|
||||
// [ R ]
|
||||
Ordering::Less => {
|
||||
intersection.push(right.start..left.end);
|
||||
swap(&mut left.end, &mut right.start);
|
||||
left_i += 1;
|
||||
}
|
||||
|
||||
// [ L ]
|
||||
// [ R ]
|
||||
Ordering::Equal => {
|
||||
intersection.push(right.clone());
|
||||
left.end = right.start;
|
||||
other.ranges.remove(right_i);
|
||||
}
|
||||
|
||||
// [ L ]
|
||||
// [ R ]
|
||||
Ordering::Greater => {
|
||||
intersection.push(right.clone());
|
||||
let new_range = left.start..right.start;
|
||||
left.start = right.end;
|
||||
self.ranges.insert(left_i, new_range);
|
||||
other.ranges.remove(right_i);
|
||||
left_i += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
CharacterSet::Exclude(other_chars) => {
|
||||
let mut removed = remove_chars(chars, other_chars, false);
|
||||
add_chars(other_chars, chars);
|
||||
swap(&mut removed, chars);
|
||||
CharacterSet::Include(removed)
|
||||
Ordering::Equal => {
|
||||
// [ L ]
|
||||
// [ R ]
|
||||
if left.end < right.end {
|
||||
intersection.push(left.start..left.end);
|
||||
right.start = left.end;
|
||||
self.ranges.remove(left_i);
|
||||
}
|
||||
// [ L ]
|
||||
// [ R ]
|
||||
else if left.end == right.end {
|
||||
intersection.push(left.clone());
|
||||
self.ranges.remove(left_i);
|
||||
other.ranges.remove(right_i);
|
||||
}
|
||||
// [ L ]
|
||||
// [ R ]
|
||||
else if left.end > right.end {
|
||||
intersection.push(right.clone());
|
||||
left.start = right.end;
|
||||
other.ranges.remove(right_i);
|
||||
}
|
||||
}
|
||||
},
|
||||
CharacterSet::Exclude(chars) => match other {
|
||||
CharacterSet::Include(other_chars) => {
|
||||
let mut removed = remove_chars(other_chars, chars, false);
|
||||
add_chars(chars, other_chars);
|
||||
swap(&mut removed, other_chars);
|
||||
CharacterSet::Include(removed)
|
||||
Ordering::Greater => {
|
||||
// [ L ]
|
||||
// [ R ]
|
||||
if left.start >= right.end {
|
||||
right_i += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
match left.end.cmp(&right.end) {
|
||||
// [ L ]
|
||||
// [ R ]
|
||||
Ordering::Less => {
|
||||
intersection.push(left.clone());
|
||||
let new_range = right.start..left.start;
|
||||
right.start = left.end;
|
||||
other.ranges.insert(right_i, new_range);
|
||||
self.ranges.remove(left_i);
|
||||
right_i += 1;
|
||||
}
|
||||
|
||||
// [ L ]
|
||||
// [ R ]
|
||||
Ordering::Equal => {
|
||||
intersection.push(left.clone());
|
||||
right.end = left.start;
|
||||
self.ranges.remove(left_i);
|
||||
}
|
||||
|
||||
// [ L ]
|
||||
// [ R ]
|
||||
Ordering::Greater => {
|
||||
intersection.push(left.start..right.end);
|
||||
swap(&mut left.start, &mut right.end);
|
||||
right_i += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
CharacterSet::Exclude(other_chars) => {
|
||||
let mut result_exclusion = chars.clone();
|
||||
result_exclusion.extend(other_chars.iter().cloned());
|
||||
result_exclusion.sort_unstable();
|
||||
result_exclusion.dedup();
|
||||
remove_chars(chars, other_chars, true);
|
||||
let mut included_characters = Vec::new();
|
||||
let mut other_included_characters = Vec::new();
|
||||
swap(&mut included_characters, other_chars);
|
||||
swap(&mut other_included_characters, chars);
|
||||
*self = CharacterSet::Include(included_characters);
|
||||
*other = CharacterSet::Include(other_included_characters);
|
||||
CharacterSet::Exclude(result_exclusion)
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
CharacterSet {
|
||||
ranges: intersection,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn iter<'a>(&'a self) -> impl Iterator<Item = u32> + 'a {
|
||||
self.ranges.iter().flat_map(|r| r.clone())
|
||||
}
|
||||
|
||||
pub fn chars<'a>(&'a self) -> impl Iterator<Item = char> + 'a {
|
||||
self.iter().filter_map(char::from_u32)
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
if let CharacterSet::Include(c) = self {
|
||||
c.is_empty()
|
||||
} else {
|
||||
false
|
||||
}
|
||||
self.ranges.is_empty()
|
||||
}
|
||||
|
||||
pub fn ranges<'a>(
|
||||
chars: &'a Vec<char>,
|
||||
pub fn simplify_ignoring<'a>(
|
||||
&'a self,
|
||||
ruled_out_characters: &'a HashSet<u32>,
|
||||
) -> impl Iterator<Item = Range<char>> + 'a {
|
||||
) -> Vec<Range<char>> {
|
||||
let mut prev_range: Option<Range<char>> = None;
|
||||
chars
|
||||
.iter()
|
||||
.map(|c| (*c, false))
|
||||
self.chars()
|
||||
.map(|c| (c, false))
|
||||
.chain(Some(('\0', true)))
|
||||
.filter_map(move |(c, done)| {
|
||||
if done {
|
||||
|
|
@ -212,35 +302,40 @@ impl CharacterSet {
|
|||
None
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub fn contains(&self, c: char) -> bool {
|
||||
match self {
|
||||
CharacterSet::Include(chars) => chars.contains(&c),
|
||||
CharacterSet::Exclude(chars) => !chars.contains(&c),
|
||||
}
|
||||
self.ranges.iter().any(|r| r.contains(&(c as u32)))
|
||||
}
|
||||
}
|
||||
|
||||
impl Ord for CharacterSet {
|
||||
fn cmp(&self, other: &CharacterSet) -> Ordering {
|
||||
match self {
|
||||
CharacterSet::Include(chars) => {
|
||||
if let CharacterSet::Include(other_chars) = other {
|
||||
order_chars(chars, other_chars)
|
||||
} else {
|
||||
Ordering::Less
|
||||
}
|
||||
let count_cmp = self
|
||||
.ranges
|
||||
.iter()
|
||||
.map(|r| r.len())
|
||||
.sum::<usize>()
|
||||
.cmp(&other.ranges.iter().map(|r| r.len()).sum());
|
||||
if count_cmp != Ordering::Equal {
|
||||
return count_cmp;
|
||||
}
|
||||
|
||||
for (left_range, right_range) in self.ranges.iter().zip(other.ranges.iter()) {
|
||||
let cmp = left_range.len().cmp(&right_range.len());
|
||||
if cmp != Ordering::Equal {
|
||||
return cmp;
|
||||
}
|
||||
CharacterSet::Exclude(chars) => {
|
||||
if let CharacterSet::Exclude(other_chars) = other {
|
||||
order_chars(chars, other_chars)
|
||||
} else {
|
||||
Ordering::Greater
|
||||
|
||||
for (left, right) in left_range.clone().zip(right_range.clone()) {
|
||||
let cmp = left.cmp(&right);
|
||||
if cmp != Ordering::Equal {
|
||||
return cmp;
|
||||
}
|
||||
}
|
||||
}
|
||||
return Ordering::Equal;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -250,89 +345,22 @@ impl PartialOrd for CharacterSet {
|
|||
}
|
||||
}
|
||||
|
||||
fn add_chars(left: &mut Vec<char>, right: &Vec<char>) {
|
||||
for c in right {
|
||||
match left.binary_search(c) {
|
||||
Err(i) => left.insert(i, *c),
|
||||
_ => {}
|
||||
impl fmt::Debug for CharacterSet {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "CharacterSet [")?;
|
||||
let mut set = self.clone();
|
||||
if self.contains(char::MAX) {
|
||||
write!(f, "^ ")?;
|
||||
set = set.negate();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn remove_chars(left: &mut Vec<char>, right: &mut Vec<char>, mutate_right: bool) -> Vec<char> {
|
||||
let mut result = Vec::new();
|
||||
right.retain(|right_char| {
|
||||
if let Some(index) = left.iter().position(|left_char| *left_char == *right_char) {
|
||||
left.remove(index);
|
||||
result.push(*right_char);
|
||||
false || !mutate_right
|
||||
} else {
|
||||
true
|
||||
}
|
||||
});
|
||||
result
|
||||
}
|
||||
|
||||
struct SetComparision {
|
||||
left_only: bool,
|
||||
common: bool,
|
||||
right_only: bool,
|
||||
}
|
||||
|
||||
fn compare_chars(left: &Vec<char>, right: &Vec<char>) -> SetComparision {
|
||||
let mut result = SetComparision {
|
||||
left_only: false,
|
||||
common: false,
|
||||
right_only: false,
|
||||
};
|
||||
let mut left = left.iter().cloned();
|
||||
let mut right = right.iter().cloned();
|
||||
let mut i = left.next();
|
||||
let mut j = right.next();
|
||||
while let (Some(left_char), Some(right_char)) = (i, j) {
|
||||
if left_char < right_char {
|
||||
i = left.next();
|
||||
result.left_only = true;
|
||||
} else if left_char > right_char {
|
||||
j = right.next();
|
||||
result.right_only = true;
|
||||
} else {
|
||||
i = left.next();
|
||||
j = right.next();
|
||||
result.common = true;
|
||||
}
|
||||
}
|
||||
|
||||
match (i, j) {
|
||||
(Some(_), _) => result.left_only = true,
|
||||
(_, Some(_)) => result.right_only = true,
|
||||
_ => {}
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
fn order_chars(chars: &Vec<char>, other_chars: &Vec<char>) -> Ordering {
|
||||
if chars.is_empty() {
|
||||
if other_chars.is_empty() {
|
||||
Ordering::Equal
|
||||
} else {
|
||||
Ordering::Less
|
||||
}
|
||||
} else if other_chars.is_empty() {
|
||||
Ordering::Greater
|
||||
} else {
|
||||
let cmp = chars.len().cmp(&other_chars.len());
|
||||
if cmp != Ordering::Equal {
|
||||
return cmp;
|
||||
}
|
||||
for (c, other_c) in chars.iter().zip(other_chars.iter()) {
|
||||
let cmp = c.cmp(other_c);
|
||||
if cmp != Ordering::Equal {
|
||||
return cmp;
|
||||
for (i, c) in set.chars().enumerate() {
|
||||
if i > 0 {
|
||||
write!(f, ", ")?;
|
||||
}
|
||||
write!(f, "{:?}", c)?;
|
||||
}
|
||||
Ordering::Equal
|
||||
write!(f, "]")?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -624,48 +652,46 @@ mod tests {
|
|||
// multiple negated character classes
|
||||
(
|
||||
vec![
|
||||
(CharacterSet::Include(vec!['a']), false, 0, 1),
|
||||
(CharacterSet::Exclude(vec!['a', 'b', 'c']), false, 0, 2),
|
||||
(CharacterSet::Include(vec!['g']), false, 0, 6),
|
||||
(CharacterSet::Exclude(vec!['d', 'e', 'f']), false, 0, 3),
|
||||
(CharacterSet::Exclude(vec!['g', 'h', 'i']), false, 0, 4),
|
||||
(CharacterSet::Include(vec!['g']), false, 0, 5),
|
||||
(CharacterSet::from_char('a'), false, 0, 1),
|
||||
(CharacterSet::from_range('a', 'c').negate(), false, 0, 2),
|
||||
(CharacterSet::from_char('g'), false, 0, 6),
|
||||
(CharacterSet::from_range('d', 'f').negate(), false, 0, 3),
|
||||
(CharacterSet::from_range('g', 'i').negate(), false, 0, 4),
|
||||
(CharacterSet::from_char('g'), false, 0, 5),
|
||||
],
|
||||
vec![
|
||||
NfaTransition {
|
||||
characters: CharacterSet::Include(vec!['a']),
|
||||
characters: CharacterSet::from_char('a'),
|
||||
precedence: 0,
|
||||
states: vec![1, 3, 4],
|
||||
is_separator: false,
|
||||
},
|
||||
NfaTransition {
|
||||
characters: CharacterSet::Include(vec!['g']),
|
||||
characters: CharacterSet::from_char('g'),
|
||||
precedence: 0,
|
||||
states: vec![2, 3, 5, 6],
|
||||
is_separator: false,
|
||||
},
|
||||
NfaTransition {
|
||||
characters: CharacterSet::Include(vec!['b', 'c']),
|
||||
characters: CharacterSet::from_range('b', 'c'),
|
||||
precedence: 0,
|
||||
states: vec![3, 4],
|
||||
is_separator: false,
|
||||
},
|
||||
NfaTransition {
|
||||
characters: CharacterSet::Include(vec!['h', 'i']),
|
||||
characters: CharacterSet::from_range('h', 'i'),
|
||||
precedence: 0,
|
||||
states: vec![2, 3],
|
||||
is_separator: false,
|
||||
},
|
||||
NfaTransition {
|
||||
characters: CharacterSet::Include(vec!['d', 'e', 'f']),
|
||||
characters: CharacterSet::from_range('d', 'f'),
|
||||
precedence: 0,
|
||||
states: vec![2, 4],
|
||||
is_separator: false,
|
||||
},
|
||||
NfaTransition {
|
||||
characters: CharacterSet::Exclude(vec![
|
||||
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i',
|
||||
]),
|
||||
characters: CharacterSet::from_range('a', 'i').negate(),
|
||||
precedence: 0,
|
||||
states: vec![2, 3, 4],
|
||||
is_separator: false,
|
||||
|
|
@ -675,21 +701,21 @@ mod tests {
|
|||
// disjoint characters with same state
|
||||
(
|
||||
vec![
|
||||
(CharacterSet::Include(vec!['a']), false, 0, 1),
|
||||
(CharacterSet::Include(vec!['b']), false, 0, 2),
|
||||
(CharacterSet::Include(vec!['c']), false, 0, 1),
|
||||
(CharacterSet::Include(vec!['d']), false, 0, 1),
|
||||
(CharacterSet::Include(vec!['e']), false, 0, 2),
|
||||
(CharacterSet::from_char('a'), false, 0, 1),
|
||||
(CharacterSet::from_char('b'), false, 0, 2),
|
||||
(CharacterSet::from_char('c'), false, 0, 1),
|
||||
(CharacterSet::from_char('d'), false, 0, 1),
|
||||
(CharacterSet::from_char('e'), false, 0, 2),
|
||||
],
|
||||
vec![
|
||||
NfaTransition {
|
||||
characters: CharacterSet::Include(vec!['a', 'c', 'd']),
|
||||
characters: CharacterSet::empty().add_char('a').add_range('c', 'd'),
|
||||
precedence: 0,
|
||||
states: vec![1],
|
||||
is_separator: false,
|
||||
},
|
||||
NfaTransition {
|
||||
characters: CharacterSet::Include(vec!['b', 'e']),
|
||||
characters: CharacterSet::empty().add_char('b').add_char('e'),
|
||||
precedence: 0,
|
||||
states: vec![2],
|
||||
is_separator: false,
|
||||
|
|
@ -698,119 +724,129 @@ mod tests {
|
|||
),
|
||||
];
|
||||
|
||||
for row in table.iter() {
|
||||
for (i, row) in table.iter().enumerate() {
|
||||
assert_eq!(
|
||||
NfaCursor::group_transitions(
|
||||
row.0
|
||||
.iter()
|
||||
.map(|(chars, is_sep, prec, state)| (chars, *is_sep, *prec, *state))
|
||||
),
|
||||
row.1
|
||||
row.1,
|
||||
"row {}",
|
||||
i
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_character_set_remove_intersection() {
|
||||
// A whitelist and an overlapping whitelist.
|
||||
// Both sets contain 'c', 'd', and 'f'
|
||||
let mut a = CharacterSet::empty().add_range('a', 'f');
|
||||
let mut b = CharacterSet::empty().add_range('c', 'h');
|
||||
assert_eq!(
|
||||
a.remove_intersection(&mut b),
|
||||
CharacterSet::empty().add_range('c', 'f')
|
||||
);
|
||||
assert_eq!(a, CharacterSet::empty().add_range('a', 'b'));
|
||||
assert_eq!(b, CharacterSet::empty().add_range('g', 'h'));
|
||||
struct Row {
|
||||
left: CharacterSet,
|
||||
right: CharacterSet,
|
||||
left_only: CharacterSet,
|
||||
right_only: CharacterSet,
|
||||
intersection: CharacterSet,
|
||||
}
|
||||
|
||||
let mut a = CharacterSet::empty().add_range('a', 'f');
|
||||
let mut b = CharacterSet::empty().add_range('c', 'h');
|
||||
assert_eq!(
|
||||
b.remove_intersection(&mut a),
|
||||
CharacterSet::empty().add_range('c', 'f')
|
||||
);
|
||||
assert_eq!(a, CharacterSet::empty().add_range('a', 'b'));
|
||||
assert_eq!(b, CharacterSet::empty().add_range('g', 'h'));
|
||||
let rows = [
|
||||
// [ L ]
|
||||
// [ R ]
|
||||
Row {
|
||||
left: CharacterSet::from_range('a', 'f'),
|
||||
right: CharacterSet::from_range('g', 'm'),
|
||||
left_only: CharacterSet::from_range('a', 'f'),
|
||||
right_only: CharacterSet::from_range('g', 'm'),
|
||||
intersection: CharacterSet::empty(),
|
||||
},
|
||||
// [ L ]
|
||||
// [ R ]
|
||||
Row {
|
||||
left: CharacterSet::from_range('a', 'f'),
|
||||
right: CharacterSet::from_range('c', 'i'),
|
||||
left_only: CharacterSet::from_range('a', 'b'),
|
||||
right_only: CharacterSet::from_range('g', 'i'),
|
||||
intersection: CharacterSet::from_range('c', 'f'),
|
||||
},
|
||||
// [ L ]
|
||||
// [ R ]
|
||||
Row {
|
||||
left: CharacterSet::from_range('a', 'f'),
|
||||
right: CharacterSet::from_range('d', 'f'),
|
||||
left_only: CharacterSet::from_range('a', 'c'),
|
||||
right_only: CharacterSet::empty(),
|
||||
intersection: CharacterSet::from_range('d', 'f'),
|
||||
},
|
||||
// [ L ]
|
||||
// [ R ]
|
||||
Row {
|
||||
left: CharacterSet::from_range('a', 'm'),
|
||||
right: CharacterSet::from_range('d', 'f'),
|
||||
left_only: CharacterSet::empty()
|
||||
.add_range('a', 'c')
|
||||
.add_range('g', 'm'),
|
||||
right_only: CharacterSet::empty(),
|
||||
intersection: CharacterSet::from_range('d', 'f'),
|
||||
},
|
||||
// [ L1 ] [ L2 ]
|
||||
// [ R ]
|
||||
Row {
|
||||
left: CharacterSet::empty()
|
||||
.add_range('a', 'e')
|
||||
.add_range('h', 'l'),
|
||||
right: CharacterSet::from_range('c', 'i'),
|
||||
left_only: CharacterSet::empty()
|
||||
.add_range('a', 'b')
|
||||
.add_range('j', 'l'),
|
||||
right_only: CharacterSet::from_range('f', 'g'),
|
||||
intersection: CharacterSet::empty()
|
||||
.add_range('c', 'e')
|
||||
.add_range('h', 'i'),
|
||||
},
|
||||
];
|
||||
|
||||
// A whitelist and a larger whitelist.
|
||||
let mut a = CharacterSet::empty().add_char('c');
|
||||
let mut b = CharacterSet::empty().add_range('a', 'e');
|
||||
assert_eq!(
|
||||
a.remove_intersection(&mut b),
|
||||
CharacterSet::empty().add_char('c')
|
||||
);
|
||||
assert_eq!(a, CharacterSet::empty());
|
||||
assert_eq!(
|
||||
b,
|
||||
CharacterSet::empty()
|
||||
.add_range('a', 'b')
|
||||
.add_range('d', 'e')
|
||||
);
|
||||
for (i, row) in rows.iter().enumerate() {
|
||||
let mut left = row.left.clone();
|
||||
let mut right = row.right.clone();
|
||||
assert_eq!(
|
||||
left.remove_intersection(&mut right),
|
||||
row.intersection,
|
||||
"row {}a: {:?} && {:?}",
|
||||
i,
|
||||
row.left,
|
||||
row.right
|
||||
);
|
||||
assert_eq!(
|
||||
left, row.left_only,
|
||||
"row {}a: {:?} - {:?}",
|
||||
i, row.left, row.right
|
||||
);
|
||||
assert_eq!(
|
||||
right, row.right_only,
|
||||
"row {}a: {:?} - {:?}",
|
||||
i, row.right, row.left
|
||||
);
|
||||
|
||||
let mut a = CharacterSet::empty().add_char('c');
|
||||
let mut b = CharacterSet::empty().add_range('a', 'e');
|
||||
assert_eq!(
|
||||
b.remove_intersection(&mut a),
|
||||
CharacterSet::empty().add_char('c')
|
||||
);
|
||||
assert_eq!(a, CharacterSet::empty());
|
||||
assert_eq!(
|
||||
b,
|
||||
CharacterSet::empty()
|
||||
.add_range('a', 'b')
|
||||
.add_range('d', 'e')
|
||||
);
|
||||
|
||||
// An inclusion and an intersecting exclusion.
|
||||
// Both sets contain 'e', 'f', and 'm'
|
||||
let mut a = CharacterSet::empty()
|
||||
.add_range('c', 'h')
|
||||
.add_range('k', 'm');
|
||||
let mut b = CharacterSet::empty()
|
||||
.add_range('a', 'd')
|
||||
.add_range('g', 'l')
|
||||
.negate();
|
||||
assert_eq!(
|
||||
a.remove_intersection(&mut b),
|
||||
CharacterSet::Include(vec!['e', 'f', 'm'])
|
||||
);
|
||||
assert_eq!(a, CharacterSet::Include(vec!['c', 'd', 'g', 'h', 'k', 'l']));
|
||||
assert_eq!(b, CharacterSet::empty().add_range('a', 'm').negate());
|
||||
|
||||
let mut a = CharacterSet::empty()
|
||||
.add_range('c', 'h')
|
||||
.add_range('k', 'm');
|
||||
let mut b = CharacterSet::empty()
|
||||
.add_range('a', 'd')
|
||||
.add_range('g', 'l')
|
||||
.negate();
|
||||
assert_eq!(
|
||||
b.remove_intersection(&mut a),
|
||||
CharacterSet::Include(vec!['e', 'f', 'm'])
|
||||
);
|
||||
assert_eq!(a, CharacterSet::Include(vec!['c', 'd', 'g', 'h', 'k', 'l']));
|
||||
assert_eq!(b, CharacterSet::empty().add_range('a', 'm').negate());
|
||||
|
||||
// An exclusion and an overlapping inclusion.
|
||||
// Both sets exclude 'c', 'd', and 'e'
|
||||
let mut a = CharacterSet::empty().add_range('a', 'e').negate();
|
||||
let mut b = CharacterSet::empty().add_range('c', 'h').negate();
|
||||
assert_eq!(
|
||||
a.remove_intersection(&mut b),
|
||||
CharacterSet::empty().add_range('a', 'h').negate(),
|
||||
);
|
||||
assert_eq!(a, CharacterSet::Include(vec!['f', 'g', 'h']));
|
||||
assert_eq!(b, CharacterSet::Include(vec!['a', 'b']));
|
||||
|
||||
// An exclusion and a larger exclusion.
|
||||
let mut a = CharacterSet::empty().add_range('b', 'c').negate();
|
||||
let mut b = CharacterSet::empty().add_range('a', 'd').negate();
|
||||
assert_eq!(
|
||||
a.remove_intersection(&mut b),
|
||||
CharacterSet::empty().add_range('a', 'd').negate(),
|
||||
);
|
||||
assert_eq!(a, CharacterSet::empty().add_char('a').add_char('d'));
|
||||
assert_eq!(b, CharacterSet::empty());
|
||||
let mut left = row.left.clone();
|
||||
let mut right = row.right.clone();
|
||||
assert_eq!(
|
||||
right.remove_intersection(&mut left),
|
||||
row.intersection,
|
||||
"row {}b: {:?} && {:?}",
|
||||
i,
|
||||
row.left,
|
||||
row.right
|
||||
);
|
||||
assert_eq!(
|
||||
left, row.left_only,
|
||||
"row {}b: {:?} - {:?}",
|
||||
i, row.left, row.right
|
||||
);
|
||||
assert_eq!(
|
||||
right, row.right_only,
|
||||
"row {}b: {:?} - {:?}",
|
||||
i, row.right, row.left
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -834,29 +870,29 @@ mod tests {
|
|||
assert!(!b.does_intersect(&a));
|
||||
|
||||
let (a, b) = (
|
||||
CharacterSet::Include(vec!['b']),
|
||||
CharacterSet::Exclude(vec!['a', 'b', 'c']),
|
||||
CharacterSet::from_char('b'),
|
||||
CharacterSet::from_range('a', 'c'),
|
||||
);
|
||||
assert!(a.does_intersect(&b));
|
||||
assert!(b.does_intersect(&a));
|
||||
|
||||
let (a, b) = (
|
||||
CharacterSet::from_char('b'),
|
||||
CharacterSet::from_range('a', 'c').negate(),
|
||||
);
|
||||
assert!(!a.does_intersect(&b));
|
||||
assert!(!b.does_intersect(&a));
|
||||
|
||||
let (a, b) = (
|
||||
CharacterSet::Include(vec!['b']),
|
||||
CharacterSet::Exclude(vec!['a', 'c']),
|
||||
CharacterSet::from_char('a').negate(),
|
||||
CharacterSet::from_char('a').negate(),
|
||||
);
|
||||
assert!(a.does_intersect(&b));
|
||||
assert!(b.does_intersect(&a));
|
||||
|
||||
let (a, b) = (
|
||||
CharacterSet::Exclude(vec!['a']),
|
||||
CharacterSet::Exclude(vec!['a']),
|
||||
);
|
||||
assert!(a.does_intersect(&b));
|
||||
assert!(b.does_intersect(&a));
|
||||
|
||||
let (a, b) = (
|
||||
CharacterSet::Include(vec!['c']),
|
||||
CharacterSet::Exclude(vec!['a']),
|
||||
CharacterSet::from_char('c'),
|
||||
CharacterSet::from_char('a').negate(),
|
||||
);
|
||||
assert!(a.does_intersect(&b));
|
||||
assert!(b.does_intersect(&a));
|
||||
|
|
@ -898,7 +934,11 @@ mod tests {
|
|||
.into_iter()
|
||||
.map(|c: &char| *c as u32)
|
||||
.collect();
|
||||
let ranges = CharacterSet::ranges(chars, &ruled_out_chars).collect::<Vec<_>>();
|
||||
let mut set = CharacterSet::empty();
|
||||
for c in chars {
|
||||
set = set.add_char(*c);
|
||||
}
|
||||
let ranges = set.simplify_ignoring(&ruled_out_chars);
|
||||
assert_eq!(ranges, *expected_ranges);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ use std::i32;
|
|||
|
||||
lazy_static! {
|
||||
static ref CURLY_BRACE_REGEX: Regex =
|
||||
Regex::new(r#"(^|[^\\])\{([^}]*[^0-9A-F,}][^}]*)\}"#).unwrap();
|
||||
Regex::new(r#"(^|[^\\])\{([^}]*[^0-9A-Fa-f,}][^}]*)\}"#).unwrap();
|
||||
}
|
||||
|
||||
const ALLOWED_REDUNDANT_ESCAPED_CHARS: [char; 4] = ['!', '\'', '"', '/'];
|
||||
|
|
@ -198,11 +198,11 @@ impl NfaBuilder {
|
|||
Ast::Empty(_) => Ok(false),
|
||||
Ast::Flags(_) => Err(Error::regex("Flags are not supported")),
|
||||
Ast::Literal(literal) => {
|
||||
self.push_advance(CharacterSet::Include(vec![literal.c]), next_state_id);
|
||||
self.push_advance(CharacterSet::from_char(literal.c), next_state_id);
|
||||
Ok(true)
|
||||
}
|
||||
Ast::Dot(_) => {
|
||||
self.push_advance(CharacterSet::Exclude(vec!['\n']), next_state_id);
|
||||
self.push_advance(CharacterSet::from_char('\n').negate(), next_state_id);
|
||||
Ok(true)
|
||||
}
|
||||
Ast::Assertion(_) => Err(Error::regex("Assertions are not supported")),
|
||||
|
|
@ -344,11 +344,9 @@ impl NfaBuilder {
|
|||
|
||||
fn expand_character_class(&self, item: &ClassSetItem) -> Result<CharacterSet> {
|
||||
match item {
|
||||
ClassSetItem::Empty(_) => Ok(CharacterSet::Include(Vec::new())),
|
||||
ClassSetItem::Literal(literal) => Ok(CharacterSet::Include(vec![literal.c])),
|
||||
ClassSetItem::Range(range) => {
|
||||
Ok(CharacterSet::empty().add_range(range.start.c, range.end.c))
|
||||
}
|
||||
ClassSetItem::Empty(_) => Ok(CharacterSet::empty()),
|
||||
ClassSetItem::Literal(literal) => Ok(CharacterSet::from_char(literal.c)),
|
||||
ClassSetItem::Range(range) => Ok(CharacterSet::from_range(range.start.c, range.end.c)),
|
||||
ClassSetItem::Union(union) => {
|
||||
let mut result = CharacterSet::empty();
|
||||
for item in &union.items {
|
||||
|
|
@ -366,7 +364,7 @@ impl NfaBuilder {
|
|||
|
||||
fn expand_perl_character_class(&self, item: &ClassPerlKind) -> CharacterSet {
|
||||
match item {
|
||||
ClassPerlKind::Digit => CharacterSet::empty().add_range('0', '9'),
|
||||
ClassPerlKind::Digit => CharacterSet::from_range('0', '9'),
|
||||
ClassPerlKind::Space => CharacterSet::empty()
|
||||
.add_char(' ')
|
||||
.add_char('\t')
|
||||
|
|
@ -653,12 +651,15 @@ mod tests {
|
|||
Rule::pattern(r#"\{[ab]{3}\}"#),
|
||||
// Unicode codepoints
|
||||
Rule::pattern(r#"\u{1000A}"#),
|
||||
// Unicode codepoints (lowercase)
|
||||
Rule::pattern(r#"\u{1000b}"#),
|
||||
],
|
||||
separators: vec![],
|
||||
examples: vec![
|
||||
("u{1234} ok", Some((0, "u{1234}"))),
|
||||
("{aba}}", Some((1, "{aba}"))),
|
||||
("\u{1000A}", Some((2, "\u{1000A}"))),
|
||||
("\u{1000b}", Some((3, "\u{1000b}"))),
|
||||
],
|
||||
},
|
||||
];
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
use super::grammars::{ExternalToken, LexicalGrammar, SyntaxGrammar, VariableType};
|
||||
use super::nfa::CharacterSet;
|
||||
use super::rules::{Alias, AliasMap, Symbol, SymbolType};
|
||||
use super::tables::{
|
||||
AdvanceAction, FieldLocation, GotoAction, LexState, LexTable, ParseAction, ParseTable,
|
||||
|
|
@ -659,21 +658,19 @@ impl Generator {
|
|||
.advance_actions
|
||||
.iter()
|
||||
.map(|(chars, action)| {
|
||||
let (chars, is_included) = match chars {
|
||||
CharacterSet::Include(c) => (c, true),
|
||||
CharacterSet::Exclude(c) => (c, false),
|
||||
};
|
||||
let mut call_id = None;
|
||||
let mut ranges =
|
||||
CharacterSet::ranges(chars, &ruled_out_chars).collect::<Vec<_>>();
|
||||
let is_included = !chars.contains(std::char::MAX);
|
||||
let mut ranges;
|
||||
if is_included {
|
||||
ruled_out_chars.extend(chars.iter().map(|c| *c as u32));
|
||||
ranges = chars.simplify_ignoring(&ruled_out_chars);
|
||||
ruled_out_chars.extend(chars.iter());
|
||||
} else {
|
||||
ranges = chars.clone().negate().simplify_ignoring(&ruled_out_chars);
|
||||
ranges.insert(0, '\0'..'\0')
|
||||
}
|
||||
|
||||
// Record any large character sets so that they can be extracted
|
||||
// into helper functions, reducing code duplication.
|
||||
let mut call_id = None;
|
||||
if extract_helper_functions && ranges.len() > LARGE_CHARACTER_RANGE_COUNT {
|
||||
let char_set_symbol = self
|
||||
.symbol_for_advance_action(action, &lex_table)
|
||||
|
|
@ -887,11 +884,16 @@ impl Generator {
|
|||
add!(self, " &&{}lookahead != ", line_break);
|
||||
self.add_character(range.end);
|
||||
} else {
|
||||
add!(self, "(lookahead < ");
|
||||
self.add_character(range.start);
|
||||
add!(self, " || ");
|
||||
self.add_character(range.end);
|
||||
add!(self, " < lookahead)");
|
||||
if range.start != '\0' {
|
||||
add!(self, "(lookahead < ");
|
||||
self.add_character(range.start);
|
||||
add!(self, " || ");
|
||||
self.add_character(range.end);
|
||||
add!(self, " < lookahead)");
|
||||
} else {
|
||||
add!(self, "lookahead > ");
|
||||
self.add_character(range.end);
|
||||
}
|
||||
}
|
||||
}
|
||||
did_add = true;
|
||||
|
|
|
|||
|
|
@ -1,10 +1,14 @@
|
|||
try {
|
||||
module.exports = require("./build/Release/tree_sitter_PARSER_NAME_binding");
|
||||
} catch (error) {
|
||||
try {
|
||||
module.exports = require("./build/Debug/tree_sitter_PARSER_NAME_binding");
|
||||
} catch (_) {
|
||||
if (error.code !== 'MODULE_NOT_FOUND')
|
||||
throw error
|
||||
else try {
|
||||
module.exports = require("./build/Debug/tree_sitter_PARSER_NAME_binding");
|
||||
} catch (error2) {
|
||||
if (error2.code === 'MODULE_NOT_FOUND')
|
||||
throw error
|
||||
throw error2
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -63,6 +63,7 @@ fn run() -> error::Result<()> {
|
|||
.arg(Arg::with_name("scope").long("scope").takes_value(true))
|
||||
.arg(Arg::with_name("debug").long("debug").short("d"))
|
||||
.arg(Arg::with_name("debug-graph").long("debug-graph").short("D"))
|
||||
.arg(Arg::with_name("debug-xml").long("xml").short("x"))
|
||||
.arg(Arg::with_name("quiet").long("quiet").short("q"))
|
||||
.arg(Arg::with_name("stat").long("stat").short("s"))
|
||||
.arg(Arg::with_name("time").long("time").short("t"))
|
||||
|
|
@ -119,6 +120,7 @@ fn run() -> error::Result<()> {
|
|||
.short("f")
|
||||
.takes_value(true),
|
||||
)
|
||||
.arg(Arg::with_name("update").long("update").short("u"))
|
||||
.arg(Arg::with_name("debug").long("debug").short("d"))
|
||||
.arg(Arg::with_name("debug-graph").long("debug-graph").short("D")),
|
||||
)
|
||||
|
|
@ -193,6 +195,7 @@ fn run() -> error::Result<()> {
|
|||
} else if let Some(matches) = matches.subcommand_matches("test") {
|
||||
let debug = matches.is_present("debug");
|
||||
let debug_graph = matches.is_present("debug-graph");
|
||||
let update = matches.is_present("update");
|
||||
let filter = matches.value_of("filter");
|
||||
let languages = loader.languages_at_path(¤t_dir)?;
|
||||
let language = languages
|
||||
|
|
@ -206,7 +209,7 @@ fn run() -> error::Result<()> {
|
|||
test_corpus_dir = current_dir.join("corpus");
|
||||
}
|
||||
if test_corpus_dir.is_dir() {
|
||||
test::run_tests_at_path(*language, &test_corpus_dir, debug, debug_graph, filter)?;
|
||||
test::run_tests_at_path(*language, &test_corpus_dir, debug, debug_graph, filter, update)?;
|
||||
}
|
||||
|
||||
// Check that all of the queries are valid.
|
||||
|
|
@ -220,6 +223,7 @@ fn run() -> error::Result<()> {
|
|||
} else if let Some(matches) = matches.subcommand_matches("parse") {
|
||||
let debug = matches.is_present("debug");
|
||||
let debug_graph = matches.is_present("debug-graph");
|
||||
let debug_xml = matches.is_present("debug-xml");
|
||||
let quiet = matches.is_present("quiet");
|
||||
let time = matches.is_present("time");
|
||||
let edits = matches
|
||||
|
|
@ -255,6 +259,7 @@ fn run() -> error::Result<()> {
|
|||
timeout,
|
||||
debug,
|
||||
debug_graph,
|
||||
debug_xml,
|
||||
Some(&cancellation_flag),
|
||||
)?;
|
||||
|
||||
|
|
|
|||
|
|
@ -40,6 +40,7 @@ pub fn parse_file_at_path(
|
|||
timeout: u64,
|
||||
debug: bool,
|
||||
debug_graph: bool,
|
||||
debug_xml: bool,
|
||||
cancellation_flag: Option<&AtomicUsize>,
|
||||
) -> Result<bool> {
|
||||
let mut _log_session = None;
|
||||
|
|
@ -151,6 +152,60 @@ pub fn parse_file_at_path(
|
|||
println!("");
|
||||
}
|
||||
|
||||
if debug_xml {
|
||||
let mut needs_newline = false;
|
||||
let mut indent_level = 0;
|
||||
let mut did_visit_children = false;
|
||||
let mut tags: Vec<&str> = Vec::new();
|
||||
loop {
|
||||
let node = cursor.node();
|
||||
let is_named = node.is_named();
|
||||
if did_visit_children {
|
||||
if is_named {
|
||||
let tag = tags.pop();
|
||||
write!(&mut stdout, "</{}>\n", tag.expect("there is a tag"))?;
|
||||
needs_newline = true;
|
||||
}
|
||||
if cursor.goto_next_sibling() {
|
||||
did_visit_children = false;
|
||||
} else if cursor.goto_parent() {
|
||||
did_visit_children = true;
|
||||
indent_level -= 1;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
if is_named {
|
||||
if needs_newline {
|
||||
stdout.write(b"\n")?;
|
||||
}
|
||||
for _ in 0..indent_level {
|
||||
stdout.write(b" ")?;
|
||||
}
|
||||
write!(&mut stdout, "<{}", node.kind())?;
|
||||
if let Some(field_name) = cursor.field_name() {
|
||||
write!(&mut stdout, " type=\"{}\"", field_name)?;
|
||||
}
|
||||
write!(&mut stdout, ">")?;
|
||||
tags.push(node.kind());
|
||||
needs_newline = true;
|
||||
}
|
||||
if cursor.goto_first_child() {
|
||||
did_visit_children = false;
|
||||
indent_level += 1;
|
||||
} else {
|
||||
did_visit_children = true;
|
||||
let start = node.start_byte();
|
||||
let end = node.end_byte();
|
||||
let value = std::str::from_utf8(&source_code[start..end]).expect("has a string");
|
||||
write!(&mut stdout, "{}", html_escape::encode_text(value))?;
|
||||
}
|
||||
}
|
||||
}
|
||||
cursor.reset(tree.root_node());
|
||||
println!("");
|
||||
}
|
||||
|
||||
let mut first_error = None;
|
||||
loop {
|
||||
let node = cursor.node();
|
||||
|
|
|
|||
268
cli/src/test.rs
268
cli/src/test.rs
|
|
@ -6,9 +6,10 @@ use lazy_static::lazy_static;
|
|||
use regex::bytes::{Regex as ByteRegex, RegexBuilder as ByteRegexBuilder};
|
||||
use regex::Regex;
|
||||
use std::char;
|
||||
use std::fmt::Write as FmtWrite;
|
||||
use std::fs;
|
||||
use std::io::{self, Write};
|
||||
use std::path::Path;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::str;
|
||||
use tree_sitter::{Language, LogType, Parser, Query};
|
||||
|
||||
|
|
@ -30,6 +31,7 @@ pub enum TestEntry {
|
|||
Group {
|
||||
name: String,
|
||||
children: Vec<TestEntry>,
|
||||
file_path: Option<PathBuf>,
|
||||
},
|
||||
Example {
|
||||
name: String,
|
||||
|
|
@ -44,6 +46,7 @@ impl Default for TestEntry {
|
|||
TestEntry::Group {
|
||||
name: String::new(),
|
||||
children: Vec::new(),
|
||||
file_path: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -54,6 +57,7 @@ pub fn run_tests_at_path(
|
|||
debug: bool,
|
||||
debug_graph: bool,
|
||||
filter: Option<&str>,
|
||||
update: bool,
|
||||
) -> Result<()> {
|
||||
let test_entry = parse_tests(path)?;
|
||||
let mut _log_session = None;
|
||||
|
|
@ -72,27 +76,45 @@ pub fn run_tests_at_path(
|
|||
}
|
||||
|
||||
let mut failures = Vec::new();
|
||||
if let TestEntry::Group { children, .. } = test_entry {
|
||||
for child in children {
|
||||
run_tests(&mut parser, child, filter, 0, &mut failures)?;
|
||||
}
|
||||
}
|
||||
let mut corrected_entries = Vec::new();
|
||||
run_tests(
|
||||
&mut parser,
|
||||
test_entry,
|
||||
filter,
|
||||
0,
|
||||
&mut failures,
|
||||
update,
|
||||
&mut corrected_entries,
|
||||
)?;
|
||||
|
||||
if failures.len() > 0 {
|
||||
println!("");
|
||||
|
||||
if failures.len() == 1 {
|
||||
println!("1 failure:")
|
||||
} else {
|
||||
println!("{} failures:", failures.len())
|
||||
}
|
||||
if update {
|
||||
if failures.len() == 1 {
|
||||
println!("1 update:\n")
|
||||
} else {
|
||||
println!("{} updates:\n", failures.len())
|
||||
}
|
||||
|
||||
print_diff_key();
|
||||
for (i, (name, actual, expected)) in failures.iter().enumerate() {
|
||||
println!("\n {}. {}:", i + 1, name);
|
||||
print_diff(actual, expected);
|
||||
for (i, (name, ..)) in failures.iter().enumerate() {
|
||||
println!(" {}. {}", i + 1, name);
|
||||
}
|
||||
Ok(())
|
||||
} else {
|
||||
if failures.len() == 1 {
|
||||
println!("1 failure:")
|
||||
} else {
|
||||
println!("{} failures:", failures.len())
|
||||
}
|
||||
|
||||
print_diff_key();
|
||||
for (i, (name, actual, expected)) in failures.iter().enumerate() {
|
||||
println!("\n {}. {}:", i + 1, name);
|
||||
print_diff(actual, expected);
|
||||
}
|
||||
Error::err(String::new())
|
||||
}
|
||||
Error::err(String::new())
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
|
|
@ -149,6 +171,8 @@ fn run_tests(
|
|||
filter: Option<&str>,
|
||||
mut indent_level: i32,
|
||||
failures: &mut Vec<(String, String, String)>,
|
||||
update: bool,
|
||||
corrected_entries: &mut Vec<(String, String, String)>,
|
||||
) -> Result<()> {
|
||||
match test_entry {
|
||||
TestEntry::Example {
|
||||
|
|
@ -159,6 +183,11 @@ fn run_tests(
|
|||
} => {
|
||||
if let Some(filter) = filter {
|
||||
if !name.contains(filter) {
|
||||
if update {
|
||||
let input = String::from_utf8(input).unwrap();
|
||||
let output = format_sexp(&output);
|
||||
corrected_entries.push((name, input, output));
|
||||
}
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
|
|
@ -172,25 +201,138 @@ fn run_tests(
|
|||
}
|
||||
if actual == output {
|
||||
println!("✓ {}", Colour::Green.paint(&name));
|
||||
if update {
|
||||
let input = String::from_utf8(input).unwrap();
|
||||
let output = format_sexp(&output);
|
||||
corrected_entries.push((name, input, output));
|
||||
}
|
||||
} else {
|
||||
println!("✗ {}", Colour::Red.paint(&name));
|
||||
if update {
|
||||
let input = String::from_utf8(input).unwrap();
|
||||
let output = format_sexp(&actual);
|
||||
corrected_entries.push((name.clone(), input, output));
|
||||
println!("✓ {}", Colour::Blue.paint(&name));
|
||||
} else {
|
||||
println!("✗ {}", Colour::Red.paint(&name));
|
||||
}
|
||||
failures.push((name, actual, output));
|
||||
}
|
||||
}
|
||||
TestEntry::Group { name, children } => {
|
||||
for _ in 0..indent_level {
|
||||
print!(" ");
|
||||
TestEntry::Group {
|
||||
name,
|
||||
children,
|
||||
file_path,
|
||||
} => {
|
||||
if indent_level > 0 {
|
||||
for _ in 0..indent_level {
|
||||
print!(" ");
|
||||
}
|
||||
println!("{}:", name);
|
||||
}
|
||||
println!("{}:", name);
|
||||
|
||||
let failure_count = failures.len();
|
||||
|
||||
indent_level += 1;
|
||||
for child in children {
|
||||
run_tests(parser, child, filter, indent_level, failures)?;
|
||||
run_tests(
|
||||
parser,
|
||||
child,
|
||||
filter,
|
||||
indent_level,
|
||||
failures,
|
||||
update,
|
||||
corrected_entries,
|
||||
)?;
|
||||
}
|
||||
|
||||
if let Some(file_path) = file_path {
|
||||
if update && failures.len() - failure_count > 0 {
|
||||
write_tests(&file_path, corrected_entries)?;
|
||||
}
|
||||
corrected_entries.clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Re-flows a single-line S-expression so that every node starts on its own
/// line, indented by its nesting depth.
///
/// The input is split on spaces and closing parentheses, and each piece is
/// handled by its shape:
///
/// * an empty piece stands for a `)` and closes one nesting level;
/// * a piece starting with `(` opens a node; it goes on a new indented line
///   unless it directly follows a field label, in which case it stays on the
///   label's line;
/// * a piece ending with `:` is a field label and starts a new indented line.
///
/// Any other piece is dropped, except the operand that follows `(MISSING` or
/// `(UNEXPECTED`, which is kept on the same line as its marker.
///
/// Returns the formatted text without a trailing newline.
fn format_sexp(sexp: &str) -> String {
    // NOTE(review): one space per nesting level, as the literal appears here;
    // confirm this matches the indentation used by the corpus files.
    const INDENT: &str = " ";

    /// Begins a new output line indented to `depth` levels.
    fn start_line(out: &mut String, depth: i32) {
        out.push('\n');
        // A non-positive depth (unbalanced input) yields no indentation.
        for _ in 0..depth {
            out.push_str(INDENT);
        }
    }

    let mut formatted = String::new();
    let mut indent_level = 0;
    let mut has_field = false;
    let mut tokens = sexp.split(|c: char| c == ' ' || c == ')');

    while let Some(token) = tokens.next() {
        if token.is_empty() {
            // ")"
            indent_level -= 1;
            formatted.push(')');
        } else if token.starts_with('(') {
            if has_field {
                // The field label already opened the line and the level.
                has_field = false;
            } else {
                if indent_level > 0 {
                    start_line(&mut formatted, indent_level);
                }
                indent_level += 1;
            }

            // "(node_name"
            formatted.push_str(token);

            // "(MISSING node_name" or "(UNEXPECTED 'x'": keep the operand on
            // the same line. The whole marker is compared so that ordinary
            // node names beginning with 'M' or 'U' are not mistaken for it.
            if token == "(MISSING" || token == "(UNEXPECTED" {
                if let Some(operand) = tokens.next() {
                    formatted.push(' ');
                    formatted.push_str(operand);
                }
            }
        } else if token.ends_with(':') {
            // "field:"
            start_line(&mut formatted, indent_level);
            formatted.push_str(token);
            formatted.push(' ');
            has_field = true;
            indent_level += 1;
        }
    }

    formatted
}
|
||||
|
||||
fn write_tests(file_path: &Path, corrected_entries: &Vec<(String, String, String)>) -> Result<()> {
|
||||
let mut buffer = fs::File::create(file_path)?;
|
||||
write_tests_to_buffer(&mut buffer, corrected_entries)
|
||||
}
|
||||
|
||||
fn write_tests_to_buffer(
|
||||
buffer: &mut impl Write,
|
||||
corrected_entries: &Vec<(String, String, String)>,
|
||||
) -> Result<()> {
|
||||
for (i, (name, input, output)) in corrected_entries.iter().enumerate() {
|
||||
if i > 0 {
|
||||
write!(buffer, "\n")?;
|
||||
}
|
||||
write!(
|
||||
buffer,
|
||||
"{}\n{}\n{}\n{}\n{}\n\n{}\n",
|
||||
"=".repeat(80),
|
||||
name,
|
||||
"=".repeat(80),
|
||||
input,
|
||||
"-".repeat(80),
|
||||
output.trim()
|
||||
)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn parse_tests(path: &Path) -> io::Result<TestEntry> {
|
||||
let name = path
|
||||
.file_stem()
|
||||
|
|
@ -206,10 +348,14 @@ pub fn parse_tests(path: &Path) -> io::Result<TestEntry> {
|
|||
children.push(parse_tests(&entry.path())?);
|
||||
}
|
||||
}
|
||||
Ok(TestEntry::Group { name, children })
|
||||
Ok(TestEntry::Group {
|
||||
name,
|
||||
children,
|
||||
file_path: None,
|
||||
})
|
||||
} else {
|
||||
let content = fs::read_to_string(path)?;
|
||||
Ok(parse_test_content(name, content))
|
||||
Ok(parse_test_content(name, content, Some(path.to_path_buf())))
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -217,7 +363,7 @@ pub fn strip_sexp_fields(sexp: String) -> String {
|
|||
SEXP_FIELD_REGEX.replace_all(&sexp, " (").to_string()
|
||||
}
|
||||
|
||||
fn parse_test_content(name: String, content: String) -> TestEntry {
|
||||
fn parse_test_content(name: String, content: String, file_path: Option<PathBuf>) -> TestEntry {
|
||||
let mut children = Vec::new();
|
||||
let bytes = content.as_bytes();
|
||||
let mut prev_name = String::new();
|
||||
|
|
@ -268,7 +414,11 @@ fn parse_test_content(name: String, content: String) -> TestEntry {
|
|||
.to_string();
|
||||
prev_header_end = header_end;
|
||||
}
|
||||
TestEntry::Group { name, children }
|
||||
TestEntry::Group {
|
||||
name,
|
||||
children,
|
||||
file_path,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
@ -300,6 +450,7 @@ d
|
|||
"#
|
||||
.trim()
|
||||
.to_string(),
|
||||
None,
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
|
|
@ -319,7 +470,8 @@ d
|
|||
output: "(d)".to_string(),
|
||||
has_fields: false,
|
||||
},
|
||||
]
|
||||
],
|
||||
file_path: None,
|
||||
}
|
||||
);
|
||||
}
|
||||
|
|
@ -352,6 +504,7 @@ abc
|
|||
"#
|
||||
.trim()
|
||||
.to_string(),
|
||||
None,
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
|
|
@ -371,8 +524,67 @@ abc
|
|||
output: "(c (d))".to_string(),
|
||||
has_fields: false,
|
||||
},
|
||||
]
|
||||
],
|
||||
file_path: None,
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
// Feeds format_sexp a one-line S-expression containing field labels, nested
// nodes and a MISSING marker, and compares the result with a multi-line
// expected literal.
// NOTE(review): the expected literal below shows no leading whitespace in this
// rendering, while format_sexp indents nested lines; confirm the literal's
// indentation against the real file.
#[test]
|
||||
fn test_format_sexp() {
|
||||
assert_eq!(
|
||||
format_sexp(&"(a b: (c) (d) e: (f (g (h (MISSING i)))))".to_string()),
|
||||
r#"
|
||||
(a
|
||||
b: (c)
|
||||
(d)
|
||||
e: (f
|
||||
(g
|
||||
(h
|
||||
(MISSING i)))))
|
||||
"#
|
||||
.trim()
|
||||
.to_string()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
fn test_write_tests_to_buffer() {
    // Two numbered entries are enough to exercise both the per-entry layout
    // and the blank line written between consecutive entries.
    let entry = |n: u32| {
        (
            format!("title {}", n),
            format!("input {}", n),
            format!("output {}", n),
        )
    };
    let corrected_entries = vec![entry(1), entry(2)];

    let mut buffer = Vec::new();
    write_tests_to_buffer(&mut buffer, &corrected_entries).unwrap();
    let written = String::from_utf8(buffer).unwrap();

    // Only the newline after the opening quote is trimmed; the trailing
    // newline is part of the expected output.
    let expected = r#"
================================================================================
title 1
================================================================================
input 1
--------------------------------------------------------------------------------

output 1

================================================================================
title 2
================================================================================
input 2
--------------------------------------------------------------------------------

output 2
"#
    .trim_start()
    .to_string();

    assert_eq!(written, expected);
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -390,7 +390,7 @@ fn flatten_tests(test: TestEntry) -> Vec<(String, Vec<u8>, String, bool)> {
|
|||
}
|
||||
result.push((name, input, output, has_fields));
|
||||
}
|
||||
TestEntry::Group { mut name, children } => {
|
||||
TestEntry::Group { mut name, children, .. } => {
|
||||
if !prefix.is_empty() {
|
||||
name.insert_str(0, " - ");
|
||||
name.insert_str(0, prefix);
|
||||
|
|
|
|||
|
|
@ -70,7 +70,7 @@ const RUBY_TAG_QUERY: &'static str = r#"
|
|||
(method
|
||||
name: (_) @name) @definition.method
|
||||
|
||||
(method_call
|
||||
(call
|
||||
method: (identifier) @name) @reference.call
|
||||
|
||||
(setter (identifier) @ignore)
|
||||
|
|
@ -317,19 +317,17 @@ fn test_tags_with_parse_error() {
|
|||
assert!(failed, "syntax error should have been detected");
|
||||
|
||||
assert_eq!(
|
||||
newtags.iter()
|
||||
newtags
|
||||
.iter()
|
||||
.map(|t| (
|
||||
substr(source, &t.name_range),
|
||||
tags_config.syntax_type_name(t.syntax_type_id)
|
||||
))
|
||||
.collect::<Vec<_>>(),
|
||||
&[
|
||||
("Fine", "class"),
|
||||
]
|
||||
&[("Fine", "class"),]
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
#[test]
|
||||
fn test_tags_via_c_api() {
|
||||
allocations::record(|| {
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ Parsers for these languages are fairly complete:
|
|||
* [C#](https://github.com/tree-sitter/tree-sitter-c-sharp)
|
||||
* [C++](https://github.com/tree-sitter/tree-sitter-cpp)
|
||||
* [CSS](https://github.com/tree-sitter/tree-sitter-css)
|
||||
* [Elm](https://github.com/razzeee/tree-sitter-elm)
|
||||
* [Elm](https://github.com/elm-tooling/tree-sitter-elm)
|
||||
* [Eno](https://github.com/eno-lang/tree-sitter-eno)
|
||||
* [ERB / EJS](https://github.com/tree-sitter/tree-sitter-embedded-template)
|
||||
- [Fennel](https://github.com/travonted/tree-sitter-fennel)
|
||||
|
|
@ -46,6 +46,7 @@ Parsers for these languages are fairly complete:
|
|||
* [Python](https://github.com/tree-sitter/tree-sitter-python)
|
||||
* [Ruby](https://github.com/tree-sitter/tree-sitter-ruby)
|
||||
* [Rust](https://github.com/tree-sitter/tree-sitter-rust)
|
||||
* [R](https://github.com/r-lib/tree-sitter-r)
|
||||
* [SystemRDL](https://github.com/SystemRDL/tree-sitter-systemrdl)
|
||||
* [TOML](https://github.com/ikatyang/tree-sitter-toml)
|
||||
* [TypeScript](https://github.com/tree-sitter/tree-sitter-typescript)
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ include = [
|
|||
regex = "1"
|
||||
|
||||
[build-dependencies]
|
||||
cc = "1.0"
|
||||
cc = "^1.0.58"
|
||||
|
||||
[lib]
|
||||
path = "binding_rust/lib.rs"
|
||||
|
|
|
|||
|
|
@ -1147,6 +1147,12 @@ impl<'a> TreeCursor<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
impl<'a> Clone for TreeCursor<'a> {
|
||||
fn clone(&self) -> Self {
|
||||
TreeCursor(unsafe { ffi::ts_tree_cursor_copy(&self.0) }, PhantomData)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Drop for TreeCursor<'a> {
|
||||
fn drop(&mut self) {
|
||||
unsafe { ffi::ts_tree_cursor_delete(&mut self.0) }
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ WebAssembly bindings to the [Tree-sitter](https://github.com/tree-sitter/tree-si
|
|||
|
||||
### Setup
|
||||
|
||||
You can download the the `tree-sitter.js` and `tree-sitter.wasm` files from [the latest GitHub release](https://github.com/tree-sitter/tree-sitter/releases/tag/0.14.7) and load them using a standalone script:
|
||||
You can download the `tree-sitter.js` and `tree-sitter.wasm` files from [the latest GitHub release](https://github.com/tree-sitter/tree-sitter/releases/latest) and load them using a standalone script:
|
||||
|
||||
```html
|
||||
<script src="/the/path/to/tree-sitter.js"/>
|
||||
|
|
|
|||
|
|
@ -184,6 +184,20 @@ TSTree *ts_parser_parse_wasm(
|
|||
return ts_parser_parse(self, old_tree, input);
|
||||
}
|
||||
|
||||
/**********************/
|
||||
/* Section - Language */
|
||||
/**********************/
|
||||
|
||||
// Returns nonzero when the symbol `typeId` of language `self` has the
// symbol type TSSymbolTypeRegular, and zero otherwise.
int ts_language_type_is_named_wasm(const TSLanguage *self, TSSymbol typeId) {
  return ts_language_symbol_type(self, typeId) == TSSymbolTypeRegular;
}
|
||||
|
||||
// Returns nonzero when the symbol type of `typeId` in language `self`
// compares less than or equal to TSSymbolTypeAnonymous, and zero otherwise.
// NOTE(review): this relies on the declaration order of TSSymbolType
// (presumably regular and anonymous come before auxiliary); confirm in api.h.
int ts_language_type_is_visible_wasm(const TSLanguage *self, TSSymbol typeId) {
  return ts_language_symbol_type(self, typeId) <= TSSymbolTypeAnonymous;
}
|
||||
|
||||
/******************/
|
||||
/* Section - Tree */
|
||||
/******************/
|
||||
|
|
|
|||
|
|
@ -646,6 +646,32 @@ class Language {
|
|||
return this.fields[fieldId] || null;
|
||||
}
|
||||
|
||||
idForNodeType(type, named) {
|
||||
const typeLength = lengthBytesUTF8(type);
|
||||
const typeAddress = C._malloc(typeLength + 1);
|
||||
stringToUTF8(type, typeAddress, typeLength + 1);
|
||||
const result = C._ts_language_symbol_for_name(this[0], typeAddress, typeLength, named);
|
||||
C._free(typeAddress);
|
||||
return result || null;
|
||||
}
|
||||
|
||||
get nodeTypeCount() {
|
||||
return C._ts_language_symbol_count(this[0]);
|
||||
}
|
||||
|
||||
nodeTypeForId(typeId) {
|
||||
const name = C._ts_language_symbol_name(this[0], typeId);
|
||||
return name ? UTF8ToString(name) : null;
|
||||
}
|
||||
|
||||
nodeTypeIsNamed(typeId) {
|
||||
return C._ts_language_type_is_named_wasm(this[0], typeId) ? true : false;
|
||||
}
|
||||
|
||||
nodeTypeIsVisible(typeId) {
|
||||
return C._ts_language_type_is_visible_wasm(this[0], typeId) ? true : false;
|
||||
}
|
||||
|
||||
query(source) {
|
||||
const sourceLength = lengthBytesUTF8(source);
|
||||
const sourceAddress = C._malloc(sourceLength + 1);
|
||||
|
|
@ -856,30 +882,41 @@ class Language {
|
|||
);
|
||||
}
|
||||
|
||||
static load(url) {
|
||||
static load(input) {
|
||||
let bytes;
|
||||
if (
|
||||
typeof process !== 'undefined' &&
|
||||
process.versions &&
|
||||
process.versions.node
|
||||
) {
|
||||
const fs = require('fs');
|
||||
bytes = Promise.resolve(fs.readFileSync(url));
|
||||
if (input instanceof Uint8Array) {
|
||||
bytes = Promise.resolve(input);
|
||||
} else {
|
||||
bytes = fetch(url)
|
||||
.then(response => response.arrayBuffer()
|
||||
.then(buffer => {
|
||||
if (response.ok) {
|
||||
return new Uint8Array(buffer);
|
||||
} else {
|
||||
const body = new TextDecoder('utf-8').decode(buffer);
|
||||
throw new Error(`Language.load failed with status ${response.status}.\n\n${body}`)
|
||||
}
|
||||
}));
|
||||
const url = input;
|
||||
if (
|
||||
typeof process !== 'undefined' &&
|
||||
process.versions &&
|
||||
process.versions.node
|
||||
) {
|
||||
const fs = require('fs');
|
||||
bytes = Promise.resolve(fs.readFileSync(url));
|
||||
} else {
|
||||
bytes = fetch(url)
|
||||
.then(response => response.arrayBuffer()
|
||||
.then(buffer => {
|
||||
if (response.ok) {
|
||||
return new Uint8Array(buffer);
|
||||
} else {
|
||||
const body = new TextDecoder('utf-8').decode(buffer);
|
||||
throw new Error(`Language.load failed with status ${response.status}.\n\n${body}`)
|
||||
}
|
||||
}));
|
||||
}
|
||||
}
|
||||
|
||||
// emscripten-core/emscripten#12969
|
||||
const loadModule =
|
||||
typeof loadSideModule === 'function'
|
||||
? loadSideModule
|
||||
: loadWebAssemblyModule;
|
||||
|
||||
return bytes
|
||||
.then(bytes => loadSideModule(bytes, {loadAsync: true}))
|
||||
.then(bytes => loadModule(bytes, {loadAsync: true}))
|
||||
.then(mod => {
|
||||
const symbolNames = Object.keys(mod)
|
||||
const functionName = symbolNames.find(key =>
|
||||
|
|
@ -1139,3 +1176,4 @@ function marshalEdit(edit) {
|
|||
}
|
||||
|
||||
Parser.Language = Language;
|
||||
Parser.Parser = Parser;
|
||||
|
|
|
|||
|
|
@ -31,7 +31,10 @@
|
|||
"_ts_init",
|
||||
"_ts_language_field_count",
|
||||
"_ts_language_field_name_for_id",
|
||||
"_ts_language_type_is_named_wasm",
|
||||
"_ts_language_type_is_visible_wasm",
|
||||
"_ts_language_symbol_count",
|
||||
"_ts_language_symbol_for_name",
|
||||
"_ts_language_symbol_name",
|
||||
"_ts_language_symbol_type",
|
||||
"_ts_language_version",
|
||||
|
|
@ -79,6 +82,7 @@
|
|||
"_ts_query_predicates_for_pattern",
|
||||
"_ts_query_string_count",
|
||||
"_ts_query_string_value_for_id",
|
||||
"_ts_tree_copy",
|
||||
"_ts_tree_cursor_current_field_id_wasm",
|
||||
"_ts_tree_cursor_current_node_id_wasm",
|
||||
"_ts_tree_cursor_current_node_is_missing_wasm",
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "web-tree-sitter",
|
||||
"version": "0.17.1",
|
||||
"version": "0.18.0",
|
||||
"description": "Tree-sitter bindings for the web",
|
||||
"main": "tree-sitter.js",
|
||||
"types": "tree-sitter-web.d.ts",
|
||||
|
|
|
|||
44
lib/binding_web/test/language-test.js
Normal file
44
lib/binding_web/test/language-test.js
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
const { assert } = require("chai");
let JavaScript;

describe("Language", () => {
  // The helper module resolves to an object that exposes the loaded grammar.
  before(async () => {
    ({ JavaScript } = await require("./helper"));
  });

  describe(".fieldIdForName, .fieldNameForId", () => {
    it("converts between the string and integer representations of fields", () => {
      // Resolve every id first, then check the range, then the round trip.
      const fields = ["name", "body"].map(
        fieldName => [fieldName, JavaScript.fieldIdForName(fieldName)]
      );

      for (const [, fieldId] of fields) {
        assert.isBelow(fieldId, JavaScript.fieldCount);
      }
      for (const [fieldName, fieldId] of fields) {
        assert.equal(fieldName, JavaScript.fieldNameForId(fieldId));
      }
    });

    it("handles invalid inputs", () => {
      assert.equal(null, JavaScript.fieldIdForName("namezzz"));
      for (const badId of [-1, 10000]) {
        assert.equal(null, JavaScript.fieldNameForId(badId));
      }
    });
  });

  describe(".idForNodeType, .nodeTypeForId, .nodeTypeIsNamed", () => {
    it("converts between the string and integer representations of a node type", () => {
      // One named node type and one anonymous token.
      const nodeTypes = [["export_statement", true], ["*", false]].map(
        ([type, named]) => [type, named, JavaScript.idForNodeType(type, named)]
      );

      for (const [, , typeId] of nodeTypes) {
        assert.isBelow(typeId, JavaScript.nodeTypeCount);
      }
      for (const [type, named, typeId] of nodeTypes) {
        assert.equal(named, JavaScript.nodeTypeIsNamed(typeId));
        assert.equal(type, JavaScript.nodeTypeForId(typeId));
      }
    });

    it("handles invalid inputs", () => {
      for (const badId of [-1, 10000]) {
        assert.equal(null, JavaScript.nodeTypeForId(badId));
      }
      // "export_statement" exists only as a named type, so the anonymous lookup fails.
      assert.equal(null, JavaScript.idForNodeType("export_statement", false));
    });
  });
});
|
||||
|
|
@ -323,6 +323,31 @@ describe("Tree", () => {
|
|||
assert(!cursor.gotoParent());
|
||||
})
|
||||
});
|
||||
|
||||
describe(".copy", () => {
|
||||
it("creates another tree that remains stable if the original tree is edited", () => {
|
||||
input = 'abc + cde';
|
||||
tree = parser.parse(input);
|
||||
assert.equal(
|
||||
tree.rootNode.toString(),
|
||||
"(program (expression_statement (binary_expression left: (identifier) right: (identifier))))"
|
||||
);
|
||||
|
||||
const tree2 = tree.copy();
|
||||
([input, edit] = spliceInput(input, 3, 0, '123'));
|
||||
assert.equal(input, 'abc123 + cde');
|
||||
tree.edit(edit);
|
||||
|
||||
const leftNode = tree.rootNode.firstChild.firstChild.firstChild;
|
||||
const leftNode2 = tree2.rootNode.firstChild.firstChild.firstChild;
|
||||
const rightNode = tree.rootNode.firstChild.firstChild.lastChild;
|
||||
const rightNode2 = tree2.rootNode.firstChild.firstChild.lastChild;
|
||||
assert.equal(leftNode.endIndex, 6)
|
||||
assert.equal(leftNode2.endIndex, 3)
|
||||
assert.equal(rightNode.startIndex, 9)
|
||||
assert.equal(rightNode2.startIndex, 6)
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
function spliceInput(input, startIndex, lengthRemoved, newText) {
|
||||
|
|
|
|||
7
lib/binding_web/tree-sitter-web.d.ts
vendored
7
lib/binding_web/tree-sitter-web.d.ts
vendored
|
|
@ -127,13 +127,18 @@ declare module 'web-tree-sitter' {
|
|||
}
|
||||
|
||||
class Language {
|
||||
static load(path: string): Promise<Language>;
|
||||
static load(input: string | Uint8Array): Promise<Language>;
|
||||
|
||||
readonly version: number;
|
||||
readonly fieldCount: number;
|
||||
readonly nodeTypeCount: number;
|
||||
|
||||
fieldNameForId(fieldId: number): string | null;
|
||||
fieldIdForName(fieldName: string): number | null;
|
||||
idForNodeType(type: string, named: boolean): number;
|
||||
nodeTypeForId(typeId: number): string | null;
|
||||
nodeTypeIsNamed(typeId: number): boolean;
|
||||
nodeTypeIsVisible(typeId: number): boolean;
|
||||
query(source: string): Query;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -448,6 +448,7 @@ TSTreeCursor ts_tree_cursor_copy(const TSTreeCursor *_cursor) {
|
|||
TSTreeCursor res = {NULL, NULL, {0, 0}};
|
||||
TreeCursor *copy = (TreeCursor *)&res;
|
||||
copy->tree = cursor->tree;
|
||||
array_init(&copy->stack);
|
||||
array_push_all(&copy->stack, &cursor->stack);
|
||||
return res;
|
||||
}
|
||||
|
|
|
|||
2
test/fixtures/error_corpus/ruby_errors.txt
vendored
2
test/fixtures/error_corpus/ruby_errors.txt
vendored
|
|
@ -10,7 +10,7 @@ c
|
|||
---
|
||||
|
||||
(program
|
||||
(method_call
|
||||
(call
|
||||
method: (identifier)
|
||||
(ERROR (heredoc_beginning))
|
||||
arguments: (argument_list
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue