Merge branch 'master' of https://github.com/tree-sitter/tree-sitter into feat/snapshot-testing

2019-09-06 10:44:32 +08:00 · 2019-09-06 10:44:32 +08:00 · 9770a0c9f6
commit 9770a0c9f6
parent 807fdf3ef0 f9a3998029
17 changed files with 643 additions and 373 deletions
--- a/cli/src/highlight.rs
+++ b/cli/src/highlight.rs
@@ -293,11 +293,11 @@ pub fn ansi(
    {
        let event = event.map_err(|e| e.to_string())?;
        match event {
-            HighlightEvent::Source(s) => {
+            HighlightEvent::Source { start, end } => {
                if let Some(style) = highlight_stack.last().and_then(|s| theme.ansi_style(*s)) {
-                    write!(&mut stdout, "{}", style.paint(s))?;
+                    style.paint(&source[start..end]).write_to(&mut stdout)?;
                } else {
-                    write!(&mut stdout, "{}", s)?;
+                    stdout.write_all(&source[start..end])?;
                }
            }
            HighlightEvent::HighlightStart(h) => {
--- a/cli/src/test.rs
+++ b/cli/src/test.rs
@@ -17,7 +17,7 @@ lazy_static! {
        .multi_line(true)
        .build()
        .unwrap();
-    static ref DIVIDER_REGEX: ByteRegex = ByteRegexBuilder::new(r"\r?\n---+\r?\n")
+    static ref DIVIDER_REGEX: ByteRegex = ByteRegexBuilder::new(r"^---+\r?\n")
        .multi_line(true)
        .build()
        .unwrap();
@@ -183,11 +183,7 @@ pub fn parse_tests(path: &Path) -> io::Result<TestEntry> {
        let mut children = Vec::new();
        for entry in fs::read_dir(path)? {
            let entry = entry?;
-            let hidden = entry
-                .file_name()
-                .to_str()
-                .unwrap_or("")
-                .starts_with(".");
+            let hidden = entry.file_name().to_str().unwrap_or("").starts_with(".");
            if !hidden {
                children.push(parse_tests(&entry.path())?);
            }
@@ -206,29 +202,42 @@ pub fn strip_sexp_fields(sexp: String) -> String {
 fn parse_test_content(name: String, content: String) -> TestEntry {
    let mut children = Vec::new();
    let bytes = content.as_bytes();
-    let mut previous_name = String::new();
-    let mut previous_header_end = 0;
-    for header_match in HEADER_REGEX
+    let mut prev_name = String::new();
+    let mut prev_header_end = 0;
+
+    // Identify all of the test descriptions using the `======` headers.
+    for (header_start, header_end) in HEADER_REGEX
        .find_iter(&bytes)
        .map(|m| (m.start(), m.end()))
        .chain(Some((bytes.len(), bytes.len())))
    {
-        let (header_start, header_end) = header_match;
-        if previous_header_end > 0 {
-            if let Some(divider_match) =
-                DIVIDER_REGEX.find(&bytes[previous_header_end..header_start])
-            {
-                let (divider_start, divider_end) = (
-                    previous_header_end + divider_match.start(),
-                    previous_header_end + divider_match.end(),
-                );
+        // Find the longest line of dashes following each test description.
+        // That is the divider between input and expected output.
+        if prev_header_end > 0 {
+            let divider_match = DIVIDER_REGEX
+                .find_iter(&bytes[prev_header_end..header_start])
+                .map(|m| (prev_header_end + m.start(), prev_header_end + m.end()))
+                .max_by_key(|(start, end)| end - start);
+            if let Some((divider_start, divider_end)) = divider_match {
                if let Ok(output) = str::from_utf8(&bytes[divider_end..header_start]) {
-                    let input = bytes[previous_header_end..divider_start].to_vec();
+                    let mut input = bytes[prev_header_end..divider_start].to_vec();
+
+                    // Remove trailing newline from the input.
+                    input.pop();
+                    if input.last() == Some(&b'\r') {
+                        input.pop();
+                    }
+
+                    // Normalize the whitespace in the expected output.
                    let output = WHITESPACE_REGEX.replace_all(output.trim(), " ").to_string();
                    let output = output.replace(" )", ")");
+
+                    // Identify if the expected output has fields indicated. If not, then
+                    // fields will not be checked.
                    let has_fields = SEXP_FIELD_REGEX.is_match(&output);
+
                    children.push(TestEntry::Example {
-                        name: previous_name,
+                        name: prev_name,
                        input,
                        output,
                        has_fields,
@@ -236,10 +245,10 @@ fn parse_test_content(name: String, content: String) -> TestEntry {
                }
            }
        }
-        previous_name = String::from_utf8_lossy(&bytes[header_start..header_end])
+        prev_name = String::from_utf8_lossy(&bytes[header_start..header_end])
            .trim_matches(|c| char::is_whitespace(c) || c == '=')
            .to_string();
-        previous_header_end = header_end;
+        prev_header_end = header_end;
    }
    TestEntry::Group { name, children }
 }
@@ -296,4 +305,56 @@ d
            }
        );
    }
+
+    #[test]
+    fn test_parse_test_content_with_dashes_in_source_code() {
+        let entry = parse_test_content(
+            "the-filename".to_string(),
+            r#"
+==================
+Code with dashes
+==================
+abc
+---
+defg
+----
+hijkl
+-------
+
+(a (b))
+
+=========================
+Code ending with dashes
+=========================
+abc
+-----------
+-------------------
+
+(c (d))
+        "#
+            .trim()
+            .to_string(),
+        );
+
+        assert_eq!(
+            entry,
+            TestEntry::Group {
+                name: "the-filename".to_string(),
+                children: vec![
+                    TestEntry::Example {
+                        name: "Code with dashes".to_string(),
+                        input: "abc\n---\ndefg\n----\nhijkl".as_bytes().to_vec(),
+                        output: "(a (b))".to_string(),
+                        has_fields: false,
+                    },
+                    TestEntry::Example {
+                        name: "Code ending with dashes".to_string(),
+                        input: "abc\n-----------".as_bytes().to_vec(),
+                        output: "(c (d))".to_string(),
+                        has_fields: false,
+                    },
+                ]
+            }
+        );
+    }
 }
--- a/cli/src/tests/highlight_test.rs
+++ b/cli/src/tests/highlight_test.rs
@@ -420,10 +420,10 @@ fn test_highlighting_via_c_api() {
    assert_eq!(
        lines,
        vec![
-            "&lt;<span class=tag>script</span>&gt;",
-            "<span class=keyword>const</span> <span>a</span> <span>=</span> <span class=function>b</span><span>(</span><span class=string>&#39;c&#39;</span><span>)</span><span>;</span>",
-            "<span>c</span><span>.</span><span class=function>d</span><span>(</span><span>)</span><span>;</span>",
-            "&lt;/<span class=tag>script</span>&gt;",
+            "&lt;<span class=tag>script</span>&gt;\n",
+            "<span class=keyword>const</span> <span>a</span> <span>=</span> <span class=function>b</span><span>(</span><span class=string>&#39;c&#39;</span><span>)</span><span>;</span>\n",
+            "<span>c</span><span>.</span><span class=function>d</span><span>(</span><span>)</span><span>;</span>\n",
+            "&lt;/<span class=tag>script</span>&gt;\n",
        ]
    );

@@ -431,6 +431,23 @@ fn test_highlighting_via_c_api() {
    c::ts_highlight_buffer_delete(buffer);
 }

+#[test]
+fn test_decode_utf8_lossy() {
+    use tree_sitter_highlight::util::LossyUtf8;
+
+    let parts = LossyUtf8::new(b"hi").collect::<Vec<_>>();
+    assert_eq!(parts, vec!["hi"]);
+
+    let parts = LossyUtf8::new(b"hi\xc0\xc1bye").collect::<Vec<_>>();
+    assert_eq!(parts, vec!["hi", "\u{fffd}", "\u{fffd}", "bye"]);
+
+    let parts = LossyUtf8::new(b"\xc0\xc1bye").collect::<Vec<_>>();
+    assert_eq!(parts, vec!["\u{fffd}", "\u{fffd}", "bye"]);
+
+    let parts = LossyUtf8::new(b"hello\xc0\xc1").collect::<Vec<_>>();
+    assert_eq!(parts, vec!["hello", "\u{fffd}", "\u{fffd}"]);
+}
+
 fn c_string(s: &str) -> CString {
    CString::new(s.as_bytes().to_vec()).unwrap()
 }
@@ -466,11 +483,12 @@ fn to_token_vector<'a>(
    language: Language,
    property_sheet: &'a PropertySheet<Properties>,
 ) -> Result<Vec<Vec<(&'a str, Vec<Highlight>)>>, Error> {
+    let src = src.as_bytes();
    let mut lines = Vec::new();
    let mut highlights = Vec::new();
    let mut line = Vec::new();
    for event in highlight(
-        src.as_bytes(),
+        src,
        language,
        property_sheet,
        None,
@@ -481,7 +499,8 @@ fn to_token_vector<'a>(
            HighlightEvent::HighlightEnd => {
                highlights.pop();
            }
-            HighlightEvent::Source(s) => {
+            HighlightEvent::Source { start, end } => {
+                let s = str::from_utf8(&src[start..end]).unwrap();
                for (i, l) in s.split("\n").enumerate() {
                    let l = l.trim_end_matches('\r');
                    if i > 0 {
--- a/cli/src/tests/tree_test.rs
+++ b/cli/src/tests/tree_test.rs
@@ -190,7 +190,7 @@ fn test_tree_edit() {
 }

 #[test]
-fn test_tree_walk() {
+fn test_tree_cursor() {
    let mut parser = Parser::new();
    parser.set_language(get_language("rust")).unwrap();

@@ -225,6 +225,43 @@ fn test_tree_walk() {
    assert_eq!(cursor.node().is_named(), true);
 }

+#[test]
+fn test_tree_cursor_fields() {
+    let mut parser = Parser::new();
+    parser.set_language(get_language("javascript")).unwrap();
+
+    let tree = parser
+        .parse("function /*1*/ bar /*2*/ () {}", None)
+        .unwrap();
+
+    let mut cursor = tree.walk();
+    assert_eq!(cursor.node().kind(), "program");
+
+    cursor.goto_first_child();
+    assert_eq!(cursor.node().kind(), "function_declaration");
+    assert_eq!(cursor.field_name(), None);
+
+    cursor.goto_first_child();
+    assert_eq!(cursor.node().kind(), "function");
+    assert_eq!(cursor.field_name(), None);
+
+    cursor.goto_next_sibling();
+    assert_eq!(cursor.node().kind(), "comment");
+    assert_eq!(cursor.field_name(), None);
+
+    cursor.goto_next_sibling();
+    assert_eq!(cursor.node().kind(), "identifier");
+    assert_eq!(cursor.field_name(), Some("name"));
+
+    cursor.goto_next_sibling();
+    assert_eq!(cursor.node().kind(), "comment");
+    assert_eq!(cursor.field_name(), None);
+
+    cursor.goto_next_sibling();
+    assert_eq!(cursor.node().kind(), "formal_parameters");
+    assert_eq!(cursor.field_name(), Some("parameters"));
+}
+
 #[test]
 fn test_tree_node_equality() {
    let mut parser = Parser::new();