Merge branch 'tags-utf16-columns' into tagging-improvements

This commit is contained in:
Max Brunsfeld 2020-07-09 10:40:46 -07:00
commit fef72fb434
10 changed files with 291 additions and 256 deletions

View file

@ -71,5 +71,3 @@ cache:
cargo: true
directories:
- target/emsdk
- test/fixtures/grammars
- $HOME/.emscripten_cache

View file

@ -3,6 +3,7 @@ mod helpers;
mod highlight_test;
mod node_test;
mod parser_test;
mod pathological_test;
mod query_test;
mod tags_test;
mod test_highlight_test;

View file

@ -0,0 +1,15 @@
use super::helpers::allocations;
use super::helpers::fixtures::get_language;
use tree_sitter::Parser;
#[test]
fn test_pathological_example_1() {
    // A degenerate C++ snippet that previously triggered pathological
    // parser behavior. Running the parse inside `allocations::record`
    // additionally checks that no allocations are leaked on this input.
    let source = r#"*ss<s"ss<sqXqss<s._<s<sq<(qqX<sqss<s.ss<sqsssq<(qss<qssqXqss<s._<s<sq<(qqX<sqss<s.ss<sqsssq<(qss<sqss<sqss<s._<s<sq>(qqX<sqss<s.ss<sqsssq<(qss<sq&=ss<s<sqss<s._<s<sq<(qqX<sqss<s.ss<sqs"#;
    allocations::record(|| {
        let mut parser = Parser::new();
        parser.set_language(get_language("cpp")).unwrap();
        parser.parse(source, None).unwrap();
    });
}

View file

@ -1008,6 +1008,41 @@ fn test_query_matches_with_alternatives_at_root() {
})
}
#[test]
// Verifies that a field name applied to a bracketed list of alternatives
// constrains *every* alternative in the list: both a bare `identifier` and
// the `property_identifier` inside a `member_expression` must appear under
// the `left:` field of an assignment to be captured.
fn test_query_matches_with_alternatives_under_fields() {
allocations::record(|| {
let language = get_language("javascript");
let query = Query::new(
language,
r#"
(assignment_expression
left: [
(identifier) @variable
(member_expression property: (property_identifier) @variable)
])
"#,
)
.unwrap();
assert_query_matches(
language,
&query,
"
a = b;
b = c.d;
e.f = g;
h.i = j.k;
",
// Only left-hand sides match: `a`, `b`, and the properties `f` and `i`.
// Right-hand-side names (`b`, `c.d`, `g`, `j.k`) are not captured because
// they do not occur under the `left:` field.
&[
(0, vec![("variable", "a")]),
(0, vec![("variable", "b")]),
(0, vec![("variable", "f")]),
(0, vec![("variable", "i")]),
],
);
});
}
#[test]
fn test_query_matches_in_language_with_simple_aliases() {
allocations::record(|| {

View file

@ -1,28 +1,29 @@
use super::helpers::allocations;
use super::helpers::fixtures::{get_language, get_language_queries_path};
use std::ffi::CStr;
use std::ffi::CString;
use std::{fs, ptr, slice, str};
use std::ffi::CStr;
use tree_sitter::Point;
use tree_sitter_tags::c_lib as c;
use tree_sitter_tags::{Error, TagsConfiguration, TagsContext};
const PYTHON_TAG_QUERY: &'static str = r#"
(
(function_definition
name: (identifier) @name
body: (block . (expression_statement (string) @doc))) @definition.function
(#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)")
(function_definition
name: (identifier) @name
body: (block . (expression_statement (string) @doc))) @definition.function
(#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)")
)
(function_definition
name: (identifier) @name) @definition.function
(
(class_definition
name: (identifier) @name
body: (block
. (expression_statement (string) @doc))) @definition.class
(#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)")
(class_definition
name: (identifier) @name
body: (block
. (expression_statement (string) @doc))) @definition.class
(#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)")
)
(class_definition
@ -30,6 +31,10 @@ const PYTHON_TAG_QUERY: &'static str = r#"
(call
function: (identifier) @name) @reference.call
(call
function: (attribute
attribute: (identifier) @name)) @reference.call
"#;
const JS_TAG_QUERY: &'static str = r#"
@ -98,10 +103,12 @@ fn test_tags_python() {
.collect::<Result<Vec<_>, _>>()
.unwrap();
assert_eq!(
tags.iter()
.map(|t| (substr(source, &t.name_range), tags_config.syntax_type_name(t.syntax_type_id)))
.map(|t| (
substr(source, &t.name_range),
tags_config.syntax_type_name(t.syntax_type_id)
))
.collect::<Vec<_>>(),
&[
("Customer", "class"),
@ -111,10 +118,7 @@ fn test_tags_python() {
);
assert_eq!(substr(source, &tags[0].line_range), "class Customer:");
assert_eq!(
substr(source, &tags[1].line_range),
"def age(self):"
);
assert_eq!(substr(source, &tags[1].line_range), "def age(self):");
assert_eq!(tags[0].docs.as_ref().unwrap(), "Data about a customer");
assert_eq!(tags[1].docs.as_ref().unwrap(), "Get the customer's age");
}
@ -152,12 +156,16 @@ fn test_tags_javascript() {
assert_eq!(
tags.iter()
.map(|t| (substr(source, &t.name_range), tags_config.syntax_type_name(t.syntax_type_id)))
.map(|t| (
substr(source, &t.name_range),
t.span.clone(),
tags_config.syntax_type_name(t.syntax_type_id)
))
.collect::<Vec<_>>(),
&[
("Customer", "class"),
("getAge", "method"),
("Agent", "class")
("Customer", Point::new(5, 10)..Point::new(5, 18), "class",),
("getAge", Point::new(9, 8)..Point::new(9, 14), "method",),
("Agent", Point::new(15, 10)..Point::new(15, 15), "class",)
]
);
assert_eq!(
@ -168,6 +176,26 @@ fn test_tags_javascript() {
assert_eq!(tags[2].docs, None);
}
#[test]
// Verifies that tags report both a byte-oriented span and a separate
// UTF-16 code-unit column range when the line contains multi-byte
// characters.
fn test_tags_columns_measured_in_utf16_code_units() {
let language = get_language("python");
let tags_config = TagsConfiguration::new(language, PYTHON_TAG_QUERY, "").unwrap();
let mut tag_context = TagsContext::new();
// The line starts with a quoted string of three heart emoji (each is
// U+2764 + U+FE0F: 6 UTF-8 bytes but only 2 UTF-16 code units), followed
// by a method name containing Greek letters (2 UTF-8 bytes, 1 UTF-16
// code unit each).
let source = r#""❤️❤️❤️".hello_α_ω()"#.as_bytes();
let tag = tag_context
.generate_tags(&tags_config, source, None)
.unwrap()
.next()
.unwrap()
.unwrap();
assert_eq!(substr(source, &tag.name_range), "hello_α");
// 21..32 are byte offsets on the line; 9..18 are the corresponding
// UTF-16 code-unit columns for the same name.
assert_eq!(tag.span, Point::new(0, 21)..Point::new(0, 32));
assert_eq!(tag.utf16_column_range, 9..18);
}
#[test]
fn test_tags_ruby() {
let language = get_language("ruby");
@ -211,7 +239,7 @@ fn test_tags_ruby() {
))
.collect::<Vec<_>>(),
&[
("foo", "method", (2, 0)),
("foo", "method", (2, 4)),
("bar", "call", (7, 4)),
("a", "call", (7, 8)),
("b", "call", (7, 11)),
@ -328,10 +356,12 @@ fn test_tags_via_c_api() {
let syntax_types: Vec<&str> = unsafe {
let mut len: u32 = 0;
let ptr = c::ts_tagger_syntax_kinds_for_scope_name(tagger, c_scope_name.as_ptr(), &mut len);
slice::from_raw_parts(ptr, len as usize).iter().map(|i| {
CStr::from_ptr(*i).to_str().unwrap()
}).collect()
let ptr =
c::ts_tagger_syntax_kinds_for_scope_name(tagger, c_scope_name.as_ptr(), &mut len);
slice::from_raw_parts(ptr, len as usize)
.iter()
.map(|i| CStr::from_ptr(*i).to_str().unwrap())
.collect()
};
assert_eq!(
@ -344,18 +374,8 @@ fn test_tags_via_c_api() {
))
.collect::<Vec<_>>(),
&[
(
"function",
"b",
"function b() {",
"one\ntwo\nthree"
),
(
"class",
"C",
"class C extends D {",
"four\nfive"
),
("function", "b", "function b() {", "one\ntwo\nthree"),
("class", "C", "class C extends D {", "four\nfive"),
("call", "b", "b(a);", "")
]
);

View file

@ -1,128 +0,0 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
<title>Black Lives Matter</title>
<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.5.0/css/bootstrap.min.css" integrity="sha384-9aIt2nRpC12Uk9gS9baDl411NQApFmC26EwAOH8WgZl5MYYxFfc+NcPb1dKGj7Sk" crossorigin="anonymous">
<meta name="theme-color" content="#563d7c">
<style>
a,
a:focus,
a:hover {
color: #fff;
}
.btn-secondary,
.btn-secondary:hover,
.btn-secondary:focus {
color: #333;
text-shadow: none; /* Prevent inheritance from `body` */
background-color: #fff;
border: .05rem solid #fff;
}
html,
body {
height: 100%;
background-color: #333;
}
body {
display: -ms-flexbox;
display: flex;
color: #fff;
text-shadow: 0 .05rem .1rem rgba(0, 0, 0, .5);
box-shadow: inset 0 0 5rem rgba(0, 0, 0, .5);
}
.cover-container {
max-width: 42em;
}
.masthead {
margin-bottom: 2rem;
}
.masthead-brand {
margin-bottom: 0;
}
.nav-masthead .nav-link {
padding: .25rem 0;
font-weight: 700;
color: rgba(255, 255, 255, .5);
background-color: transparent;
border-bottom: .25rem solid transparent;
}
.nav-masthead .nav-link:hover,
.nav-masthead .nav-link:focus {
border-bottom-color: rgba(255, 255, 255, .25);
}
.nav-masthead .nav-link + .nav-link {
margin-left: 1rem;
}
.nav-masthead .active {
color: #fff;
border-bottom-color: #fff;
}
@media (min-width: 48em) {
.masthead-brand {
float: left;
}
.nav-masthead {
float: right;
}
}
.cover {
padding: 0 1.5rem;
}
.cover .btn-lg {
padding: .75rem 1.25rem;
font-weight: 700;
}
.mastfoot {
color: rgba(255, 255, 255, .5);
}
.bd-placeholder-img {
font-size: 1.125rem;
text-anchor: middle;
-webkit-user-select: none;
-moz-user-select: none;
-ms-user-select: none;
user-select: none;
}
@media (min-width: 768px) {
.bd-placeholder-img-lg {
font-size: 3.5rem;
}
}
</style>
<link href="cover.css" rel="stylesheet">
</head>
<body class="text-center">
<div class="cover-container d-flex w-100 h-100 p-3 mx-auto flex-column">
<main role="main" class="inner cover">
<h1>Black Lives Matter</h1>
<p class="lead mastfoot"><span style='color: #dddddd;'>We stand in solidarity with</span> George Floyd, Natosha McDade, Yassin Mohamed, Finan H. Berhe, Sean Reed, Steven Demarco Taylor, Breonna Taylor, Ariane McCree, Terrance Franklin, Miles Hall, Darius Tarver, William Green, Samuel David Mallard, Kwame Jones, Devon Bailey, Christopher Whitfield, Anthony Hill, DeVon Bailey, Eric Logan, Jamarion Robinson, Gregory Hill Jr, JaQuavion Slaton, Ryan Twyman, Brandon Webber, Jimmy Atchison, Willie McCoy, Emantic Fitzgerald Bradford J, Dettrick Griffin, Jemel Roberson, DeAndre Ballard, Botham Shem Jean, Robert Lawrence White, Anthony Lamar Smith, Ramarley Graham, Manuel Loggins Jr, Trayvon Martin, Wendell Allen, Kendrec McDade, Larry Jackson Jr, Jonathan Ferrell, Jordan Baker, Victor White III, Dontre Hamilton, Eric Garner, John Crawford III, Michael Brown, Ezell Ford, Dante Parker, Kajieme Powell, Laquan McDonald, Akai Gurley, Tamir Rice, Rumain Brisbon, Jerame Reid, Charly Keunang, Tony Robinson, Walter Scott, Freddie Gray, Brendon Glenn, Samuel DuBose, Christian Taylor, Jamar Clark, Mario Woods, Quintonio LeGrier, Gregory Gunn, Akiel Denkins, Alton Sterling, Philando Castile, Terrence Sterling, Terence Crutcher, Keith Lamont Scott, Alfred Olango, Jordan Edwards, Stephon Clark, Danny Ray Thomas, DeJuan Guillory, Patrick Harmon, Jonathan Hart, Maurice Granton, Julius Johnson, Jamee Johnson, Michael Dean...</p>
</main>
<footer class="mastfoot mt-auto">
<div class="inner">
<p>This site is currently offline as a small gesture of respect and solidarity.<br/>The <a href="https://github.com/tree-sitter/tree-sitter/tree/master/docs">Tree-sitter documentation</a> remains accessible on <a href="https://github.com/tree-sitter/tree-sitter">the GitHub repo</a>.</p>
<p>Organisations that could use your financial support include <a href="https://blacklivesmatter.com/">Black Lives Matter</a>, <a href="https://www.naacpldf.org">The NAACP Legal Defense and Educational Fund</a>, <a href="https://eji.org">The Equal Justice Initiative</a>, <a href="https://www.wetheprotesters.org">We The Protesters</a>, and the <a href="https://www.gofundme.com/f/georgefloyd">George Floyd Memorial Fund</a>.</p>
</div>
</footer>
</div>
</body>
</html>

View file

@ -764,17 +764,23 @@ static StackVersion ts_parser__reduce(
bool is_extra
) {
uint32_t initial_version_count = ts_stack_version_count(self->stack);
uint32_t removed_version_count = 0;
StackSliceArray pop = ts_stack_pop_count(self->stack, version, count);
// Pop the given number of nodes from the given version of the parse stack.
// If stack versions have previously merged, then there may be more than one
// path back through the stack. For each path, create a new parent node to
// contain the popped children, and push it onto the stack in place of the
// children.
StackSliceArray pop = ts_stack_pop_count(self->stack, version, count);
uint32_t removed_version_count = 0;
for (uint32_t i = 0; i < pop.size; i++) {
StackSlice slice = pop.contents[i];
StackVersion slice_version = slice.version - removed_version_count;
// Error recovery can sometimes cause lots of stack versions to merge,
// such that a single pop operation can produce a lot of slices.
// Avoid creating too many stack versions in that situation.
if (i > 0 && slice_version > MAX_VERSION_COUNT + MAX_VERSION_COUNT_OVERFLOW) {
// This is where new versions are added to the parse stack. The versions
// will all be sorted and truncated at the end of the outer parsing loop.
// Allow the maximum version count to be temporarily exceeded, but only
// by a limited threshold.
if (slice_version > MAX_VERSION_COUNT + MAX_VERSION_COUNT_OVERFLOW) {
ts_stack_remove_version(self->stack, slice_version);
ts_subtree_array_delete(&self->tree_pool, &slice.subtrees);
removed_version_count++;

View file

@ -715,7 +715,7 @@ static TSQueryError ts_query__parse_pattern(
uint32_t *capture_count,
bool is_immediate
) {
uint32_t starting_step_index = self->steps.size;
const uint32_t starting_step_index = self->steps.size;
if (stream->next == 0) return TSQueryErrorSyntax;
@ -951,7 +951,6 @@ static TSQueryError ts_query__parse_pattern(
stream_skip_whitespace(stream);
// Parse the pattern
uint32_t step_index = self->steps.size;
TSQueryError e = ts_query__parse_pattern(
self,
stream,
@ -972,7 +971,22 @@ static TSQueryError ts_query__parse_pattern(
stream->input = field_name;
return TSQueryErrorField;
}
self->steps.contents[step_index].field = field_id;
uint32_t step_index = starting_step_index;
QueryStep *step = &self->steps.contents[step_index];
for (;;) {
step->field = field_id;
if (
step->alternative_index != NONE &&
step->alternative_index > step_index &&
step->alternative_index < self->steps.size
) {
step_index = step->alternative_index;
step = &self->steps.contents[step_index];
} else {
break;
}
}
}
else {
@ -1041,15 +1055,16 @@ static TSQueryError ts_query__parse_pattern(
length
);
uint32_t step_index = starting_step_index;
for (;;) {
query_step__add_capture(step, capture_id);
if (
step->alternative_index != NONE &&
step->alternative_index > starting_step_index &&
step->alternative_index > step_index &&
step->alternative_index < self->steps.size
) {
starting_step_index = step->alternative_index;
step = &self->steps.contents[starting_step_index];
step_index = step->alternative_index;
step = &self->steps.contents[step_index];
} else {
break;
}

View file

@ -1,12 +1,12 @@
pub mod c_lib;
use memchr::{memchr, memrchr};
use memchr::memchr;
use regex::Regex;
use std::collections::HashMap;
use std::ffi::{CStr, CString};
use std::ops::Range;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::{fmt, mem, str};
use std::ffi::{CStr, CString};
use std::collections::HashMap;
use std::{char, fmt, mem, str};
use tree_sitter::{
Language, Parser, Point, Query, QueryCursor, QueryError, QueryPredicateArg, Tree,
};
@ -48,6 +48,7 @@ pub struct Tag {
pub name_range: Range<usize>,
pub line_range: Range<usize>,
pub span: Range<Point>,
pub utf16_column_range: Range<usize>,
pub docs: Option<String>,
pub is_definition: bool,
pub syntax_type_id: u32,
@ -142,24 +143,31 @@ impl TagsConfiguration {
} else if name.starts_with("reference.") {
name.trim_start_matches("reference.")
} else {
return Err(Error::InvalidCapture(name.to_string()))
return Err(Error::InvalidCapture(name.to_string()));
};
if let Ok(cstr) = CString::new(kind) {
let c_kind = cstr.to_bytes_with_nul().to_vec().into_boxed_slice();
let syntax_type_id = syntax_type_names.iter().position(|n| { n == &c_kind }).unwrap_or_else(|| {
syntax_type_names.push(c_kind);
syntax_type_names.len() - 1
}) as u32;
capture_map.insert(i as u32, NamedCapture{ syntax_type_id, is_definition });
let syntax_type_id = syntax_type_names
.iter()
.position(|n| n == &c_kind)
.unwrap_or_else(|| {
syntax_type_names.push(c_kind);
syntax_type_names.len() - 1
}) as u32;
capture_map.insert(
i as u32,
NamedCapture {
syntax_type_id,
is_definition,
},
);
}
}
}
}
let c_syntax_type_names = syntax_type_names.iter().map( |s| {
s.as_ptr()
}).collect();
let c_syntax_type_names = syntax_type_names.iter().map(|s| s.as_ptr()).collect();
let pattern_info = (0..query.pattern_count())
.map(|pattern_index| {
@ -219,7 +227,8 @@ impl TagsConfiguration {
pub fn syntax_type_name(&self, id: u32) -> &str {
unsafe {
let cstr = CStr::from_ptr(self.syntax_type_names[id as usize].as_ptr() as *const i8).to_bytes();
let cstr = CStr::from_ptr(self.syntax_type_names[id as usize].as_ptr() as *const i8)
.to_bytes();
str::from_utf8(cstr).expect("syntax type name was not valid utf-8")
}
}
@ -330,7 +339,7 @@ where
continue;
}
let mut name_range = None;
let mut name_node = None;
let mut doc_nodes = Vec::new();
let mut tag_node = None;
let mut syntax_type_id = 0;
@ -345,7 +354,7 @@ where
}
if index == self.config.name_capture_index {
name_range = Some(capture.node.byte_range());
name_node = Some(capture.node);
} else if index == self.config.doc_capture_index {
doc_nodes.push(capture.node);
}
@ -357,7 +366,9 @@ where
}
}
if let (Some(tag_node), Some(name_range)) = (tag_node, name_range) {
if let (Some(tag_node), Some(name_node)) = (tag_node, name_node) {
let name_range = name_node.byte_range();
if pattern_info.name_must_be_non_local {
let mut is_local = false;
for scope in self.scopes.iter().rev() {
@ -424,41 +435,33 @@ where
// Only create one tag per node. The tag queue is sorted by node position
// to allow for fast lookup.
let range = tag_node.byte_range();
match self
.tag_queue
.binary_search_by_key(&(name_range.end, name_range.start), |(tag, _)| {
(tag.name_range.end, tag.name_range.start)
}) {
let span = name_node.start_position()..name_node.end_position();
let utf16_column_range =
get_utf16_column_range(self.source, &name_range, &span);
let line_range =
line_range(self.source, name_range.start, span.start, MAX_LINE_LEN);
let tag = Tag {
line_range,
span,
utf16_column_range,
range,
name_range,
docs,
is_definition,
syntax_type_id,
};
match self.tag_queue.binary_search_by_key(
&(tag.name_range.end, tag.name_range.start),
|(tag, _)| (tag.name_range.end, tag.name_range.start),
) {
Ok(i) => {
let (tag, pattern_index) = &mut self.tag_queue[i];
let (existing_tag, pattern_index) = &mut self.tag_queue[i];
if *pattern_index > mat.pattern_index {
*pattern_index = mat.pattern_index;
*tag = Tag {
line_range: line_range(self.source, range.start, MAX_LINE_LEN),
span: tag_node.start_position()..tag_node.end_position(),
range,
name_range,
docs,
syntax_type_id,
is_definition,
};
*existing_tag = tag;
}
}
Err(i) => self.tag_queue.insert(
i,
(
Tag {
line_range: line_range(self.source, range.start, MAX_LINE_LEN),
span: tag_node.start_position()..tag_node.end_position(),
range,
name_range,
docs,
syntax_type_id,
is_definition,
},
mat.pattern_index,
),
),
Err(i) => self.tag_queue.insert(i, (tag, mat.pattern_index)),
}
}
}
@ -484,29 +487,103 @@ impl From<QueryError> for Error {
}
}
fn line_range(text: &[u8], index: usize, max_line_len: usize) -> Range<usize> {
let start = memrchr(b'\n', &text[0..index]).map_or(0, |i| i + 1);
let max_line_len = max_line_len.min(text.len() - start);
let end = start + memchr(b'\n', &text[start..(start + max_line_len)]).unwrap_or(max_line_len);
trim_end(text, trim_start(text, start..end))
/// An iterator over a byte slice that yields maximal valid UTF-8 chunks,
/// emitting a U+FFFD replacement character for each invalid byte sequence
/// encountered (see the `Iterator` impl below).
pub struct LossyUtf8<'a> {
// The bytes remaining to be decoded.
bytes: &'a [u8],
// When true, the next call to `next` yields "\u{fffd}" for an invalid
// sequence that was skipped on the previous call.
in_replacement: bool,
}
fn trim_start(text: &[u8], r: Range<usize>) -> Range<usize> {
for (index, c) in text[r.start..r.end].iter().enumerate() {
if !c.is_ascii_whitespace() {
return (r.start+index)..r.end
impl<'a> LossyUtf8<'a> {
pub fn new(bytes: &'a [u8]) -> Self {
LossyUtf8 {
bytes,
in_replacement: false,
}
}
return r
}
fn trim_end(text: &[u8], r: Range<usize>) -> Range<usize> {
for (index, c) in text[r.start..r.end].iter().rev().enumerate() {
if !c.is_ascii_whitespace() {
return r.start..(r.end-index)
impl<'a> Iterator for LossyUtf8<'a> {
type Item = &'a str;
fn next(&mut self) -> Option<&'a str> {
if self.bytes.is_empty() {
return None;
}
if self.in_replacement {
self.in_replacement = false;
return Some("\u{fffd}");
}
match str::from_utf8(self.bytes) {
Ok(valid) => {
self.bytes = &[];
Some(valid)
}
Err(error) => {
if let Some(error_len) = error.error_len() {
let error_start = error.valid_up_to();
if error_start > 0 {
let result =
unsafe { str::from_utf8_unchecked(&self.bytes[..error_start]) };
self.bytes = &self.bytes[(error_start + error_len)..];
self.in_replacement = true;
Some(result)
} else {
self.bytes = &self.bytes[error_len..];
Some("\u{fffd}")
}
} else {
None
}
}
}
}
return r
}
fn line_range(
text: &[u8],
start_byte: usize,
start_point: Point,
max_line_len: usize,
) -> Range<usize> {
// Trim leading whitespace
let mut line_start_byte = start_byte - start_point.column;
while line_start_byte < text.len() && text[line_start_byte].is_ascii_whitespace() {
line_start_byte += 1;
}
let max_line_len = max_line_len.min(text.len() - line_start_byte);
let text_after_line_start = &text[line_start_byte..(line_start_byte + max_line_len)];
let line_len = if let Some(len) = memchr(b'\n', text_after_line_start) {
len
} else if let Err(e) = str::from_utf8(text_after_line_start) {
e.valid_up_to()
} else {
max_line_len
};
// Trim trailing whitespace
let mut line_end_byte = line_start_byte + line_len;
while line_end_byte > line_start_byte && text[line_end_byte - 1].is_ascii_whitespace() {
line_end_byte -= 1;
}
line_start_byte..line_end_byte
}
/// Converts a byte range on a single line into a range of UTF-16
/// code-unit columns.
///
/// The start column is the UTF-16 length of everything on the line that
/// precedes `byte_range.start`; the end column adds the UTF-16 length of
/// the range's own bytes. `point_range.start.column` locates the start of
/// the line relative to `byte_range.start`.
fn get_utf16_column_range(
    text: &[u8],
    byte_range: &Range<usize>,
    point_range: &Range<Point>,
) -> Range<usize> {
    let line_start = byte_range.start - point_range.start.column;
    let start_col = utf16_len(&text[line_start..byte_range.start]);
    let range_len = utf16_len(&text[byte_range.start..byte_range.end]);
    start_col..(start_col + range_len)
}
/// Returns the number of UTF-16 code units needed to encode `bytes`.
///
/// The bytes are decoded lossily via `LossyUtf8`, so each invalid UTF-8
/// sequence contributes one code unit (the U+FFFD replacement character).
fn utf16_len(bytes: &[u8]) -> usize {
    let mut total = 0;
    for chunk in LossyUtf8::new(bytes) {
        for ch in chunk.chars() {
            total += ch.len_utf16();
        }
    }
    total
}
#[cfg(test)]
@ -515,30 +592,26 @@ mod tests {
#[test]
fn test_get_line() {
let text = b"abc\ndefg\nhijkl";
assert_eq!(line_range(text, 0, 10), 0..3);
assert_eq!(line_range(text, 1, 10), 0..3);
assert_eq!(line_range(text, 2, 10), 0..3);
assert_eq!(line_range(text, 3, 10), 0..3);
assert_eq!(line_range(text, 1, 2), 0..2);
assert_eq!(line_range(text, 4, 10), 4..8);
assert_eq!(line_range(text, 5, 10), 4..8);
assert_eq!(line_range(text, 11, 10), 9..14);
let text = "abc\ndefg❤hij\nklmno".as_bytes();
assert_eq!(line_range(text, 5, Point::new(1, 1), 30), 4..14);
assert_eq!(line_range(text, 5, Point::new(1, 1), 6), 4..8);
assert_eq!(line_range(text, 17, Point::new(2, 2), 30), 15..20);
assert_eq!(line_range(text, 17, Point::new(2, 2), 4), 15..19);
}
#[test]
fn test_get_line_trims() {
let text = b" foo\nbar\n";
assert_eq!(line_range(text, 0, 10), 3..6);
assert_eq!(line_range(text, 0, Point::new(0, 0), 10), 3..6);
let text = b"\t func foo \nbar\n";
assert_eq!(line_range(text, 0, 10), 2..10);
assert_eq!(line_range(text, 0, Point::new(0, 0), 10), 2..10);
let r = line_range(text, 0, 14);
let r = line_range(text, 0, Point::new(0, 0), 14);
assert_eq!(r, 2..10);
assert_eq!(str::from_utf8(&text[r]).unwrap_or(""), "func foo");
let r = line_range(text, 12, 14);
let r = line_range(text, 12, Point::new(1, 0), 14);
assert_eq!(r, 12..15);
assert_eq!(str::from_utf8(&text[r]).unwrap_or(""), "bar");
}

View file

@ -22,10 +22,10 @@ The fuzzers can then be built with:
export CLANG_DIR=$HOME/src/third_party/llvm-build/Release+Asserts/bin
CC="$CLANG_DIR/clang" CXX="$CLANG_DIR/clang++" LINK="$CLANG_DIR/clang++" \
LIB_FUZZER_PATH=$HOME/src/compiler-rt/lib/fuzzer/libFuzzer.a \
./script/build_fuzzers
./script/build-fuzzers
```
This will generate a separate fuzzer for each grammar defined in `test/fixtures/grammars` and will be instrumented with [AddressSanitizer](https://clang.llvm.org/docs/AddressSanitizer.html) and [UndefinedBehaviorSanitizer](https://clang.llvm.org/docs/UndefinedBehaviorSanitizer.html). Individual fuzzers can be built with, for example, `./script/build_fuzzers python ruby`.
This will generate a separate fuzzer for each grammar defined in `test/fixtures/grammars` and will be instrumented with [AddressSanitizer](https://clang.llvm.org/docs/AddressSanitizer.html) and [UndefinedBehaviorSanitizer](https://clang.llvm.org/docs/UndefinedBehaviorSanitizer.html). Individual fuzzers can be built with, for example, `./script/build-fuzzers python ruby`.
The `run-fuzzer` script handles running an individual fuzzer with a sensible default set of arguments:
```