Replace operation limit API with a clock-based timeout API

2019-03-14 11:13:38 -07:00 · 2019-03-14 11:13:38 -07:00 · cddb3e416d
commit cddb3e416d
parent 1e585d506f
5 changed files with 109 additions and 64 deletions
--- a/cli/src/tests/parser_test.rs
+++ b/cli/src/tests/parser_test.rs
@ -1,7 +1,7 @@
 use super::helpers::edits::{perform_edit, Edit, ReadRecorder};
 use super::helpers::fixtures::{get_language, get_test_language};
 use crate::generate::generate_parser_for_grammar;
-use std::{thread, usize};
+use std::{thread, time};
 use tree_sitter::{InputEdit, LogType, Parser, Point, Range};

 #[test]
@ -269,84 +269,108 @@ fn test_parsing_on_multiple_threads() {
    assert_eq!(child_count_differences, &[1, 2, 3, 4]);
 }

-// Operation limits
+// Timeouts

 #[test]
-fn test_parsing_with_an_operation_limit() {
+fn test_parsing_with_a_timeout() {
    let mut parser = Parser::new();
    parser.set_language(get_language("json")).unwrap();

-    // Start parsing from an infinite input. Parsing should abort after 5 "operations".
-    parser.set_operation_limit(5);
-    let mut call_count = 0;
+    // Parse an infinitely-long string, but pause after 200 microseconds of processing.
+    parser.set_timeout_micros(200);
+    let start_time = time::Instant::now();
    let tree = parser.parse_with(
-        &mut |_, _| {
-            if call_count == 0 {
-                call_count += 1;
-                b"[0"
+        &mut |offset, _| {
+            if offset == 0 {
+                b"\""
            } else {
-                call_count += 1;
-                b", 0"
+                b"x"
            }
        },
        None,
    );
    assert!(tree.is_none());
-    assert!(call_count >= 3);
-    assert!(call_count <= 8);
+    assert!(start_time.elapsed().as_micros() > 100);
+    assert!(start_time.elapsed().as_micros() < 300);

-    // Resume parsing from the previous state.
-    call_count = 0;
-    parser.set_operation_limit(20);
+    // Continue parsing, but pause after 400 microseconds of processing.
+    parser.set_timeout_micros(400);
+    let start_time = time::Instant::now();
+    let tree = parser.parse_with(
+        &mut |offset, _| {
+            if offset == 0 {
+                b"\""
+            } else {
+                b"x"
+            }
+        },
+        None,
+    );
+    assert!(tree.is_none());
+    assert!(start_time.elapsed().as_micros() > 300);
+    assert!(start_time.elapsed().as_micros() < 500);
+
+    // Finish parsing
+    parser.set_timeout_micros(1_000_000);
    let tree = parser
        .parse_with(
-            &mut |_, _| {
-                if call_count == 0 {
-                    call_count += 1;
-                    b"]"
-                } else {
+            &mut |offset, _| {
+                if offset > 1000 {
                    b""
+                } else if offset == 1000 {
+                    b"\""
+                } else {
+                    b"y"
                }
            },
            None,
        )
        .unwrap();
-    assert_eq!(
-        tree.root_node().to_sexp(),
-        "(value (array (number) (number) (number)))"
-    );
+    assert_eq!(tree.root_node().to_sexp(), "(value (string))");
 }

 #[test]
-fn test_parsing_with_a_reset_after_reaching_an_operation_limit() {
+fn test_parsing_with_a_timeout_and_a_reset() {
    let mut parser = Parser::new();
    parser.set_language(get_language("json")).unwrap();

-    parser.set_operation_limit(3);
-    let tree = parser.parse("[1234, 5, 6, 7, 8]", None);
+    parser.set_timeout_micros(30);
+    let tree = parser.parse(
+        "[\"ok\", 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]",
+        None,
+    );
    assert!(tree.is_none());

    // Without calling reset, the parser continues from where it left off, so
    // it does not see the changes to the beginning of the source code.
-    parser.set_operation_limit(usize::MAX);
-    let tree = parser.parse("[null, 5, 6, 4, 5]", None).unwrap();
+    parser.set_timeout_micros(0);
+    let tree = parser.parse(
+        "[null, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]",
+        None,
+    ).unwrap();
    assert_eq!(
-        tree.root_node().to_sexp(),
-        "(value (array (number) (number) (number) (number) (number)))"
+        tree.root_node().named_child(0).unwrap().named_child(0).unwrap().kind(),
+        "string"
    );

-    parser.set_operation_limit(3);
-    let tree = parser.parse("[1234, 5, 6, 7, 8]", None);
+    parser.set_timeout_micros(30);
+    let tree = parser.parse(
+        "[\"ok\", 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]",
+        None,
+    );
    assert!(tree.is_none());

    // By calling reset, we force the parser to start over from scratch so
    // that it sees the changes to the beginning of the source code.
-    parser.set_operation_limit(usize::MAX);
+    parser.set_timeout_micros(0);
    parser.reset();
-    let tree = parser.parse("[null, 5, 6, 4, 5]", None).unwrap();
+    let tree = parser.parse(
+        "[null, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]",
+        None,
+    ).unwrap();
    assert_eq!(
-        tree.root_node().to_sexp(),
-        "(value (array (null) (number) (number) (number) (number)))"
+        tree.root_node().named_child(0).unwrap().named_child(0).unwrap().kind(),
+        "null"
    );
 }

--- a/lib/binding/bindings.rs
+++ b/lib/binding/bindings.rs
@ -143,10 +143,10 @@ extern "C" {
    pub fn ts_parser_set_enabled(arg1: *mut TSParser, arg2: bool);
 }
 extern "C" {
-    pub fn ts_parser_operation_limit(arg1: *const TSParser) -> usize;
+    pub fn ts_parser_timeout_micros(arg1: *const TSParser) -> usize;
 }
 extern "C" {
-    pub fn ts_parser_set_operation_limit(arg1: *mut TSParser, arg2: usize);
+    pub fn ts_parser_set_timeout_micros(arg1: *mut TSParser, arg2: usize);
 }
 extern "C" {
    pub fn ts_parser_reset(arg1: *mut TSParser);
--- a/lib/binding/lib.rs
+++ b/lib/binding/lib.rs
@ -230,7 +230,10 @@ impl Parser {
    pub fn parse(&mut self, input: impl AsRef<[u8]>, old_tree: Option<&Tree>) -> Option<Tree> {
        let bytes = input.as_ref();
        let len = bytes.len();
-        self.parse_with(&mut |i, _| if i < len { &bytes[i..] } else { &[] }, old_tree)
+        self.parse_with(
+            &mut |i, _| if i < len { &bytes[i..] } else { &[] },
+            old_tree,
+        )
    }

    pub fn parse_utf16(
@ -240,7 +243,10 @@ impl Parser {
    ) -> Option<Tree> {
        let code_points = input.as_ref();
        let len = code_points.len();
-        self.parse_utf16_with(&mut |i, _| if i < len { &code_points[i..] } else { &[] }, old_tree)
+        self.parse_utf16_with(
+            &mut |i, _| if i < len { &code_points[i..] } else { &[] },
+            old_tree,
+        )
    }

    pub fn parse_with<'a, T: FnMut(usize, Point) -> &'a [u8]>(
@ -317,8 +323,12 @@ impl Parser {
        unsafe { ffi::ts_parser_reset(self.0) }
    }

-    pub fn set_operation_limit(&mut self, limit: usize) {
-        unsafe { ffi::ts_parser_set_operation_limit(self.0, limit) }
+    pub fn timeout_micros(&self) -> usize {
+        unsafe { ffi::ts_parser_timeout_micros(self.0) }
+    }
+
+    pub fn set_timeout_micros(&mut self, timeout_micros: usize) {
+        unsafe { ffi::ts_parser_set_timeout_micros(self.0, timeout_micros) }
    }

    pub fn set_included_ranges(&mut self, ranges: &[Range]) {
--- a/lib/include/tree_sitter/api.h
+++ b/lib/include/tree_sitter/api.h
@ -90,8 +90,8 @@ TSTree *ts_parser_parse_string(TSParser *, const TSTree *, const char *, uint32_
 TSTree *ts_parser_parse_string_encoding(TSParser *, const TSTree *, const char *, uint32_t, TSInputEncoding);
 bool ts_parser_enabled(const TSParser *);
 void ts_parser_set_enabled(TSParser *, bool);
-size_t ts_parser_operation_limit(const TSParser *);
-void ts_parser_set_operation_limit(TSParser *, size_t);
+size_t ts_parser_timeout_micros(const TSParser *);
+void ts_parser_set_timeout_micros(TSParser *, size_t);
 void ts_parser_reset(TSParser *);
 void ts_parser_set_included_ranges(TSParser *, const TSRange *, uint32_t);
 const TSRange *ts_parser_included_ranges(const TSParser *, uint32_t *);
--- a/lib/src/parser.c
+++ b/lib/src/parser.c
@ -1,3 +1,4 @@
+#include <time.h>
 #include <assert.h>
 #include <stdio.h>
 #include <limits.h>
@ -42,6 +43,7 @@ static const unsigned MAX_VERSION_COUNT = 6;
 static const unsigned MAX_VERSION_COUNT_OVERFLOW = 4;
 static const unsigned MAX_SUMMARY_DEPTH = 16;
 static const unsigned MAX_COST_DIFFERENCE = 16 * ERROR_COST_PER_SKIPPED_TREE;
+static const unsigned CLOCKS_PER_MICROSECOND = CLOCKS_PER_SEC / 1000000;

 typedef struct {
  Subtree token;
@ -63,7 +65,8 @@ struct TSParser {
  void *external_scanner_payload;
  FILE *dot_graph_file;
  unsigned accept_count;
-  size_t operation_limit;
+  clock_t clock_limit;
+  clock_t start_clock;
  volatile bool enabled;
  bool halt_on_error;
  Subtree old_tree;
@ -1242,7 +1245,11 @@ static void ts_parser__recover(TSParser *self, StackVersion version, Subtree loo
  }
 }

-static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_node_reuse) {
+static bool ts_parser__advance(
+  TSParser *self,
+  StackVersion version,
+  bool allow_node_reuse
+) {
  TSStateId state = ts_stack_state(self->stack, version);
  uint32_t position = ts_stack_position(self->stack, version).bytes;
  Subtree last_external_token = ts_stack_last_external_token(self->stack, version);
@ -1274,6 +1281,11 @@ static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_
  }

  for (;;) {
+    if ((size_t)(clock() - self->start_clock) > self->clock_limit || !self->enabled) {
+      ts_subtree_release(&self->tree_pool, lookahead);
+      return false;
+    }
+
    StackVersion last_reduction_version = STACK_VERSION_NONE;

    for (uint32_t i = 0; i < table_entry.action_count; i++) {
@ -1302,7 +1314,7 @@ static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_

          ts_parser__shift(self, version, next_state, lookahead, action.params.extra);
          if (did_reuse) reusable_node_advance(&self->reusable_node);
-          return;
+          return true;
        }

        case TSParseActionTypeReduce: {
@ -1322,7 +1334,7 @@ static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_
        case TSParseActionTypeAccept: {
          LOG("accept");
          ts_parser__accept(self, version, lookahead);
-          return;
+          return true;
        }

        case TSParseActionTypeRecover: {
@ -1332,7 +1344,7 @@ static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_

          ts_parser__recover(self, version, lookahead);
          if (did_reuse) reusable_node_advance(&self->reusable_node);
-          return;
+          return true;
        }
      }
    }
@ -1371,7 +1383,7 @@ static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_

    if (state == ERROR_STATE) {
      ts_parser__recover(self, version, lookahead);
-      return;
+      return true;
    }

    if (ts_parser__breakdown_top_of_stack(self, version)) {
@ -1381,7 +1393,7 @@ static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_
    LOG("detect_error");
    ts_stack_pause(self->stack, version, ts_subtree_leaf_symbol(lookahead));
    ts_subtree_release(&self->tree_pool, lookahead);
-    return;
+    return true;
  }
 }

@ -1492,7 +1504,8 @@ TSParser *ts_parser_new() {
  self->dot_graph_file = NULL;
  self->halt_on_error = false;
  self->enabled = true;
-  self->operation_limit = SIZE_MAX;
+  self->clock_limit = SIZE_MAX;
+  self->start_clock = 0;
  self->old_tree = NULL_SUBTREE;
  self->scratch_tree.ptr = &self->scratch_tree_data;
  self->included_range_differences = (TSRangeArray) array_new();
@ -1574,12 +1587,13 @@ void ts_parser_set_enabled(TSParser *self, bool enabled) {
  self->enabled = enabled;
 }

-size_t ts_parser_operation_limit(const TSParser *self) {
-  return self->operation_limit;
+size_t ts_parser_timeout_micros(const TSParser *self) {
+  return self->clock_limit / CLOCKS_PER_MICROSECOND;
 }

-void ts_parser_set_operation_limit(TSParser *self, size_t limit) {
-  self->operation_limit = limit;
+void ts_parser_set_timeout_micros(TSParser *self, size_t timeout_micros) {
+  self->clock_limit = timeout_micros * CLOCKS_PER_MICROSECOND;
+  if (self->clock_limit == 0) self->clock_limit = SIZE_MAX;
 }

 void ts_parser_set_included_ranges(TSParser *self, const TSRange *ranges, uint32_t count) {
@ -1642,15 +1656,12 @@ TSTree *ts_parser_parse(TSParser *self, const TSTree *old_tree, TSInput input) {
  }

  uint32_t position = 0, last_position = 0, version_count = 0;
-  size_t operation_count = 0;
+  self->start_clock = clock();

  do {
    for (StackVersion version = 0;
         version_count = ts_stack_version_count(self->stack), version < version_count;
         version++) {
-      if (operation_count > self->operation_limit || !self->enabled) return NULL;
-      operation_count++;
-
      bool allow_node_reuse = version_count == 1;
      while (ts_stack_is_active(self->stack, version)) {
        LOG("process version:%d, version_count:%u, state:%d, row:%u, col:%u",
@ -1659,7 +1670,7 @@ TSTree *ts_parser_parse(TSParser *self, const TSTree *old_tree, TSInput input) {
            ts_stack_position(self->stack, version).extent.row,
            ts_stack_position(self->stack, version).extent.column);

-        ts_parser__advance(self, version, allow_node_reuse);
+        if (!ts_parser__advance(self, version, allow_node_reuse)) return NULL;
        LOG_STACK();

        position = ts_stack_position(self->stack, version).bytes;