From cddb3e416d4014cfad833335879ccdfffdb56d5e Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 14 Mar 2019 11:13:38 -0700 Subject: [PATCH] Replace operation limit API with a clock-based timeout API --- cli/src/tests/parser_test.rs | 102 +++++++++++++++++++++------------- lib/binding/bindings.rs | 4 +- lib/binding/lib.rs | 18 ++++-- lib/include/tree_sitter/api.h | 4 +- lib/src/parser.c | 45 +++++++++------ 5 files changed, 109 insertions(+), 64 deletions(-) diff --git a/cli/src/tests/parser_test.rs b/cli/src/tests/parser_test.rs index 7947463a..6b7228dc 100644 --- a/cli/src/tests/parser_test.rs +++ b/cli/src/tests/parser_test.rs @@ -1,7 +1,7 @@ use super::helpers::edits::{perform_edit, Edit, ReadRecorder}; use super::helpers::fixtures::{get_language, get_test_language}; use crate::generate::generate_parser_for_grammar; -use std::{thread, usize}; +use std::{thread, time}; use tree_sitter::{InputEdit, LogType, Parser, Point, Range}; #[test] @@ -269,84 +269,108 @@ fn test_parsing_on_multiple_threads() { assert_eq!(child_count_differences, &[1, 2, 3, 4]); } -// Operation limits +// Timeouts #[test] -fn test_parsing_with_an_operation_limit() { +fn test_parsing_with_a_timeout() { let mut parser = Parser::new(); parser.set_language(get_language("json")).unwrap(); - // Start parsing from an infinite input. Parsing should abort after 5 "operations". - parser.set_operation_limit(5); - let mut call_count = 0; + // Parse an infinitely-long string, but pause after 100 microseconds of processing. 
+ parser.set_timeout_micros(200); + let start_time = time::Instant::now(); let tree = parser.parse_with( - &mut |_, _| { - if call_count == 0 { - call_count += 1; - b"[0" + &mut |offset, _| { + if offset == 0 { + b"\"" } else { - call_count += 1; - b", 0" + b"x" } }, None, ); assert!(tree.is_none()); - assert!(call_count >= 3); - assert!(call_count <= 8); + assert!(start_time.elapsed().as_micros() > 100); + assert!(start_time.elapsed().as_micros() < 300); - // Resume parsing from the previous state. - call_count = 0; - parser.set_operation_limit(20); + // Continue parsing, but pause after 300 microseconds of processing. + parser.set_timeout_micros(400); + let start_time = time::Instant::now(); + let tree = parser.parse_with( + &mut |offset, _| { + if offset == 0 { + b"\"" + } else { + b"x" + } + }, + None, + ); + assert!(tree.is_none()); + assert!(start_time.elapsed().as_micros() > 300); + assert!(start_time.elapsed().as_micros() < 500); + + // Finish parsing + parser.set_timeout_micros(1_000_000); let tree = parser .parse_with( - &mut |_, _| { - if call_count == 0 { - call_count += 1; - b"]" - } else { + &mut |offset, _| { + if offset > 1000 { b"" + } else if offset == 1000 { + b"\"" + } else { + b"y" } }, None, ) .unwrap(); - assert_eq!( - tree.root_node().to_sexp(), - "(value (array (number) (number) (number)))" - ); + assert_eq!(tree.root_node().to_sexp(), "(value (string))"); } #[test] -fn test_parsing_with_a_reset_after_reaching_an_operation_limit() { +fn test_parsing_with_a_timeout_and_a_reset() { let mut parser = Parser::new(); parser.set_language(get_language("json")).unwrap(); - parser.set_operation_limit(3); - let tree = parser.parse("[1234, 5, 6, 7, 8]", None); + parser.set_timeout_micros(30); + let tree = parser.parse( + "[\"ok\", 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]", + None, + ); assert!(tree.is_none()); // Without calling reset, the parser continues from where it left 
off, so // it does not see the changes to the beginning of the source code. - parser.set_operation_limit(usize::MAX); - let tree = parser.parse("[null, 5, 6, 4, 5]", None).unwrap(); + parser.set_timeout_micros(0); + let tree = parser.parse( + "[null, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]", + None, + ).unwrap(); assert_eq!( - tree.root_node().to_sexp(), - "(value (array (number) (number) (number) (number) (number)))" + tree.root_node().named_child(0).unwrap().named_child(0).unwrap().kind(), + "string" ); - parser.set_operation_limit(3); - let tree = parser.parse("[1234, 5, 6, 7, 8]", None); + parser.set_timeout_micros(30); + let tree = parser.parse( + "[\"ok\", 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]", + None, + ); assert!(tree.is_none()); // By calling reset, we force the parser to start over from scratch so // that it sees the changes to the beginning of the source code. 
- parser.set_operation_limit(usize::MAX); + parser.set_timeout_micros(0); parser.reset(); - let tree = parser.parse("[null, 5, 6, 4, 5]", None).unwrap(); + let tree = parser.parse( + "[null, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]", + None, + ).unwrap(); assert_eq!( - tree.root_node().to_sexp(), - "(value (array (null) (number) (number) (number) (number)))" + tree.root_node().named_child(0).unwrap().named_child(0).unwrap().kind(), + "null" ); } diff --git a/lib/binding/bindings.rs b/lib/binding/bindings.rs index 9d1f3490..41999088 100644 --- a/lib/binding/bindings.rs +++ b/lib/binding/bindings.rs @@ -143,10 +143,10 @@ extern "C" { pub fn ts_parser_set_enabled(arg1: *mut TSParser, arg2: bool); } extern "C" { - pub fn ts_parser_operation_limit(arg1: *const TSParser) -> usize; + pub fn ts_parser_timeout_micros(arg1: *const TSParser) -> usize; } extern "C" { - pub fn ts_parser_set_operation_limit(arg1: *mut TSParser, arg2: usize); + pub fn ts_parser_set_timeout_micros(arg1: *mut TSParser, arg2: usize); } extern "C" { pub fn ts_parser_reset(arg1: *mut TSParser); diff --git a/lib/binding/lib.rs b/lib/binding/lib.rs index 3703e299..9f8f1dec 100644 --- a/lib/binding/lib.rs +++ b/lib/binding/lib.rs @@ -230,7 +230,10 @@ impl Parser { pub fn parse(&mut self, input: impl AsRef<[u8]>, old_tree: Option<&Tree>) -> Option<Tree> { let bytes = input.as_ref(); let len = bytes.len(); - self.parse_with(&mut |i, _| if i < len { &bytes[i..] } else { &[] }, old_tree) + self.parse_with( + &mut |i, _| if i < len { &bytes[i..] } else { &[] }, + old_tree, + ) } pub fn parse_utf16( @@ -240,7 +243,10 @@ impl Parser { ) -> Option<Tree> { let code_points = input.as_ref(); let len = code_points.len(); - self.parse_utf16_with(&mut |i, _| if i < len { &code_points[i..] } else { &[] }, old_tree) + self.parse_utf16_with( + &mut |i, _| if i < len { &code_points[i..] 
} else { &[] }, + old_tree, + ) } pub fn parse_with<'a, T: FnMut(usize, Point) -> &'a [u8]>( @@ -317,8 +323,12 @@ impl Parser { unsafe { ffi::ts_parser_reset(self.0) } } - pub fn set_operation_limit(&mut self, limit: usize) { - unsafe { ffi::ts_parser_set_operation_limit(self.0, limit) } + pub fn timeout_micros(&self) -> usize { + unsafe { ffi::ts_parser_timeout_micros(self.0) } + } + + pub fn set_timeout_micros(&mut self, timeout_micros: usize) { + unsafe { ffi::ts_parser_set_timeout_micros(self.0, timeout_micros) } } pub fn set_included_ranges(&mut self, ranges: &[Range]) { diff --git a/lib/include/tree_sitter/api.h b/lib/include/tree_sitter/api.h index 16841c8e..5c72e7b1 100644 --- a/lib/include/tree_sitter/api.h +++ b/lib/include/tree_sitter/api.h @@ -90,8 +90,8 @@ TSTree *ts_parser_parse_string(TSParser *, const TSTree *, const char *, uint32_ TSTree *ts_parser_parse_string_encoding(TSParser *, const TSTree *, const char *, uint32_t, TSInputEncoding); bool ts_parser_enabled(const TSParser *); void ts_parser_set_enabled(TSParser *, bool); -size_t ts_parser_operation_limit(const TSParser *); -void ts_parser_set_operation_limit(TSParser *, size_t); +size_t ts_parser_timeout_micros(const TSParser *); +void ts_parser_set_timeout_micros(TSParser *, size_t); void ts_parser_reset(TSParser *); void ts_parser_set_included_ranges(TSParser *, const TSRange *, uint32_t); const TSRange *ts_parser_included_ranges(const TSParser *, uint32_t *); diff --git a/lib/src/parser.c b/lib/src/parser.c index 85452f8d..0808b786 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ -1,3 +1,4 @@ +#include <time.h> #include <assert.h> #include <stdio.h> #include <limits.h> @@ -42,6 +43,7 @@ static const unsigned MAX_VERSION_COUNT = 6; static const unsigned MAX_VERSION_COUNT_OVERFLOW = 4; static const unsigned MAX_SUMMARY_DEPTH = 16; static const unsigned MAX_COST_DIFFERENCE = 16 * ERROR_COST_PER_SKIPPED_TREE; +static const unsigned CLOCKS_PER_MICROSECOND = CLOCKS_PER_SEC / 1000000; typedef struct { Subtree token; @@ -63,7 +65,8 
@@ struct TSParser { void *external_scanner_payload; FILE *dot_graph_file; unsigned accept_count; - size_t operation_limit; + clock_t clock_limit; + clock_t start_clock; volatile bool enabled; bool halt_on_error; Subtree old_tree; @@ -1242,7 +1245,11 @@ static void ts_parser__recover(TSParser *self, StackVersion version, Subtree loo } } -static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_node_reuse) { +static bool ts_parser__advance( + TSParser *self, + StackVersion version, + bool allow_node_reuse +) { TSStateId state = ts_stack_state(self->stack, version); uint32_t position = ts_stack_position(self->stack, version).bytes; Subtree last_external_token = ts_stack_last_external_token(self->stack, version); @@ -1274,6 +1281,11 @@ static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_ } for (;;) { + if ((size_t)(clock() - self->start_clock) > self->clock_limit || !self->enabled) { + ts_subtree_release(&self->tree_pool, lookahead); + return false; + } + StackVersion last_reduction_version = STACK_VERSION_NONE; for (uint32_t i = 0; i < table_entry.action_count; i++) { @@ -1302,7 +1314,7 @@ static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_ ts_parser__shift(self, version, next_state, lookahead, action.params.extra); if (did_reuse) reusable_node_advance(&self->reusable_node); - return; + return true; } case TSParseActionTypeReduce: { @@ -1322,7 +1334,7 @@ static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_ case TSParseActionTypeAccept: { LOG("accept"); ts_parser__accept(self, version, lookahead); - return; + return true; } case TSParseActionTypeRecover: { @@ -1332,7 +1344,7 @@ static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_ ts_parser__recover(self, version, lookahead); if (did_reuse) reusable_node_advance(&self->reusable_node); - return; + return true; } } } @@ -1371,7 +1383,7 @@ static void ts_parser__advance(TSParser *self, 
StackVersion version, bool allow_ if (state == ERROR_STATE) { ts_parser__recover(self, version, lookahead); - return; + return true; } if (ts_parser__breakdown_top_of_stack(self, version)) { @@ -1381,7 +1393,7 @@ static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_ LOG("detect_error"); ts_stack_pause(self->stack, version, ts_subtree_leaf_symbol(lookahead)); ts_subtree_release(&self->tree_pool, lookahead); - return; + return true; } } @@ -1492,7 +1504,8 @@ TSParser *ts_parser_new() { self->dot_graph_file = NULL; self->halt_on_error = false; self->enabled = true; - self->operation_limit = SIZE_MAX; + self->clock_limit = SIZE_MAX; + self->start_clock = 0; self->old_tree = NULL_SUBTREE; self->scratch_tree.ptr = &self->scratch_tree_data; self->included_range_differences = (TSRangeArray) array_new(); @@ -1574,12 +1587,13 @@ void ts_parser_set_enabled(TSParser *self, bool enabled) { self->enabled = enabled; } -size_t ts_parser_operation_limit(const TSParser *self) { - return self->operation_limit; +size_t ts_parser_timeout_micros(const TSParser *self) { + return self->clock_limit / CLOCKS_PER_MICROSECOND; } -void ts_parser_set_operation_limit(TSParser *self, size_t limit) { - self->operation_limit = limit; +void ts_parser_set_timeout_micros(TSParser *self, size_t timeout_micros) { + self->clock_limit = timeout_micros * CLOCKS_PER_MICROSECOND; + if (self->clock_limit == 0) self->clock_limit = SIZE_MAX; } void ts_parser_set_included_ranges(TSParser *self, const TSRange *ranges, uint32_t count) { @@ -1642,15 +1656,12 @@ TSTree *ts_parser_parse(TSParser *self, const TSTree *old_tree, TSInput input) { } uint32_t position = 0, last_position = 0, version_count = 0; - size_t operation_count = 0; + self->start_clock = clock(); do { for (StackVersion version = 0; version_count = ts_stack_version_count(self->stack), version < version_count; version++) { - if (operation_count > self->operation_limit || !self->enabled) return NULL; - operation_count++; - 
bool allow_node_reuse = version_count == 1; while (ts_stack_is_active(self->stack, version)) { LOG("process version:%d, version_count:%u, state:%d, row:%u, col:%u", @@ -1659,7 +1670,7 @@ TSTree *ts_parser_parse(TSParser *self, const TSTree *old_tree, TSInput input) { ts_stack_position(self->stack, version).extent.row, ts_stack_position(self->stack, version).extent.column); - ts_parser__advance(self, version, allow_node_reuse); + if (!ts_parser__advance(self, version, allow_node_reuse)) return NULL; LOG_STACK(); position = ts_stack_position(self->stack, version).bytes;