From cddb3e416d4014cfad833335879ccdfffdb56d5e Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 14 Mar 2019 11:13:38 -0700 Subject: [PATCH 1/6] Replace operation limit API with a clock-based timeout API --- cli/src/tests/parser_test.rs | 102 +++++++++++++++++++++------------- lib/binding/bindings.rs | 4 +- lib/binding/lib.rs | 18 ++++-- lib/include/tree_sitter/api.h | 4 +- lib/src/parser.c | 45 +++++++++------ 5 files changed, 109 insertions(+), 64 deletions(-) diff --git a/cli/src/tests/parser_test.rs b/cli/src/tests/parser_test.rs index 7947463a..6b7228dc 100644 --- a/cli/src/tests/parser_test.rs +++ b/cli/src/tests/parser_test.rs @@ -1,7 +1,7 @@ use super::helpers::edits::{perform_edit, Edit, ReadRecorder}; use super::helpers::fixtures::{get_language, get_test_language}; use crate::generate::generate_parser_for_grammar; -use std::{thread, usize}; +use std::{thread, time}; use tree_sitter::{InputEdit, LogType, Parser, Point, Range}; #[test] @@ -269,84 +269,108 @@ fn test_parsing_on_multiple_threads() { assert_eq!(child_count_differences, &[1, 2, 3, 4]); } -// Operation limits +// Timeouts #[test] -fn test_parsing_with_an_operation_limit() { +fn test_parsing_with_a_timeout() { let mut parser = Parser::new(); parser.set_language(get_language("json")).unwrap(); - // Start parsing from an infinite input. Parsing should abort after 5 "operations". - parser.set_operation_limit(5); - let mut call_count = 0; + // Parse an infinitely-long string, but pause after 100 microseconds of processing. 
+ parser.set_timeout_micros(200); + let start_time = time::Instant::now(); let tree = parser.parse_with( - &mut |_, _| { - if call_count == 0 { - call_count += 1; - b"[0" + &mut |offset, _| { + if offset == 0 { + b"\"" } else { - call_count += 1; - b", 0" + b"x" } }, None, ); assert!(tree.is_none()); - assert!(call_count >= 3); - assert!(call_count <= 8); + assert!(start_time.elapsed().as_micros() > 100); + assert!(start_time.elapsed().as_micros() < 300); - // Resume parsing from the previous state. - call_count = 0; - parser.set_operation_limit(20); + // Continue parsing, but pause after 300 microseconds of processing. + parser.set_timeout_micros(400); + let start_time = time::Instant::now(); + let tree = parser.parse_with( + &mut |offset, _| { + if offset == 0 { + b"\"" + } else { + b"x" + } + }, + None, + ); + assert!(tree.is_none()); + assert!(start_time.elapsed().as_micros() > 300); + assert!(start_time.elapsed().as_micros() < 500); + + // Finish parsing + parser.set_timeout_micros(1_000_000); let tree = parser .parse_with( - &mut |_, _| { - if call_count == 0 { - call_count += 1; - b"]" - } else { + &mut |offset, _| { + if offset > 1000 { b"" + } else if offset == 1000 { + b"\"" + } else { + b"y" } }, None, ) .unwrap(); - assert_eq!( - tree.root_node().to_sexp(), - "(value (array (number) (number) (number)))" - ); + assert_eq!(tree.root_node().to_sexp(), "(value (string))"); } #[test] -fn test_parsing_with_a_reset_after_reaching_an_operation_limit() { +fn test_parsing_with_a_timeout_and_a_reset() { let mut parser = Parser::new(); parser.set_language(get_language("json")).unwrap(); - parser.set_operation_limit(3); - let tree = parser.parse("[1234, 5, 6, 7, 8]", None); + parser.set_timeout_micros(30); + let tree = parser.parse( + "[\"ok\", 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]", + None, + ); assert!(tree.is_none()); // Without calling reset, the parser continues from where it left 
off, so // it does not see the changes to the beginning of the source code. - parser.set_operation_limit(usize::MAX); - let tree = parser.parse("[null, 5, 6, 4, 5]", None).unwrap(); + parser.set_timeout_micros(0); + let tree = parser.parse( + "[null, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]", + None, + ).unwrap(); assert_eq!( - tree.root_node().to_sexp(), - "(value (array (number) (number) (number) (number) (number)))" + tree.root_node().named_child(0).unwrap().named_child(0).unwrap().kind(), + "string" ); - parser.set_operation_limit(3); - let tree = parser.parse("[1234, 5, 6, 7, 8]", None); + parser.set_timeout_micros(30); + let tree = parser.parse( + "[\"ok\", 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]", + None, + ); assert!(tree.is_none()); // By calling reset, we force the parser to start over from scratch so // that it sees the changes to the beginning of the source code. 
- parser.set_operation_limit(usize::MAX); + parser.set_timeout_micros(0); parser.reset(); - let tree = parser.parse("[null, 5, 6, 4, 5]", None).unwrap(); + let tree = parser.parse( + "[null, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]", + None, + ).unwrap(); assert_eq!( - tree.root_node().to_sexp(), - "(value (array (null) (number) (number) (number) (number)))" + tree.root_node().named_child(0).unwrap().named_child(0).unwrap().kind(), + "null" ); } diff --git a/lib/binding/bindings.rs b/lib/binding/bindings.rs index 9d1f3490..41999088 100644 --- a/lib/binding/bindings.rs +++ b/lib/binding/bindings.rs @@ -143,10 +143,10 @@ extern "C" { pub fn ts_parser_set_enabled(arg1: *mut TSParser, arg2: bool); } extern "C" { - pub fn ts_parser_operation_limit(arg1: *const TSParser) -> usize; + pub fn ts_parser_timeout_micros(arg1: *const TSParser) -> usize; } extern "C" { - pub fn ts_parser_set_operation_limit(arg1: *mut TSParser, arg2: usize); + pub fn ts_parser_set_timeout_micros(arg1: *mut TSParser, arg2: usize); } extern "C" { pub fn ts_parser_reset(arg1: *mut TSParser); diff --git a/lib/binding/lib.rs b/lib/binding/lib.rs index 3703e299..9f8f1dec 100644 --- a/lib/binding/lib.rs +++ b/lib/binding/lib.rs @@ -230,7 +230,10 @@ impl Parser { pub fn parse(&mut self, input: impl AsRef<[u8]>, old_tree: Option<&Tree>) -> Option<Tree> { let bytes = input.as_ref(); let len = bytes.len(); - self.parse_with(&mut |i, _| if i < len { &bytes[i..] } else { &[] }, old_tree) + self.parse_with( + &mut |i, _| if i < len { &bytes[i..] } else { &[] }, + old_tree, + ) } pub fn parse_utf16( @@ -240,7 +243,10 @@ impl Parser { ) -> Option<Tree> { let code_points = input.as_ref(); let len = code_points.len(); - self.parse_utf16_with(&mut |i, _| if i < len { &code_points[i..] } else { &[] }, old_tree) + self.parse_utf16_with( + &mut |i, _| if i < len { &code_points[i..] 
} else { &[] }, + old_tree, + ) } pub fn parse_with<'a, T: FnMut(usize, Point) -> &'a [u8]>( @@ -317,8 +323,12 @@ impl Parser { unsafe { ffi::ts_parser_reset(self.0) } } - pub fn set_operation_limit(&mut self, limit: usize) { - unsafe { ffi::ts_parser_set_operation_limit(self.0, limit) } + pub fn timeout_micros(&self) -> usize { + unsafe { ffi::ts_parser_timeout_micros(self.0) } + } + + pub fn set_timeout_micros(&mut self, timeout_micros: usize) { + unsafe { ffi::ts_parser_set_timeout_micros(self.0, timeout_micros) } } pub fn set_included_ranges(&mut self, ranges: &[Range]) { diff --git a/lib/include/tree_sitter/api.h b/lib/include/tree_sitter/api.h index 16841c8e..5c72e7b1 100644 --- a/lib/include/tree_sitter/api.h +++ b/lib/include/tree_sitter/api.h @@ -90,8 +90,8 @@ TSTree *ts_parser_parse_string(TSParser *, const TSTree *, const char *, uint32_ TSTree *ts_parser_parse_string_encoding(TSParser *, const TSTree *, const char *, uint32_t, TSInputEncoding); bool ts_parser_enabled(const TSParser *); void ts_parser_set_enabled(TSParser *, bool); -size_t ts_parser_operation_limit(const TSParser *); -void ts_parser_set_operation_limit(TSParser *, size_t); +size_t ts_parser_timeout_micros(const TSParser *); +void ts_parser_set_timeout_micros(TSParser *, size_t); void ts_parser_reset(TSParser *); void ts_parser_set_included_ranges(TSParser *, const TSRange *, uint32_t); const TSRange *ts_parser_included_ranges(const TSParser *, uint32_t *); diff --git a/lib/src/parser.c b/lib/src/parser.c index 85452f8d..0808b786 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ -1,3 +1,4 @@ +#include #include #include #include @@ -42,6 +43,7 @@ static const unsigned MAX_VERSION_COUNT = 6; static const unsigned MAX_VERSION_COUNT_OVERFLOW = 4; static const unsigned MAX_SUMMARY_DEPTH = 16; static const unsigned MAX_COST_DIFFERENCE = 16 * ERROR_COST_PER_SKIPPED_TREE; +static const unsigned CLOCKS_PER_MICROSECOND = CLOCKS_PER_SEC / 1000000; typedef struct { Subtree token; @@ -63,7 +65,8 
@@ struct TSParser { void *external_scanner_payload; FILE *dot_graph_file; unsigned accept_count; - size_t operation_limit; + clock_t clock_limit; + clock_t start_clock; volatile bool enabled; bool halt_on_error; Subtree old_tree; @@ -1242,7 +1245,11 @@ static void ts_parser__recover(TSParser *self, StackVersion version, Subtree loo } } -static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_node_reuse) { +static bool ts_parser__advance( + TSParser *self, + StackVersion version, + bool allow_node_reuse +) { TSStateId state = ts_stack_state(self->stack, version); uint32_t position = ts_stack_position(self->stack, version).bytes; Subtree last_external_token = ts_stack_last_external_token(self->stack, version); @@ -1274,6 +1281,11 @@ static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_ } for (;;) { + if ((size_t)(clock() - self->start_clock) > self->clock_limit || !self->enabled) { + ts_subtree_release(&self->tree_pool, lookahead); + return false; + } + StackVersion last_reduction_version = STACK_VERSION_NONE; for (uint32_t i = 0; i < table_entry.action_count; i++) { @@ -1302,7 +1314,7 @@ static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_ ts_parser__shift(self, version, next_state, lookahead, action.params.extra); if (did_reuse) reusable_node_advance(&self->reusable_node); - return; + return true; } case TSParseActionTypeReduce: { @@ -1322,7 +1334,7 @@ static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_ case TSParseActionTypeAccept: { LOG("accept"); ts_parser__accept(self, version, lookahead); - return; + return true; } case TSParseActionTypeRecover: { @@ -1332,7 +1344,7 @@ static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_ ts_parser__recover(self, version, lookahead); if (did_reuse) reusable_node_advance(&self->reusable_node); - return; + return true; } } } @@ -1371,7 +1383,7 @@ static void ts_parser__advance(TSParser *self, 
StackVersion version, bool allow_ if (state == ERROR_STATE) { ts_parser__recover(self, version, lookahead); - return; + return true; } if (ts_parser__breakdown_top_of_stack(self, version)) { @@ -1381,7 +1393,7 @@ static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_ LOG("detect_error"); ts_stack_pause(self->stack, version, ts_subtree_leaf_symbol(lookahead)); ts_subtree_release(&self->tree_pool, lookahead); - return; + return true; } } @@ -1492,7 +1504,8 @@ TSParser *ts_parser_new() { self->dot_graph_file = NULL; self->halt_on_error = false; self->enabled = true; - self->operation_limit = SIZE_MAX; + self->clock_limit = SIZE_MAX; + self->start_clock = 0; self->old_tree = NULL_SUBTREE; self->scratch_tree.ptr = &self->scratch_tree_data; self->included_range_differences = (TSRangeArray) array_new(); @@ -1574,12 +1587,13 @@ void ts_parser_set_enabled(TSParser *self, bool enabled) { self->enabled = enabled; } -size_t ts_parser_operation_limit(const TSParser *self) { - return self->operation_limit; +size_t ts_parser_timeout_micros(const TSParser *self) { + return self->clock_limit / CLOCKS_PER_MICROSECOND; } -void ts_parser_set_operation_limit(TSParser *self, size_t limit) { - self->operation_limit = limit; +void ts_parser_set_timeout_micros(TSParser *self, size_t timeout_micros) { + self->clock_limit = timeout_micros * CLOCKS_PER_MICROSECOND; + if (self->clock_limit == 0) self->clock_limit = SIZE_MAX; } void ts_parser_set_included_ranges(TSParser *self, const TSRange *ranges, uint32_t count) { @@ -1642,15 +1656,12 @@ TSTree *ts_parser_parse(TSParser *self, const TSTree *old_tree, TSInput input) { } uint32_t position = 0, last_position = 0, version_count = 0; - size_t operation_count = 0; + self->start_clock = clock(); do { for (StackVersion version = 0; version_count = ts_stack_version_count(self->stack), version < version_count; version++) { - if (operation_count > self->operation_limit || !self->enabled) return NULL; - operation_count++; - 
bool allow_node_reuse = version_count == 1; while (ts_stack_is_active(self->stack, version)) { LOG("process version:%d, version_count:%u, state:%d, row:%u, col:%u", @@ -1659,7 +1670,7 @@ TSTree *ts_parser_parse(TSParser *self, const TSTree *old_tree, TSInput input) { ts_stack_position(self->stack, version).extent.row, ts_stack_position(self->stack, version).extent.column); - ts_parser__advance(self, version, allow_node_reuse); + if (!ts_parser__advance(self, version, allow_node_reuse)) return NULL; LOG_STACK(); position = ts_stack_position(self->stack, version).bytes; From 430f8874eab63cd8bf1856468a4114a5ce58386f Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 14 Mar 2019 11:52:25 -0700 Subject: [PATCH 2/6] Lib: reduce frequency of clock calls during parsing --- lib/src/parser.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/lib/src/parser.c b/lib/src/parser.c index 0808b786..5f850c72 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ -44,6 +44,7 @@ static const unsigned MAX_VERSION_COUNT_OVERFLOW = 4; static const unsigned MAX_SUMMARY_DEPTH = 16; static const unsigned MAX_COST_DIFFERENCE = 16 * ERROR_COST_PER_SKIPPED_TREE; static const unsigned CLOCKS_PER_MICROSECOND = CLOCKS_PER_SEC / 1000000; +static const unsigned OP_COUNT_PER_TIMEOUT_CHECK = 100; typedef struct { Subtree token; @@ -67,6 +68,7 @@ struct TSParser { unsigned accept_count; clock_t clock_limit; clock_t start_clock; + unsigned operation_count; volatile bool enabled; bool halt_on_error; Subtree old_tree; @@ -1281,9 +1283,12 @@ static bool ts_parser__advance( } for (;;) { - if ((size_t)(clock() - self->start_clock) > self->clock_limit || !self->enabled) { - ts_subtree_release(&self->tree_pool, lookahead); - return false; + if (!self->enabled || ++self->operation_count == OP_COUNT_PER_TIMEOUT_CHECK) { + self->operation_count = 0; + if (clock() - self->start_clock > self->clock_limit) { + ts_subtree_release(&self->tree_pool, lookahead); + return false; + } 
} StackVersion last_reduction_version = STACK_VERSION_NONE; @@ -1506,6 +1511,7 @@ TSParser *ts_parser_new() { self->enabled = true; self->clock_limit = SIZE_MAX; self->start_clock = 0; + self->operation_count = 0; self->old_tree = NULL_SUBTREE; self->scratch_tree.ptr = &self->scratch_tree_data; self->included_range_differences = (TSRangeArray) array_new(); @@ -1656,6 +1662,7 @@ TSTree *ts_parser_parse(TSParser *self, const TSTree *old_tree, TSInput input) { } uint32_t position = 0, last_position = 0, version_count = 0; + self->operation_count = 0; self->start_clock = clock(); do { From e30e827c5ffd9db2ec5e5876535c2492d09a0639 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 14 Mar 2019 11:52:50 -0700 Subject: [PATCH 3/6] CLI: Add timeout flag to parse command --- cli/src/main.rs | 7 +- cli/src/parse.rs | 176 +++++++++++++++++++++++++---------------------- 2 files changed, 100 insertions(+), 83 deletions(-) diff --git a/cli/src/main.rs b/cli/src/main.rs index 15499622..5ad072c6 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -52,7 +52,8 @@ fn run() -> error::Result<()> { .arg(Arg::with_name("debug").long("debug").short("d")) .arg(Arg::with_name("debug-graph").long("debug-graph").short("D")) .arg(Arg::with_name("quiet").long("quiet").short("q")) - .arg(Arg::with_name("time").long("time").short("t")), + .arg(Arg::with_name("time").long("time").short("t")) + .arg(Arg::with_name("timeout").long("timeout").takes_value(true)), ) .subcommand( SubCommand::with_name("test") @@ -132,6 +133,9 @@ fn run() -> error::Result<()> { let debug_graph = matches.is_present("debug-graph"); let quiet = matches.is_present("quiet"); let time = matches.is_present("time"); + let timeout = matches + .value_of("timeout") + .map_or(0, |t| usize::from_str_radix(t, 10).unwrap()); loader.find_all_languages(&config.parser_directories)?; let paths = matches .values_of("path") @@ -157,6 +161,7 @@ fn run() -> error::Result<()> { max_path_length, quiet, time, + timeout, debug, debug_graph, 
)?; diff --git a/cli/src/parse.rs b/cli/src/parse.rs index 2e8b3e4c..27b96c38 100644 --- a/cli/src/parse.rs +++ b/cli/src/parse.rs @@ -12,6 +12,7 @@ pub fn parse_file_at_path( max_path_length: usize, quiet: bool, print_time: bool, + timeout: usize, debug: bool, debug_graph: bool, ) -> Result<bool> { @@ -32,111 +33,122 @@ pub fn parse_file_at_path( }))); } + parser.set_timeout_micros(timeout); let time = Instant::now(); - let tree = parser - .parse(&source_code, None) - .expect("Incompatible language version"); + let tree = parser.parse(&source_code, None); let duration = time.elapsed(); let duration_ms = duration.as_secs() * 1000 + duration.subsec_nanos() as u64 / 1000000; - let mut cursor = tree.walk(); - let stdout = io::stdout(); let mut stdout = stdout.lock(); - if !quiet { - let mut needs_newline = false; - let mut indent_level = 0; - let mut did_visit_children = false; + if let Some(tree) = tree { + let mut cursor = tree.walk(); + + if !quiet { + let mut needs_newline = false; + let mut indent_level = 0; + let mut did_visit_children = false; + loop { + let node = cursor.node(); + let is_named = node.is_named(); + if did_visit_children { + if is_named { + stdout.write(b")")?; + needs_newline = true; + } + if cursor.goto_next_sibling() { + did_visit_children = false; + } else if cursor.goto_parent() { + did_visit_children = true; + indent_level -= 1; + } else { + break; + } + } else { + if is_named { + if needs_newline { + stdout.write(b"\n")?; + } + for _ in 0..indent_level { + stdout.write(b" ")?; + } + let start = node.start_position(); + let end = node.end_position(); + write!( + &mut stdout, + "({} [{}, {}] - [{}, {}]", + node.kind(), + start.row, + start.column, + end.row, + end.column + )?; + needs_newline = true; + } + if cursor.goto_first_child() { + did_visit_children = false; + indent_level += 1; + } else { + did_visit_children = true; + } + } + } + cursor.reset(tree.root_node()); + println!(""); + } + + let mut first_error = None; loop { let node = 
cursor.node(); - let is_named = node.is_named(); - if did_visit_children { - if is_named { - stdout.write(b")")?; - needs_newline = true; - } - if cursor.goto_next_sibling() { - did_visit_children = false; - } else if cursor.goto_parent() { - did_visit_children = true; - indent_level -= 1; + if node.has_error() { + if node.is_error() || node.is_missing() { + first_error = Some(node); + break; } else { + cursor.goto_first_child(); + } + } else if !cursor.goto_next_sibling() { + if !cursor.goto_parent() { break; } - } else { - if is_named { - if needs_newline { - stdout.write(b"\n")?; - } - for _ in 0..indent_level { - stdout.write(b" ")?; - } - let start = node.start_position(); - let end = node.end_position(); - write!( - &mut stdout, - "({} [{}, {}] - [{}, {}]", - node.kind(), - start.row, - start.column, - end.row, - end.column - )?; - needs_newline = true; - } - if cursor.goto_first_child() { - did_visit_children = false; - indent_level += 1; - } else { - did_visit_children = true; - } } } - cursor.reset(tree.root_node()); - println!(""); - } - let mut first_error = None; - loop { - let node = cursor.node(); - if node.has_error() { - if node.is_error() || node.is_missing() { - first_error = Some(node); - break; - } else { - cursor.goto_first_child(); - } - } else if !cursor.goto_next_sibling() { - if !cursor.goto_parent() { - break; + if first_error.is_some() || print_time { + write!( + &mut stdout, + "{:width$}\t{} ms", + path.to_str().unwrap(), + duration_ms, + width = max_path_length + )?; + if let Some(node) = first_error { + let start = node.start_position(); + let end = node.end_position(); + write!( + &mut stdout, + "\t({} [{}, {}] - [{}, {}])", + node.kind(), + start.row, + start.column, + end.row, + end.column + )?; } + write!(&mut stdout, "\n")?; } - } - if first_error.is_some() || print_time { - write!( + return Ok(first_error.is_some()) + } else if print_time { + writeln!( &mut stdout, - "{:width$}\t{} ms", + "{:width$}\t{} ms (timed out)", 
path.to_str().unwrap(), duration_ms, width = max_path_length )?; - if let Some(node) = first_error { - let start = node.start_position(); - let end = node.end_position(); - write!( - &mut stdout, - "\t({} [{}, {}] - [{}, {}])", - node.kind(), - start.row, - start.column, - end.row, - end.column - )?; - } - write!(&mut stdout, "\n")?; } - Ok(first_error.is_some()) + Ok(false) } From 88e3907cc08c999f55cf8d01dd4d83953c75ace0 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 14 Mar 2019 12:39:04 -0700 Subject: [PATCH 4/6] Use QueryPerformanceFrequency as clock on windows --- cli/src/main.rs | 26 +++++++++++++++--------- cli/src/parse.rs | 2 +- lib/binding/bindings.rs | 5 ++--- lib/binding/lib.rs | 4 ++-- lib/include/tree_sitter/api.h | 4 ++-- lib/src/clock.h | 34 +++++++++++++++++++++++++++++++ lib/src/parser.c | 38 +++++++++++++++++------------------ 7 files changed, 76 insertions(+), 37 deletions(-) create mode 100644 lib/src/clock.h diff --git a/cli/src/main.rs b/cli/src/main.rs index 5ad072c6..dc4b5ae6 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -3,7 +3,7 @@ use std::env; use std::fs; use std::path::Path; use std::process::exit; -use std::usize; +use std::{u64, usize}; use tree_sitter_cli::{ config, error, generate, highlight, loader, logger, parse, properties, test, }; @@ -49,6 +49,7 @@ fn run() -> error::Result<()> { .multiple(true) .required(true), ) + .arg(Arg::with_name("scope").long("scope").takes_value(true)) .arg(Arg::with_name("debug").long("debug").short("d")) .arg(Arg::with_name("debug-graph").long("debug-graph").short("D")) .arg(Arg::with_name("quiet").long("quiet").short("q")) @@ -135,7 +136,7 @@ fn run() -> error::Result<()> { let time = matches.is_present("time"); let timeout = matches .value_of("timeout") - .map_or(0, |t| usize::from_str_radix(t, 10).unwrap()); + .map_or(0, |t| u64::from_str_radix(t, 10).unwrap()); loader.find_all_languages(&config.parser_directories)?; let paths = matches .values_of("path") @@ -146,15 +147,20 @@ fn 
run() -> error::Result<()> { let mut has_error = false; for path in paths { let path = Path::new(path); - let language = - if let Some((l, _)) = loader.language_configuration_for_file_name(path)? { - l - } else if let Some(l) = loader.language_at_path(&current_dir)? { - l + let language = if let Some(scope) = matches.value_of("scope") { + if let Some(config) = loader.language_configuration_for_scope(scope)? { + config.0 } else { - eprintln!("No language found"); - return Ok(()); - }; + return Err(error::Error(format!("Unknown scope '{}'", scope))); + } + } else if let Some((l, _)) = loader.language_configuration_for_file_name(path)? { + l + } else if let Some(l) = loader.language_at_path(&current_dir)? { + l + } else { + eprintln!("No language found"); + return Ok(()); + }; has_error |= parse::parse_file_at_path( language, path, diff --git a/cli/src/parse.rs b/cli/src/parse.rs index 27b96c38..f4002233 100644 --- a/cli/src/parse.rs +++ b/cli/src/parse.rs @@ -12,7 +12,7 @@ pub fn parse_file_at_path( max_path_length: usize, quiet: bool, print_time: bool, - timeout: usize, + timeout: u64, debug: bool, debug_graph: bool, ) -> Result<bool> { diff --git a/lib/binding/bindings.rs b/lib/binding/bindings.rs index 41999088..7c8c704a 100644 --- a/lib/binding/bindings.rs +++ b/lib/binding/bindings.rs @@ -1,6 +1,5 @@ /* automatically generated by rust-bindgen */ -pub type __darwin_size_t = ::std::os::raw::c_ulong; pub type FILE = [u64; 19usize]; pub type TSSymbol = u16; #[repr(C)] @@ -143,10 +142,10 @@ extern "C" { pub fn ts_parser_set_enabled(arg1: *mut TSParser, arg2: bool); } extern "C" { - pub fn ts_parser_timeout_micros(arg1: *const TSParser) -> usize; + pub fn ts_parser_timeout_micros(arg1: *const TSParser) -> u64; } extern "C" { - pub fn ts_parser_set_timeout_micros(arg1: *mut TSParser, arg2: usize); + pub fn ts_parser_set_timeout_micros(arg1: *mut TSParser, arg2: u64); } extern "C" { pub fn ts_parser_reset(arg1: *mut TSParser); diff --git a/lib/binding/lib.rs b/lib/binding/lib.rs index 
9f8f1dec..f4f161a6 100644 --- a/lib/binding/lib.rs +++ b/lib/binding/lib.rs @@ -323,11 +323,11 @@ impl Parser { unsafe { ffi::ts_parser_reset(self.0) } } - pub fn timeout_micros(&self) -> usize { + pub fn timeout_micros(&self) -> u64 { unsafe { ffi::ts_parser_timeout_micros(self.0) } } - pub fn set_timeout_micros(&mut self, timeout_micros: usize) { + pub fn set_timeout_micros(&mut self, timeout_micros: u64) { unsafe { ffi::ts_parser_set_timeout_micros(self.0, timeout_micros) } } diff --git a/lib/include/tree_sitter/api.h b/lib/include/tree_sitter/api.h index 5c72e7b1..e16ca576 100644 --- a/lib/include/tree_sitter/api.h +++ b/lib/include/tree_sitter/api.h @@ -90,8 +90,8 @@ TSTree *ts_parser_parse_string(TSParser *, const TSTree *, const char *, uint32_ TSTree *ts_parser_parse_string_encoding(TSParser *, const TSTree *, const char *, uint32_t, TSInputEncoding); bool ts_parser_enabled(const TSParser *); void ts_parser_set_enabled(TSParser *, bool); -size_t ts_parser_timeout_micros(const TSParser *); -void ts_parser_set_timeout_micros(TSParser *, size_t); +uint64_t ts_parser_timeout_micros(const TSParser *); +void ts_parser_set_timeout_micros(TSParser *, uint64_t); void ts_parser_reset(TSParser *); void ts_parser_set_included_ranges(TSParser *, const TSRange *, uint32_t); const TSRange *ts_parser_included_ranges(const TSParser *, uint32_t *); diff --git a/lib/src/clock.h b/lib/src/clock.h new file mode 100644 index 00000000..3193a6b3 --- /dev/null +++ b/lib/src/clock.h @@ -0,0 +1,34 @@ +#ifndef TREE_SITTER_CLOCK_H_ +#define TREE_SITTER_CLOCK_H_ + +#include + +#ifdef _WIN32 + +#include + +static inline uint64_t get_clock() { + LARGE_INTEGER result; + QueryPerformanceCounter(&result); + return (uint64_t)result.QuadPart; +} + +static inline uint64_t get_clocks_per_second() { + LARGE_INTEGER result; + QueryPerformanceFrequency(&result); + return (uint64_t)result.QuadPart; +} + +#else + +static inline uint64_t get_clock() { + return (uint64_t)clock(); +} + +static inline 
uint64_t get_clocks_per_second() { + return (uint64_t)CLOCKS_PER_SEC; +} + +#endif + +#endif // TREE_SITTER_CLOCK_H_ diff --git a/lib/src/parser.c b/lib/src/parser.c index 5f850c72..15b33d54 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ -4,17 +4,18 @@ #include #include #include "tree_sitter/api.h" -#include "./subtree.h" -#include "./lexer.h" -#include "./length.h" -#include "./array.h" -#include "./language.h" #include "./alloc.h" -#include "./stack.h" -#include "./reusable_node.h" -#include "./reduce_action.h" +#include "./array.h" +#include "./clock.h" #include "./error_costs.h" #include "./get_changed_ranges.h" +#include "./language.h" +#include "./length.h" +#include "./lexer.h" +#include "./reduce_action.h" +#include "./reusable_node.h" +#include "./stack.h" +#include "./subtree.h" #include "./tree.h" #define LOG(...) \ @@ -43,7 +44,6 @@ static const unsigned MAX_VERSION_COUNT = 6; static const unsigned MAX_VERSION_COUNT_OVERFLOW = 4; static const unsigned MAX_SUMMARY_DEPTH = 16; static const unsigned MAX_COST_DIFFERENCE = 16 * ERROR_COST_PER_SKIPPED_TREE; -static const unsigned CLOCKS_PER_MICROSECOND = CLOCKS_PER_SEC / 1000000; static const unsigned OP_COUNT_PER_TIMEOUT_CHECK = 100; typedef struct { @@ -66,8 +66,8 @@ struct TSParser { void *external_scanner_payload; FILE *dot_graph_file; unsigned accept_count; - clock_t clock_limit; - clock_t start_clock; + uint64_t clock_limit; + uint64_t start_clock; unsigned operation_count; volatile bool enabled; bool halt_on_error; @@ -1285,7 +1285,7 @@ static bool ts_parser__advance( for (;;) { if (!self->enabled || ++self->operation_count == OP_COUNT_PER_TIMEOUT_CHECK) { self->operation_count = 0; - if (clock() - self->start_clock > self->clock_limit) { + if ((uint64_t)(get_clock() - self->start_clock) > self->clock_limit) { ts_subtree_release(&self->tree_pool, lookahead); return false; } @@ -1509,7 +1509,7 @@ TSParser *ts_parser_new() { self->dot_graph_file = NULL; self->halt_on_error = false; self->enabled 
= true; - self->clock_limit = SIZE_MAX; + self->clock_limit = UINT64_MAX; self->start_clock = 0; self->operation_count = 0; self->old_tree = NULL_SUBTREE; @@ -1593,13 +1593,13 @@ void ts_parser_set_enabled(TSParser *self, bool enabled) { self->enabled = enabled; } -size_t ts_parser_timeout_micros(const TSParser *self) { - return self->clock_limit / CLOCKS_PER_MICROSECOND; +uint64_t ts_parser_timeout_micros(const TSParser *self) { + return self->clock_limit / (get_clocks_per_second() / 1000000); } -void ts_parser_set_timeout_micros(TSParser *self, size_t timeout_micros) { - self->clock_limit = timeout_micros * CLOCKS_PER_MICROSECOND; - if (self->clock_limit == 0) self->clock_limit = SIZE_MAX; +void ts_parser_set_timeout_micros(TSParser *self, uint64_t timeout_micros) { + self->clock_limit = timeout_micros * (get_clocks_per_second() / 1000000); + if (self->clock_limit == 0) self->clock_limit = UINT64_MAX; } void ts_parser_set_included_ranges(TSParser *self, const TSRange *ranges, uint32_t count) { @@ -1663,7 +1663,7 @@ TSTree *ts_parser_parse(TSParser *self, const TSTree *old_tree, TSInput input) { uint32_t position = 0, last_position = 0, version_count = 0; self->operation_count = 0; - self->start_clock = clock(); + self->start_clock = get_clock(); do { for (StackVersion version = 0; From 9ae594a50761fc7d5255d3767acb452e686eb085 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 14 Mar 2019 14:09:14 -0700 Subject: [PATCH 5/6] Be more loose with timeout unit test assertions --- cli/src/tests/parser_test.rs | 69 ++++++++++++++++++++---------------- 1 file changed, 39 insertions(+), 30 deletions(-) diff --git a/cli/src/tests/parser_test.rs b/cli/src/tests/parser_test.rs index 6b7228dc..afa86167 100644 --- a/cli/src/tests/parser_test.rs +++ b/cli/src/tests/parser_test.rs @@ -276,57 +276,56 @@ fn test_parsing_with_a_timeout() { let mut parser = Parser::new(); parser.set_language(get_language("json")).unwrap(); - // Parse an infinitely-long string, but pause after 
100 microseconds of processing. - parser.set_timeout_micros(200); + // Parse an infinitely-long array, but pause after 100 microseconds of processing. + parser.set_timeout_micros(100); let start_time = time::Instant::now(); let tree = parser.parse_with( &mut |offset, _| { if offset == 0 { - b"\"" + b" [" } else { - b"x" + b",0" } }, None, ); assert!(tree.is_none()); - assert!(start_time.elapsed().as_micros() > 100); - assert!(start_time.elapsed().as_micros() < 300); - - // Continue parsing, but pause after 300 microseconds of processing. - parser.set_timeout_micros(400); - let start_time = time::Instant::now(); - let tree = parser.parse_with( - &mut |offset, _| { - if offset == 0 { - b"\"" - } else { - b"x" - } - }, - None, - ); - assert!(tree.is_none()); - assert!(start_time.elapsed().as_micros() > 300); assert!(start_time.elapsed().as_micros() < 500); + // Continue parsing, but pause after 300 microseconds of processing. + parser.set_timeout_micros(1000); + let start_time = time::Instant::now(); + let tree = parser.parse_with( + &mut |offset, _| { + if offset == 0 { + b" [" + } else { + b",0" + } + }, + None, + ); + assert!(tree.is_none()); + assert!(start_time.elapsed().as_micros() > 500); + assert!(start_time.elapsed().as_micros() < 1500); + // Finish parsing - parser.set_timeout_micros(1_000_000); + parser.set_timeout_micros(0); let tree = parser .parse_with( &mut |offset, _| { - if offset > 1000 { + if offset > 5000 { b"" - } else if offset == 1000 { - b"\"" + } else if offset == 5000 { + b"]" } else { - b"y" + b",0" } }, None, ) .unwrap(); - assert_eq!(tree.root_node().to_sexp(), "(value (string))"); + assert_eq!(tree.root_node().child(0).unwrap().kind(), "array"); } #[test] @@ -349,7 +348,12 @@ fn test_parsing_with_a_timeout_and_a_reset() { None, ).unwrap(); assert_eq!( - tree.root_node().named_child(0).unwrap().named_child(0).unwrap().kind(), + tree.root_node() + .named_child(0) + .unwrap() + .named_child(0) + .unwrap() + .kind(), "string" ); @@ -369,7 
+373,12 @@ fn test_parsing_with_a_timeout_and_a_reset() { None, ).unwrap(); assert_eq!( - tree.root_node().named_child(0).unwrap().named_child(0).unwrap().kind(), + tree.root_node() + .named_child(0) + .unwrap() + .named_child(0) + .unwrap() + .kind(), "null" ); } From 59fd8528d408f3abd22e32e3695649317ac5b5d8 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 14 Mar 2019 15:21:03 -0700 Subject: [PATCH 6/6] Avoid division rounding errors w/ clock counts --- lib/src/parser.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/src/parser.c b/lib/src/parser.c index 15b33d54..7125faa9 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ -1594,11 +1594,11 @@ void ts_parser_set_enabled(TSParser *self, bool enabled) { } uint64_t ts_parser_timeout_micros(const TSParser *self) { - return self->clock_limit / (get_clocks_per_second() / 1000000); + return self->clock_limit * 1000000 / get_clocks_per_second(); } void ts_parser_set_timeout_micros(TSParser *self, uint64_t timeout_micros) { - self->clock_limit = timeout_micros * (get_clocks_per_second() / 1000000); + self->clock_limit = timeout_micros * get_clocks_per_second() / 1000000; if (self->clock_limit == 0) self->clock_limit = UINT64_MAX; }