From 26b89da9bbdeaf1f1c9c9fe7771d86ee34ff27cb Mon Sep 17 00:00:00 2001 From: Amaan Qureshi Date: Sun, 27 Oct 2024 23:55:48 -0400 Subject: [PATCH] feat(lib): add `ts_parser_parse_with_options` Currently, this allows users to pass in a callback that should be invoked to check whether or not to halt parsing --- lib/binding_rust/bindings.rs | 24 +++++++++++++++++++++- lib/include/tree_sitter/api.h | 34 ++++++++++++++++++++++++++++++- lib/src/parser.c | 38 ++++++++++++++++++++++++++++------- 3 files changed, 87 insertions(+), 9 deletions(-) diff --git a/lib/binding_rust/bindings.rs b/lib/binding_rust/bindings.rs index 2c0af264..57fbad87 100644 --- a/lib/binding_rust/bindings.rs +++ b/lib/binding_rust/bindings.rs @@ -72,6 +72,19 @@ pub struct TSInput { >, pub encoding: TSInputEncoding, } +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSParseState { + pub payload: *mut ::core::ffi::c_void, + pub current_byte_offset: u32, +} +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct TSParseOptions { + pub payload: *mut ::core::ffi::c_void, + pub progress_callback: + ::core::option::Option<unsafe extern "C" fn(state: *mut TSParseState) -> bool>, +} pub const TSLogTypeParse: TSLogType = 0; pub const TSLogTypeLex: TSLogType = 1; pub type TSLogType = ::core::ffi::c_uint; @@ -178,13 +191,22 @@ extern "C" { pub fn ts_parser_included_ranges(self_: *const TSParser, count: *mut u32) -> *const TSRange; } extern "C" { - #[doc = " Use the parser to parse some source code and create a syntax tree.\n\n If you are parsing this document for the first time, pass `NULL` for the\n `old_tree` parameter. Otherwise, if you have already parsed an earlier\n version of this document and the document has since been edited, pass the\n previous syntax tree so that the unchanged parts of it can be reused.\n This will save time and memory. 
For this to work correctly, you must have\n already edited the old syntax tree using the [`ts_tree_edit`] function in a\n way that exactly matches the source code changes.\n\n The [`TSInput`] parameter lets you specify how to read the text. It has the\n following three fields:\n 1. [`read`]: A function to retrieve a chunk of text at a given byte offset\n and (row, column) position. The function should return a pointer to the\n text and write its length to the [`bytes_read`] pointer. The parser does\n not take ownership of this buffer; it just borrows it until it has\n finished reading it. The function should write a zero value to the\n [`bytes_read`] pointer to indicate the end of the document.\n 2. [`payload`]: An arbitrary pointer that will be passed to each invocation\n of the [`read`] function.\n 3. [`encoding`]: An indication of how the text is encoded. Either\n `TSInputEncodingUTF8` or `TSInputEncodingUTF16`.\n\n This function returns a syntax tree on success, and `NULL` on failure. There\n are three possible reasons for failure:\n 1. The parser does not have a language assigned. Check for this using the\n[`ts_parser_language`] function.\n 2. Parsing was cancelled due to a timeout that was set by an earlier call to\n the [`ts_parser_set_timeout_micros`] function. You can resume parsing from\n where the parser left out by calling [`ts_parser_parse`] again with the\n same arguments. Or you can start parsing from scratch by first calling\n [`ts_parser_reset`].\n 3. Parsing was cancelled using a cancellation flag that was set by an\n earlier call to [`ts_parser_set_cancellation_flag`]. 
You can resume parsing\n from where the parser left out by calling [`ts_parser_parse`] again with\n the same arguments.\n\n [`read`]: TSInput::read\n [`payload`]: TSInput::payload\n [`encoding`]: TSInput::encoding\n [`bytes_read`]: TSInput::read"] + #[doc = " Use the parser to parse some source code and create a syntax tree.\n\n If you are parsing this document for the first time, pass `NULL` for the\n `old_tree` parameter. Otherwise, if you have already parsed an earlier\n version of this document and the document has since been edited, pass the\n previous syntax tree so that the unchanged parts of it can be reused.\n This will save time and memory. For this to work correctly, you must have\n already edited the old syntax tree using the [`ts_tree_edit`] function in a\n way that exactly matches the source code changes.\n\n The [`TSInput`] parameter lets you specify how to read the text. It has the\n following three fields:\n 1. [`read`]: A function to retrieve a chunk of text at a given byte offset\n and (row, column) position. The function should return a pointer to the\n text and write its length to the [`bytes_read`] pointer. The parser does\n not take ownership of this buffer; it just borrows it until it has\n finished reading it. The function should write a zero value to the\n [`bytes_read`] pointer to indicate the end of the document.\n 2. [`payload`]: An arbitrary pointer that will be passed to each invocation\n of the [`read`] function.\n 3. [`encoding`]: An indication of how the text is encoded. Either\n `TSInputEncodingUTF8` or `TSInputEncodingUTF16`.\n\n This function returns a syntax tree on success, and `NULL` on failure. There\n are four possible reasons for failure:\n 1. The parser does not have a language assigned. Check for this using the\n[`ts_parser_language`] function.\n 2. Parsing was cancelled due to a timeout that was set by an earlier call to\n the [`ts_parser_set_timeout_micros`] function. 
You can resume parsing from\n where the parser left out by calling [`ts_parser_parse`] again with the\n same arguments. Or you can start parsing from scratch by first calling\n [`ts_parser_reset`].\n 3. Parsing was cancelled using a cancellation flag that was set by an\n earlier call to [`ts_parser_set_cancellation_flag`]. You can resume parsing\n from where the parser left out by calling [`ts_parser_parse`] again with\n the same arguments.\n 4. Parsing was cancelled due to the progress callback returning true. This callback\n is passed in [`ts_parser_parse_with_options`] inside the [`TSParseOptions`] struct.\n\n [`read`]: TSInput::read\n [`payload`]: TSInput::payload\n [`encoding`]: TSInput::encoding\n [`bytes_read`]: TSInput::read"] pub fn ts_parser_parse( self_: *mut TSParser, old_tree: *const TSTree, input: TSInput, ) -> *mut TSTree; } +extern "C" { + #[doc = " Use the parser to parse some source code and create a syntax tree, with some options.\n\n See [`ts_parser_parse`] for more details."] + pub fn ts_parser_parse_with_options( + self_: *mut TSParser, + old_tree: *const TSTree, + input: TSInput, + parse_options: TSParseOptions, + ) -> *mut TSTree; +} extern "C" { #[doc = " Use the parser to parse some source code stored in one contiguous buffer.\n The first two parameters are the same as in the [`ts_parser_parse`] function\n above. 
The second two parameters indicate the location of the buffer and its\n length in bytes."] pub fn ts_parser_parse_string( diff --git a/lib/include/tree_sitter/api.h b/lib/include/tree_sitter/api.h index 443a8114..5c994136 100644 --- a/lib/include/tree_sitter/api.h +++ b/lib/include/tree_sitter/api.h @@ -79,6 +79,16 @@ typedef struct TSInput { TSInputEncoding encoding; } TSInput; +typedef struct TSParseState { + void *payload; + uint32_t current_byte_offset; +} TSParseState; + +typedef struct TSParseOptions { + void *payload; + bool (*progress_callback)(TSParseState *state); +} TSParseOptions; + typedef enum TSLogType { TSLogTypeParse, TSLogTypeLex, @@ -247,7 +257,7 @@ const TSRange *ts_parser_included_ranges( * `TSInputEncodingUTF8` or `TSInputEncodingUTF16`. * * This function returns a syntax tree on success, and `NULL` on failure. There - * are three possible reasons for failure: + * are four possible reasons for failure: * 1. The parser does not have a language assigned. Check for this using the [`ts_parser_language`] function. * 2. Parsing was cancelled due to a timeout that was set by an earlier call to @@ -259,6 +269,8 @@ const TSRange *ts_parser_included_ranges( * earlier call to [`ts_parser_set_cancellation_flag`]. You can resume parsing * from where the parser left out by calling [`ts_parser_parse`] again with * the same arguments. + * 4. Parsing was cancelled due to the progress callback returning true. This callback + * is passed in [`ts_parser_parse_with_options`] inside the [`TSParseOptions`] struct. * * [`read`]: TSInput::read * [`payload`]: TSInput::payload @@ -271,6 +283,18 @@ TSTree *ts_parser_parse( TSInput input ); +/** + * Use the parser to parse some source code and create a syntax tree, with some options. + * + * See [`ts_parser_parse`] for more details. 
+ */ +TSTree* ts_parser_parse_with_options( + TSParser *self, + const TSTree *old_tree, + TSInput input, + TSParseOptions parse_options +); + /** * Use the parser to parse some source code stored in one contiguous buffer. * The first two parameters are the same as in the [`ts_parser_parse`] function @@ -310,6 +334,8 @@ TSTree *ts_parser_parse_string_encoding( void ts_parser_reset(TSParser *self); /** + * @deprecated use [`ts_parser_parse_with_options`] and pass in a callback instead, this will be removed in 0.26. + * * Set the maximum duration in microseconds that parsing should be allowed to * take before halting. * @@ -319,11 +345,15 @@ void ts_parser_reset(TSParser *self); void ts_parser_set_timeout_micros(TSParser *self, uint64_t timeout_micros); /** + * @deprecated use [`ts_parser_parse_with_options`] and pass in a callback instead, this will be removed in 0.26. + * * Get the duration in microseconds that parsing is allowed to take. */ uint64_t ts_parser_timeout_micros(const TSParser *self); /** + * @deprecated use [`ts_parser_parse_with_options`] and pass in a callback instead, this will be removed in 0.26. + * * Set the parser's current cancellation flag pointer. * * If a non-null pointer is assigned, then the parser will periodically read @@ -333,6 +363,8 @@ uint64_t ts_parser_timeout_micros(const TSParser *self); void ts_parser_set_cancellation_flag(TSParser *self, const size_t *flag); /** + * @deprecated use [`ts_parser_parse_with_options`] and pass in a callback instead, this will be removed in 0.26. + * * Get the parser's current cancellation flag pointer. 
*/ const size_t *ts_parser_cancellation_flag(const TSParser *self); diff --git a/lib/src/parser.c b/lib/src/parser.c index b82be47e..9c5ddeee 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ -111,6 +111,8 @@ struct TSParser { const volatile size_t *cancellation_flag; Subtree old_tree; TSRangeArray included_range_differences; + TSParseOptions parse_options; + TSParseState parse_state; unsigned included_range_difference_index; bool has_scanner_error; }; @@ -1562,20 +1564,26 @@ static bool ts_parser__advance( } } - // If a cancellation flag or a timeout was provided, then check every + // If a cancellation flag, timeout, or progress callback was provided, then check every // time a fixed number of parse actions has been processed. if (++self->operation_count == OP_COUNT_PER_PARSER_TIMEOUT_CHECK) { self->operation_count = 0; } + if (self->parse_options.progress_callback) { + self->parse_state.current_byte_offset = position; + } if ( self->operation_count == 0 && - ((self->cancellation_flag && atomic_load(self->cancellation_flag)) || - (!clock_is_null(self->end_clock) && clock_is_gt(clock_now(), self->end_clock))) + ( + (self->cancellation_flag && atomic_load(self->cancellation_flag)) || + (!clock_is_null(self->end_clock) && clock_is_gt(clock_now(), self->end_clock)) || + (self->parse_options.progress_callback && self->parse_options.progress_callback(&self->parse_state)) + ) ) { - if (lookahead.ptr) { - ts_subtree_release(&self->tree_pool, lookahead); - } - return false; + if (lookahead.ptr) { + ts_subtree_release(&self->tree_pool, lookahead); + } + return false; } // Process each parse action for the current lookahead token in @@ -2118,6 +2126,22 @@ exit: return result; } +TSTree *ts_parser_parse_with_options( + TSParser *self, + const TSTree *old_tree, + TSInput input, + TSParseOptions parse_options +) { + self->parse_options = parse_options; + self->parse_state = (TSParseState) { + .payload = parse_options.payload, + }; + TSTree *result = ts_parser_parse(self, 
old_tree, input); + self->parse_options = (TSParseOptions) {0}; + self->parse_state = (TSParseState) {0}; + return result; +} + TSTree *ts_parser_parse_string( TSParser *self, const TSTree *old_tree,