diff --git a/cli/src/tests/parser_test.rs b/cli/src/tests/parser_test.rs index 35f14098..1fda8fd8 100644 --- a/cli/src/tests/parser_test.rs +++ b/cli/src/tests/parser_test.rs @@ -1029,6 +1029,35 @@ fn test_parsing_with_timeout_during_balancing() { }); } +#[test] +fn test_parsing_with_timeout_when_error_detected() { + let mut parser = Parser::new(); + parser.set_language(&get_language("json")).unwrap(); + + // Feed the parser an endless array, returning invalid input for every offset past 1000. + let mut offset = 0; + let erroneous_code = "!,"; + let tree = parser.parse_with_options( + &mut |i, _| match i { + 0 => "[", + 1..=1000 => "0,", + _ => erroneous_code, + }, + None, + Some(ParseOptions::new().progress_callback(&mut |state| { + offset = state.current_byte_offset(); + state.has_error() + })), + ); + + // The callback is also invoked when parsing ends, but the point of this test is that parsing + // stops as soon as the error is detected. To verify that, check that the last byte offset + // reported to the callback is 1002, i.e. 1000 + the length of the erroneous code, and that + // no tree was produced. 
+ assert_eq!(offset, 1000 + erroneous_code.len()); + assert!(tree.is_none()); +} + // Included Ranges #[test] diff --git a/lib/binding_rust/bindings.rs b/lib/binding_rust/bindings.rs index 73cd457e..8323bd06 100644 --- a/lib/binding_rust/bindings.rs +++ b/lib/binding_rust/bindings.rs @@ -82,6 +82,7 @@ pub struct TSInput { pub struct TSParseState { pub payload: *mut ::core::ffi::c_void, pub current_byte_offset: u32, + pub has_error: bool, } #[repr(C)] #[derive(Debug, Copy, Clone)] diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index f45a40cc..8c905a6d 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -147,6 +147,11 @@ impl ParseState { pub const fn current_byte_offset(&self) -> usize { unsafe { self.0.as_ref() }.current_byte_offset as usize } + + #[must_use] + pub const fn has_error(&self) -> bool { + unsafe { self.0.as_ref() }.has_error + } } /// A stateful object that is passed into a [`QueryProgressCallback`] diff --git a/lib/binding_web/lib/imports.js b/lib/binding_web/lib/imports.js index 6e06eece..01e789ae 100644 --- a/lib/binding_web/lib/imports.js +++ b/lib/binding_web/lib/imports.js @@ -23,9 +23,9 @@ mergeInto(LibraryManager.library, { } }, - tree_sitter_progress_callback(currentOffset) { + tree_sitter_progress_callback(currentOffset, hasError) { if (Module.currentProgressCallback) { - return Module.currentProgressCallback({ currentOffset }); + return Module.currentProgressCallback({ currentOffset, hasError }); } return false; }, diff --git a/lib/binding_web/lib/tree-sitter.c b/lib/binding_web/lib/tree-sitter.c index 70f546aa..dcf00e79 100644 --- a/lib/binding_web/lib/tree-sitter.c +++ b/lib/binding_web/lib/tree-sitter.c @@ -139,7 +139,8 @@ extern void tree_sitter_log_callback( ); extern bool tree_sitter_progress_callback( - uint32_t current_offset + uint32_t current_offset, + bool has_error ); extern bool tree_sitter_query_progress_callback( @@ -178,7 +179,7 @@ static void call_log_callback( static bool 
progress_callback( TSParseState *state ) { - return tree_sitter_progress_callback(state->current_byte_offset); + return tree_sitter_progress_callback(state->current_byte_offset, state->has_error); } static bool query_progress_callback( diff --git a/lib/binding_web/src/parser.ts b/lib/binding_web/src/parser.ts index 855bc720..11aca7ea 100644 --- a/lib/binding_web/src/parser.ts +++ b/lib/binding_web/src/parser.ts @@ -53,6 +53,9 @@ export interface ParseOptions { export interface ParseState { /** The byte offset in the document that the parser is at. */ currentOffset: number; + + /** Indicates whether the parser has encountered an error during parsing. */ + hasError: boolean; } /** diff --git a/lib/binding_web/test/parser.test.ts b/lib/binding_web/test/parser.test.ts index 1186e02c..88240d8c 100644 --- a/lib/binding_web/test/parser.test.ts +++ b/lib/binding_web/test/parser.test.ts @@ -413,5 +413,34 @@ describe('Parser', () => { { progressCallback }, )).toBeNull(); }); + + it('times out when an error is detected', { timeout: 5000 }, () => { + parser.setLanguage(JSON); + + let offset = 0; + const erroneousCode = '!,'; + const progressCallback = (state: ParseState) => { + offset = state.currentOffset; + return state.hasError; + }; + + const tree = parser.parse( + (offset) => { + if (offset === 0) return '['; + if (offset >= 1 && offset < 1000) return '0,'; + return erroneousCode; + }, + null, + { progressCallback }, + ); + + // The callback is also invoked when parsing ends, but the point of this test is that parsing + // stops as soon as the error is detected. To verify that, check that the last offset reported + // to the callback is 1000 + the length of the erroneous code (1002). In this WASM test that + // sum is multiplied by 2 because JavaScript strings are UTF-16 encoded. No tree should be + // returned either. 
+ expect(offset).toBe((1000 + erroneousCode.length) * 2); + expect(tree).toBeNull(); + }); }); }); diff --git a/lib/include/tree_sitter/api.h b/lib/include/tree_sitter/api.h index 493a37f9..9b0dfac0 100644 --- a/lib/include/tree_sitter/api.h +++ b/lib/include/tree_sitter/api.h @@ -94,6 +94,7 @@ typedef struct TSInput { typedef struct TSParseState { void *payload; uint32_t current_byte_offset; + bool has_error; } TSParseState; typedef struct TSParseOptions { diff --git a/lib/src/parser.c b/lib/src/parser.c index fb7d60d8..41a349d4 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ -116,6 +116,7 @@ struct TSParser { unsigned included_range_difference_index; bool has_scanner_error; bool canceled_balancing; + bool has_error; }; typedef struct { @@ -1419,6 +1420,16 @@ static void ts_parser__recover( self->stack, version, ts_subtree_last_external_token(lookahead) ); } + + bool has_error = true; + for (unsigned i = 0; i < ts_stack_version_count(self->stack); i++) { + ErrorStatus status = ts_parser__version_status(self, i); + if (!status.is_in_error) { + has_error = false; + break; + } + } + self->has_error = has_error; } static void ts_parser__handle_error( @@ -1525,6 +1536,7 @@ static bool ts_parser__check_progress(TSParser *self, Subtree *lookahead, const } if (self->parse_options.progress_callback && position != NULL) { self->parse_state.current_byte_offset = *position; + self->parse_state.has_error = self->has_error; } if ( self->operation_count == 0 && @@ -1929,6 +1941,7 @@ TSParser *ts_parser_new(void) { self->timeout_duration = 0; self->language = NULL; self->has_scanner_error = false; + self->has_error = false; self->canceled_balancing = false; self->external_scanner_payload = NULL; self->end_clock = clock_null(); @@ -2066,6 +2079,7 @@ void ts_parser_reset(TSParser *self) { } self->accept_count = 0; self->has_scanner_error = false; + self->has_error = false; self->parse_options = (TSParseOptions) {0}; self->parse_state = (TSParseState) {0}; }