feat: add error information in the progress callback
This allows users to bail out of parsing from the progress callback once an error has *definitely* been detected, that is, when all possible stack versions have a non-zero error cost. Co-authored-by: Amaan Qureshi <amaanq12@gmail.com>
This commit is contained in:
parent
ca087d2c07
commit
cda634a1c4
9 changed files with 87 additions and 4 deletions
|
|
@ -1029,6 +1029,35 @@ fn test_parsing_with_timeout_during_balancing() {
|
|||
});
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parsing_with_timeout_when_error_detected() {
|
||||
let mut parser = Parser::new();
|
||||
parser.set_language(&get_language("json")).unwrap();
|
||||
|
||||
// Parse an infinitely-long array, but insert an error after 1000 characters.
|
||||
let mut offset = 0;
|
||||
let erroneous_code = "!,";
|
||||
let tree = parser.parse_with_options(
|
||||
&mut |i, _| match i {
|
||||
0 => "[",
|
||||
1..=1000 => "0,",
|
||||
_ => erroneous_code,
|
||||
},
|
||||
None,
|
||||
Some(ParseOptions::new().progress_callback(&mut |state| {
|
||||
offset = state.current_byte_offset();
|
||||
state.has_error()
|
||||
})),
|
||||
);
|
||||
|
||||
// The callback is called at the end of parsing; what we're asserting here, however, is that
|
||||
// parsing ends as soon as the error is detected. This is verified by checking that the offset
|
||||
// of the last byte processed is 1002, i.e. 1000 plus the length of the erroneous code we
|
||||
// inserted.
|
||||
assert_eq!(offset, 1000 + erroneous_code.len());
|
||||
assert!(tree.is_none());
|
||||
}
|
||||
|
||||
// Included Ranges
|
||||
|
||||
#[test]
|
||||
|
|
|
|||
|
|
@ -82,6 +82,7 @@ pub struct TSInput {
|
|||
pub struct TSParseState {
|
||||
pub payload: *mut ::core::ffi::c_void,
|
||||
pub current_byte_offset: u32,
|
||||
pub has_error: bool,
|
||||
}
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
|
|
|
|||
|
|
@ -147,6 +147,11 @@ impl ParseState {
|
|||
pub const fn current_byte_offset(&self) -> usize {
|
||||
unsafe { self.0.as_ref() }.current_byte_offset as usize
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub const fn has_error(&self) -> bool {
|
||||
unsafe { self.0.as_ref() }.has_error
|
||||
}
|
||||
}
|
||||
|
||||
/// A stateful object that is passed into a [`QueryProgressCallback`]
|
||||
|
|
|
|||
|
|
@ -23,9 +23,9 @@ mergeInto(LibraryManager.library, {
|
|||
}
|
||||
},
|
||||
|
||||
tree_sitter_progress_callback(currentOffset) {
|
||||
tree_sitter_progress_callback(currentOffset, hasError) {
|
||||
if (Module.currentProgressCallback) {
|
||||
return Module.currentProgressCallback({ currentOffset });
|
||||
return Module.currentProgressCallback({ currentOffset, hasError });
|
||||
}
|
||||
return false;
|
||||
},
|
||||
|
|
|
|||
|
|
@ -139,7 +139,8 @@ extern void tree_sitter_log_callback(
|
|||
);
|
||||
|
||||
extern bool tree_sitter_progress_callback(
|
||||
uint32_t current_offset
|
||||
uint32_t current_offset,
|
||||
bool has_error
|
||||
);
|
||||
|
||||
extern bool tree_sitter_query_progress_callback(
|
||||
|
|
@ -178,7 +179,7 @@ static void call_log_callback(
|
|||
static bool progress_callback(
|
||||
TSParseState *state
|
||||
) {
|
||||
return tree_sitter_progress_callback(state->current_byte_offset);
|
||||
return tree_sitter_progress_callback(state->current_byte_offset, state->has_error);
|
||||
}
|
||||
|
||||
static bool query_progress_callback(
|
||||
|
|
|
|||
|
|
@ -53,6 +53,9 @@ export interface ParseOptions {
|
|||
export interface ParseState {
|
||||
/** The byte offset in the document that the parser is at. */
|
||||
currentOffset: number;
|
||||
|
||||
/** Indicates whether the parser has encountered an error during parsing. */
|
||||
hasError: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -413,5 +413,34 @@ describe('Parser', () => {
|
|||
{ progressCallback },
|
||||
)).toBeNull();
|
||||
});
|
||||
|
||||
it('times out when an error is detected', { timeout: 5000 }, () => {
|
||||
parser.setLanguage(JSON);
|
||||
|
||||
let offset = 0;
|
||||
const erroneousCode = '!,';
|
||||
const progressCallback = (state: ParseState) => {
|
||||
offset = state.currentOffset;
|
||||
return state.hasError;
|
||||
};
|
||||
|
||||
const tree = parser.parse(
|
||||
(offset) => {
|
||||
if (offset === 0) return '[';
|
||||
if (offset >= 1 && offset < 1000) return '0,';
|
||||
return erroneousCode;
|
||||
},
|
||||
null,
|
||||
{ progressCallback },
|
||||
);
|
||||
|
||||
// The callback is called at the end of parsing; what we're asserting here, however, is that
|
||||
// parsing ends as soon as the error is detected. This is verified by checking that the offset
|
||||
// of the last byte processed is 1000 plus the length of the erroneous code we inserted (1002),
|
||||
// multiplied by 2. Note that in this WASM test, we multiply the offset by 2 because JavaScript
|
||||
// strings are UTF-16 encoded.
|
||||
expect(offset).toBe((1000 + erroneousCode.length) * 2);
|
||||
expect(tree).toBeNull();
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -94,6 +94,7 @@ typedef struct TSInput {
|
|||
typedef struct TSParseState {
|
||||
void *payload;
|
||||
uint32_t current_byte_offset;
|
||||
bool has_error;
|
||||
} TSParseState;
|
||||
|
||||
typedef struct TSParseOptions {
|
||||
|
|
|
|||
|
|
@ -116,6 +116,7 @@ struct TSParser {
|
|||
unsigned included_range_difference_index;
|
||||
bool has_scanner_error;
|
||||
bool canceled_balancing;
|
||||
bool has_error;
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
|
|
@ -1419,6 +1420,16 @@ static void ts_parser__recover(
|
|||
self->stack, version, ts_subtree_last_external_token(lookahead)
|
||||
);
|
||||
}
|
||||
|
||||
bool has_error = true;
|
||||
for (unsigned i = 0; i < ts_stack_version_count(self->stack); i++) {
|
||||
ErrorStatus status = ts_parser__version_status(self, i);
|
||||
if (!status.is_in_error) {
|
||||
has_error = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
self->has_error = has_error;
|
||||
}
|
||||
|
||||
static void ts_parser__handle_error(
|
||||
|
|
@ -1525,6 +1536,7 @@ static bool ts_parser__check_progress(TSParser *self, Subtree *lookahead, const
|
|||
}
|
||||
if (self->parse_options.progress_callback && position != NULL) {
|
||||
self->parse_state.current_byte_offset = *position;
|
||||
self->parse_state.has_error = self->has_error;
|
||||
}
|
||||
if (
|
||||
self->operation_count == 0 &&
|
||||
|
|
@ -1929,6 +1941,7 @@ TSParser *ts_parser_new(void) {
|
|||
self->timeout_duration = 0;
|
||||
self->language = NULL;
|
||||
self->has_scanner_error = false;
|
||||
self->has_error = false;
|
||||
self->canceled_balancing = false;
|
||||
self->external_scanner_payload = NULL;
|
||||
self->end_clock = clock_null();
|
||||
|
|
@ -2066,6 +2079,7 @@ void ts_parser_reset(TSParser *self) {
|
|||
}
|
||||
self->accept_count = 0;
|
||||
self->has_scanner_error = false;
|
||||
self->has_error = false;
|
||||
self->parse_options = (TSParseOptions) {0};
|
||||
self->parse_state = (TSParseState) {0};
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue