feat: allow external scanners to use the logger
Co-authored-by: Amaan Qureshi <amaanq12@gmail.com>
This commit is contained in:
parent
fec6c77da8
commit
2bb20fe2fe
5 changed files with 40 additions and 0 deletions
|
|
@ -1422,6 +1422,30 @@ if foo && bar || baz {}
|
|||
parser.parse(&input, Some(&tree)).unwrap();
|
||||
}
|
||||
|
||||
#[test]
fn test_parsing_with_scanner_logging() {
    // Build a parser from the fixture grammar whose external scanner calls
    // the `lexer->log` callback added for external scanners.
    let fixture_dir = fixtures_dir().join("test_grammars").join("external_tokens");
    let grammar_json = load_grammar_file(&fixture_dir.join("grammar.js"), None).unwrap();
    let (grammar_name, parser_code) = generate_parser_for_grammar(&grammar_json).unwrap();

    let mut parser = Parser::new();
    parser
        .set_language(&get_test_language(&grammar_name, &parser_code, Some(&fixture_dir)))
        .unwrap();

    // Watch the lex log for the exact message the external scanner emits.
    let mut found = false;
    parser.set_logger(Some(Box::new(|log_type, message| {
        if log_type == LogType::Lex && message == "Found a percent string" {
            found = true;
        }
    })));

    let source_code = "x + %(sup (external) scanner?)";

    parser.parse(source_code, None).unwrap();
    assert!(found);
}
|
||||
|
||||
const fn simple_range(start: usize, end: usize) -> Range {
|
||||
Range {
|
||||
start_byte: start,
|
||||
|
|
|
|||
|
|
@ -862,6 +862,7 @@ This function is responsible for recognizing external tokens. It should return `
|
|||
* **`uint32_t (*get_column)(TSLexer *)`** - A function for querying the current column position of the lexer. It returns the number of codepoints since the start of the current line. The codepoint position is recalculated on every call to this function by reading from the start of the line.
|
||||
* **`bool (*is_at_included_range_start)(const TSLexer *)`** - A function for checking whether the parser has just skipped some characters in the document. When parsing an embedded document using the `ts_parser_set_included_ranges` function (described in the [multi-language document section][multi-language-section]), the scanner may want to apply some special behavior when moving to a disjoint part of the document. For example, in [EJS documents][ejs], the JavaScript parser uses this function to enable inserting automatic semicolon tokens in between the code directives, delimited by `<%` and `%>`.
|
||||
* **`bool (*eof)(const TSLexer *)`** - A function for determining whether the lexer is at the end of the file. The value of `lookahead` will be `0` at the end of a file, but this function should be used instead of checking for that value because the `0` or "NUL" value is also a valid character that could be present in the file being parsed.
|
||||
* **`void (*log)(const TSLexer *, const char *format, ...)`** - A `printf`-like function for logging. The log is viewable through e.g. `tree-sitter parse --debug` or the browser's console after checking the `log` option in the [Playground](./playground).
|
||||
|
||||
The third argument to the `scan` function is an array of booleans that indicates which of the external tokens are currently expected by the parser. You should only look for a given token if it is valid according to this array. At the same time, you cannot backtrack, so you may need to combine certain pieces of logic.
|
||||
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@
|
|||
#include "./subtree.h"
|
||||
#include "./length.h"
|
||||
#include "./unicode.h"
|
||||
#include <stdarg.h>
|
||||
|
||||
#define LOG(message, character) \
|
||||
if (self->logger.log) { \
|
||||
|
|
@ -284,6 +285,17 @@ static bool ts_lexer__is_at_included_range_start(const TSLexer *_self) {
|
|||
}
|
||||
}
|
||||
|
||||
static void ts_lexer__log(const TSLexer *_self, const char *fmt, ...) {
|
||||
Lexer *self = (Lexer *)_self;
|
||||
va_list args;
|
||||
va_start(args, fmt);
|
||||
if (self->logger.log) {
|
||||
vsnprintf(self->debug_buffer, TREE_SITTER_SERIALIZATION_BUFFER_SIZE, fmt, args);
|
||||
self->logger.log(self->logger.payload, TSLogTypeLex, self->debug_buffer);
|
||||
}
|
||||
va_end(args);
|
||||
}
|
||||
|
||||
void ts_lexer_init(Lexer *self) {
|
||||
*self = (Lexer) {
|
||||
.data = {
|
||||
|
|
@ -295,6 +307,7 @@ void ts_lexer_init(Lexer *self) {
|
|||
.get_column = ts_lexer__get_column,
|
||||
.is_at_included_range_start = ts_lexer__is_at_included_range_start,
|
||||
.eof = ts_lexer__eof,
|
||||
.log = ts_lexer__log,
|
||||
.lookahead = 0,
|
||||
.result_symbol = 0,
|
||||
},
|
||||
|
|
|
|||
|
|
@ -47,6 +47,7 @@ struct TSLexer {
|
|||
uint32_t (*get_column)(TSLexer *);
|
||||
bool (*is_at_included_range_start)(const TSLexer *);
|
||||
bool (*eof)(const TSLexer *);
|
||||
void (*log)(const TSLexer *, const char *, ...);
|
||||
};
|
||||
|
||||
typedef enum {
|
||||
|
|
|
|||
|
|
@ -77,6 +77,7 @@ bool tree_sitter_external_tokens_external_scanner_scan(
|
|||
|
||||
for (;;) {
|
||||
if (scanner->depth == 0) {
|
||||
lexer->log(lexer, "Found a percent string");
|
||||
lexer->result_symbol = percent_string;
|
||||
return true;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue