Call external scanner functions when lexing

This commit is contained in:
Max Brunsfeld 2016-12-02 22:03:48 -08:00
parent c966af0412
commit 0f8e130687
12 changed files with 164 additions and 34 deletions

View file

@ -65,6 +65,7 @@ typedef union {
typedef struct TSLanguage {
uint32_t symbol_count;
uint32_t token_count;
uint32_t external_token_count;
const char **symbol_names;
const TSSymbolMetadata *symbol_metadata;
const unsigned short *parse_table;
@ -75,7 +76,7 @@ typedef struct TSLanguage {
const bool *external_token_lists;
struct {
void * (*create)();
bool (*scan)(TSLexer *, const bool *symbol_whitelist);
bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
void (*destroy)(void *);
} external_scanner;
} TSLanguage;
@ -158,7 +159,6 @@ typedef struct TSLanguage {
{ .type = TSParseActionTypeAccept } \
}
#define GET_LANGUAGE(...) \
static TSLanguage language = { \
.symbol_count = SYMBOL_COUNT, \
@ -169,6 +169,7 @@ typedef struct TSLanguage {
.lex_modes = ts_lex_modes, \
.symbol_names = ts_symbol_names, \
.lex_fn = ts_lex, \
.external_token_count = EXTERNAL_TOKEN_COUNT, \
.external_token_lists = (const bool *)ts_external_token_lists, \
.external_token_symbol_map = ts_external_token_symbol_map, \
.external_scanner = {__VA_ARGS__} \

View file

@ -5,6 +5,7 @@
#include "compiler/prepare_grammar/extract_tokens.h"
#include "helpers/rule_helpers.h"
#include "helpers/equals_pointer.h"
#include "helpers/stream_methods.h"
START_TEST
@ -211,9 +212,10 @@ describe("extract_tokens", []() {
}, { choice({ i_sym(1), blank() }) }, {}});
AssertThat(get<2>(result), !Equals(CompileError::none()));
AssertThat(get<2>(result), Equals(
CompileError(TSCompileErrorTypeInvalidUbiquitousToken,
"Not a token: (choice (sym 1) (blank))")));
AssertThat(get<2>(result), Equals(CompileError(
TSCompileErrorTypeInvalidUbiquitousToken,
"Not a token: (choice (non-terminal 1) (blank))"
)));
});
});
});

View file

@ -1,13 +1,108 @@
#include <stdbool.h>
#include <tree_sitter/parser.h>
enum {
percent_string,
percent_string_start,
percent_string_end
};
typedef struct {
int32_t open_delimiter;
int32_t close_delimiter;
uint32_t depth;
} Scanner;
void *ts_language_external_scanner_example_external_scanner_create() {
puts("HELLO FROM EXTERNAL SCANNER");
return 0;
Scanner *scanner = malloc(sizeof(Scanner));
*scanner = (Scanner){
.open_delimiter = 0,
.close_delimiter = 0,
.depth = 0
};
return scanner;
}
bool ts_language_external_scanner_example_external_scanner_scan() {
return true;
bool ts_language_external_scanner_example_external_scanner_scan(
void *payload, TSLexer *lexer, const bool *whitelist) {
Scanner *scanner = payload;
if (whitelist[percent_string]) {
while (lexer->lookahead == ' ' ||
lexer->lookahead == '\t' ||
lexer->lookahead == '\n') {
lexer->advance(lexer, 0, true);
}
if (lexer->lookahead != '%') return false;
lexer->advance(lexer, 0, false);
switch (lexer->lookahead) {
case '(':
scanner->open_delimiter = '(';
scanner->close_delimiter = ')';
scanner->depth = 1;
break;
case '[':
scanner->open_delimiter = '[';
scanner->close_delimiter = ']';
scanner->depth = 1;
break;
case '{':
scanner->open_delimiter = '{';
scanner->close_delimiter = '}';
scanner->depth = 1;
break;
default:
return false;
}
lexer->advance(lexer, 0, false);
for (;;) {
if (scanner->depth == 0) {
lexer->result_symbol = percent_string;
return true;
}
if (lexer->lookahead == scanner->open_delimiter) {
scanner->depth++;
} else if (lexer->lookahead == scanner->close_delimiter) {
scanner->depth--;
} else if (lexer->lookahead == '#') {
lexer->advance(lexer, 0, false);
if (lexer->lookahead == '{') {
lexer->advance(lexer, 0, false);
lexer->result_symbol = percent_string_start;
return true;
}
}
lexer->advance(lexer, 0, false);
}
} else if (whitelist[percent_string_end]) {
if (lexer->lookahead != '}') return false;
lexer->advance(lexer, 0, false);
for (;;) {
if (scanner->depth == 0) {
lexer->result_symbol = percent_string_end;
return true;
}
if (lexer->lookahead == scanner->open_delimiter) {
scanner->depth++;
} else if (lexer->lookahead == scanner->close_delimiter) {
scanner->depth--;
}
lexer->advance(lexer, 0, false);
}
}
return false;
}
void ts_language_external_scanner_example_external_scanner_destroy() {
void ts_language_external_scanner_example_external_scanner_destroy(void *payload) {
free(payload);
}

View file

@ -562,7 +562,11 @@ describe("compile_grammar", []() {
"spec/fixtures/external_scanners/external_scan.c"
));
ts_document_set_input_string(document, "%|hi|");
ts_document_set_input_string(document, "%(sup (external) scanner?)");
ts_document_parse(document);
assert_root_node("(string)");
ts_document_set_input_string(document, "%{sup {} external {} scanner?}");
ts_document_parse(document);
assert_root_node("(string)");

View file

@ -128,7 +128,7 @@ class CCodeGenerator {
void add_stats() {
line("#define STATE_COUNT " + to_string(parse_table.states.size()));
line("#define SYMBOL_COUNT " + to_string(parse_table.symbols.size()));
line("#define TOKEN_COUNT " + to_string(lexical_grammar.variables.size() + 1));
line("#define TOKEN_COUNT " + to_string(lexical_grammar.variables.size() + 1 + syntax_grammar.external_tokens.size()));
line("#define EXTERNAL_TOKEN_COUNT " + to_string(syntax_grammar.external_tokens.size()));
line();
}
@ -327,7 +327,7 @@ class CCodeGenerator {
string external_scanner_name = "ts_language_" + name + "_external_scanner";
line("void *" + external_scanner_name + "_create();");
line("bool " + external_scanner_name + "_scan();");
line("bool " + external_scanner_name + "_scan(void *, TSLexer *, const bool *);");
line("void " + external_scanner_name + "_destroy();");
line();

View file

@ -13,8 +13,8 @@ class Symbol : public Rule {
typedef enum {
Terminal,
NonTerminal,
External,
NonTerminal,
} Type;
Symbol(Index index, Type type);

View file

@ -37,7 +37,7 @@ const TSLanguage *ts_document_language(TSDocument *self) {
void ts_document_set_language(TSDocument *self, const TSLanguage *language) {
ts_document_invalidate(self);
self->parser.language = language;
parser_set_language(&self->parser, language);
if (self->tree) {
ts_tree_release(self->tree);
self->tree = NULL;

View file

@ -49,6 +49,12 @@ static inline TSStateId ts_language_next_state(const TSLanguage *self,
}
}
static inline const bool *
ts_language_enabled_external_tokens(const TSLanguage *self,
unsigned external_scanner_state) {
return self->external_token_lists + self->external_token_count * external_scanner_state;
}
#ifdef __cplusplus
}
#endif

View file

@ -123,9 +123,7 @@ void ts_lexer_reset(Lexer *self, Length position) {
return;
}
void ts_lexer_start(Lexer *self, TSStateId lex_state) {
LOG("start_lex state:%d, pos:%u", lex_state, self->current_position.chars);
void ts_lexer_start(Lexer *self) {
self->token_start_position = self->current_position;
self->data.result_symbol = 0;

View file

@ -30,7 +30,7 @@ typedef struct {
void ts_lexer_init(Lexer *);
void ts_lexer_set_input(Lexer *, TSInput);
void ts_lexer_reset(Lexer *, Length);
void ts_lexer_start(Lexer *, TSStateId);
void ts_lexer_start(Lexer *);
#ifdef __cplusplus
}

View file

@ -209,23 +209,43 @@ static bool parser__condense_stack(Parser *self) {
}
static Tree *parser__lex(Parser *self, TSStateId parse_state) {
Length start_position = self->lexer.current_position;
ts_lexer_start(&self->lexer);
TSLexMode lex_mode = self->language->lex_modes[parse_state];
if (lex_mode.external_tokens) {
const bool *external_tokens = ts_language_enabled_external_tokens(self->language, lex_mode.external_tokens);
if (self->language->external_scanner.scan(
self->external_scanner_payload,
&self->lexer.data,
external_tokens
)) {
TSSymbol symbol = self->language->external_token_symbol_map[self->lexer.data.result_symbol];
Length padding = length_sub(self->lexer.token_start_position, start_position);
Length size = length_sub(self->lexer.current_position, self->lexer.token_start_position);
TSSymbolMetadata metadata = ts_language_symbol_metadata(self->language, symbol);
Tree *result = ts_tree_make_leaf(symbol, padding, size, metadata);
result->parse_state = parse_state;
return result;
} else {
ts_lexer_reset(&self->lexer, start_position);
}
}
TSStateId start_state = self->language->lex_modes[parse_state].lex_state;
TSStateId current_state = start_state;
Length start_position = self->lexer.current_position;
LOG("lex state:%d", start_state);
bool skipped_error = false;
int32_t first_error_character = 0;
Length error_start_position, error_end_position;
ts_lexer_start(&self->lexer, start_state);
while (!self->language->lex_fn(&self->lexer.data, current_state)) {
if (current_state != ERROR_STATE) {
LOG("retry_in_error_mode");
current_state = ERROR_STATE;
ts_lexer_reset(&self->lexer, start_position);
ts_lexer_start(&self->lexer, current_state);
ts_lexer_start(&self->lexer);
continue;
}
@ -247,7 +267,6 @@ static Tree *parser__lex(Parser *self, TSStateId parse_state) {
}
Tree *result;
if (skipped_error) {
Length padding = length_sub(error_start_position, start_position);
Length size = length_sub(error_end_position, error_start_position);
@ -255,18 +274,12 @@ static Tree *parser__lex(Parser *self, TSStateId parse_state) {
result = ts_tree_make_error(size, padding, first_error_character);
} else {
TSSymbol symbol = self->lexer.data.result_symbol;
Length padding =
length_sub(self->lexer.token_start_position, start_position);
Length size = length_sub(self->lexer.current_position,
self->lexer.token_start_position);
result =
ts_tree_make_leaf(symbol, padding, size,
ts_language_symbol_metadata(self->language, symbol));
Length padding = length_sub(self->lexer.token_start_position, start_position);
Length size = length_sub(self->lexer.current_position, self->lexer.token_start_position);
TSSymbolMetadata metadata = ts_language_symbol_metadata(self->language, symbol);
result = ts_tree_make_leaf(symbol, padding, size, metadata);
}
if (!result)
return NULL;
result->parse_state = parse_state;
result->first_leaf.lex_state = start_state;
return result;
@ -1106,6 +1119,15 @@ bool parser_init(Parser *self) {
return true;
}
void parser_set_language(Parser *self, const TSLanguage *language) {
self->language = language;
if (language->external_scanner.create) {
self->external_scanner_payload = language->external_scanner.create();
} else {
self->external_scanner_payload = NULL;
}
}
void parser_destroy(Parser *self) {
if (self->stack)
ts_stack_delete(self->stack);

View file

@ -29,11 +29,13 @@ typedef struct {
ReusableNode reusable_node;
TreePath tree_path1;
TreePath tree_path2;
void *external_scanner_payload;
} Parser;
bool parser_init(Parser *);
void parser_destroy(Parser *);
Tree *parser_parse(Parser *, TSInput, Tree *);
void parser_set_language(Parser *, const TSLanguage *);
#ifdef __cplusplus
}