From a1770ce844da2165b97c1422c371822cf9d623fa Mon Sep 17 00:00:00 2001 From: Max Brunsfeld <maxbrunsfeld@gmail.com> Date: Mon, 12 Dec 2016 22:06:01 -0800 Subject: [PATCH] Allow external tokens to be used as extras --- .../external_scanners/extra_external_tokens.c | 32 ++++++++++++++ ...line_breaks.c => shared_external_tokens.c} | 0 spec/integration/compile_grammar_spec.cc | 44 ++++++++++++++++++- .../prepare_grammar/extract_tokens.cc | 2 +- 4 files changed, 75 insertions(+), 3 deletions(-) create mode 100644 spec/fixtures/external_scanners/extra_external_tokens.c rename spec/fixtures/external_scanners/{line_breaks.c => shared_external_tokens.c} (100%) diff --git a/spec/fixtures/external_scanners/extra_external_tokens.c b/spec/fixtures/external_scanners/extra_external_tokens.c new file mode 100644 index 00000000..45803213 --- /dev/null +++ b/spec/fixtures/external_scanners/extra_external_tokens.c @@ -0,0 +1,32 @@ +#include <tree_sitter/parser.h> +
+enum { + COMMENT, +}; + +void *ts_language_extra_external_tokens_external_scanner_create() { + return NULL; +} + +void ts_language_extra_external_tokens_external_scanner_destroy(void *payload) { +} + +bool ts_language_extra_external_tokens_external_scanner_scan( + void *payload, TSLexer *lexer, const bool *whitelist) { + + while (lexer->lookahead == ' ') { + lexer->advance(lexer, true); + } + + if (lexer->lookahead == '#') { + lexer->advance(lexer, false); + while (lexer->lookahead != '\n') { + lexer->advance(lexer, false); + } + + lexer->result_symbol = COMMENT; + return true; + } + + return false; +} diff --git a/spec/fixtures/external_scanners/line_breaks.c b/spec/fixtures/external_scanners/shared_external_tokens.c similarity index 100% rename from spec/fixtures/external_scanners/line_breaks.c rename to spec/fixtures/external_scanners/shared_external_tokens.c diff --git a/spec/integration/compile_grammar_spec.cc b/spec/integration/compile_grammar_spec.cc index f26bbcc8..2c7560c0 100644 --- a/spec/integration/compile_grammar_spec.cc +++ 
b/spec/integration/compile_grammar_spec.cc @@ -630,7 +630,7 @@ describe("compile_grammar", []() { ts_document_set_language(document, load_compile_result( "shared_external_tokens", result, - "spec/fixtures/external_scanners/line_breaks.c" + "spec/fixtures/external_scanners/shared_external_tokens.c" )); ts_document_set_input_string(document, "a b\n"); @@ -641,7 +641,6 @@ describe("compile_grammar", []() { ts_document_parse(document); assert_root_node("(statement (variable) (variable) (line_break))"); - ts_document_set_input_string(document, "'hello' 'world'\n"); ts_document_parse(document); assert_root_node("(statement (string) (string) (line_break))"); @@ -650,6 +649,47 @@ describe("compile_grammar", []() { ts_document_parse(document); assert_root_node("(statement (string) (string) (line_break))"); }); + + it("allows external tokens to be used as extras", [&]() { + string grammar = R"JSON({ + "name": "extra_external_tokens", + + "externals": [ + "comment" + ], + + "extras": [ + {"type": "PATTERN", "value": "\\s"}, + {"type": "SYMBOL", "name": "comment"} + ], + + "rules": { + "assignment": { + "type": "SEQ", + "members": [ + {"type": "SYMBOL", "name": "variable"}, + {"type": "STRING", "value": "="}, + {"type": "SYMBOL", "name": "variable"} + ] + }, + + "variable": {"type": "PATTERN", "value": "\\a+"} + } + })JSON"; + + TSCompileResult result = ts_compile_grammar(grammar.c_str()); + AssertThat(result.error_message, IsNull()); + + ts_document_set_language(document, load_compile_result( + "extra_external_tokens", + result, + "spec/fixtures/external_scanners/extra_external_tokens.c" + )); + + ts_document_set_input_string(document, "x = # a comment\n y"); + ts_document_parse(document); + assert_root_node("(assignment (variable) (comment) (variable))"); + }); }); describe("when the grammar's start symbol is a token", [&]() { diff --git a/src/compiler/prepare_grammar/extract_tokens.cc b/src/compiler/prepare_grammar/extract_tokens.cc index d2b32769..ace6294a 100644 --- 
a/src/compiler/prepare_grammar/extract_tokens.cc +++ b/src/compiler/prepare_grammar/extract_tokens.cc @@ -177,7 +177,7 @@ tuple<SyntaxGrammar, LexicalGrammar, CompileError> extract_tokens( extra_token_error(rule->to_string())); Symbol new_symbol = symbol_replacer.replace_symbol(*symbol); - if (!new_symbol.is_token()) { + if (new_symbol.is_non_terminal()) { return make_tuple( syntax_grammar, lexical_grammar, extra_token_error(syntax_grammar.variables[new_symbol.index].name));