From ad4089a4bfa8a6ffc758cf80e6585a15ec37563b Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 14 Jan 2016 10:26:52 -0800 Subject: [PATCH] Move anonymous tokens grammar into integration spec --- spec/compiler/compile_examples.cc | 5 - spec/fixtures/grammars/anonymous_tokens.cc | 16 --- spec/fixtures/parsers/anonymous_tokens.c | 153 --------------------- spec/integration/compile_grammar_spec.cc | 46 ++++++- 4 files changed, 40 insertions(+), 180 deletions(-) delete mode 100644 spec/fixtures/grammars/anonymous_tokens.cc delete mode 100644 spec/fixtures/parsers/anonymous_tokens.c diff --git a/spec/compiler/compile_examples.cc b/spec/compiler/compile_examples.cc index 67fc9f08..0852eb8d 100644 --- a/spec/compiler/compile_examples.cc +++ b/spec/compiler/compile_examples.cc @@ -41,17 +41,12 @@ describe("compiling the example grammars", []() { }); }; - // example languages compile_grammar(tree_sitter_examples::arithmetic, "arithmetic"); compile_grammar(tree_sitter_examples::json, "json"); compile_grammar(tree_sitter_examples::javascript, "javascript"); compile_grammar(tree_sitter_examples::golang, "golang"); compile_grammar(tree_sitter_examples::c, "c"); compile_grammar(tree_sitter_examples::cpp, "cpp"); - - // edge cases - compile_grammar(tree_sitter_examples::anonymous_tokens, "anonymous_tokens"); - }); END_TEST diff --git a/spec/fixtures/grammars/anonymous_tokens.cc b/spec/fixtures/grammars/anonymous_tokens.cc deleted file mode 100644 index d9639b51..00000000 --- a/spec/fixtures/grammars/anonymous_tokens.cc +++ /dev/null @@ -1,16 +0,0 @@ -#include "tree_sitter/compiler.h" -#include "helpers.h" - -namespace tree_sitter_examples { - -extern const Grammar anonymous_tokens{{ - { "program", choice({ - str("\n"), - str("\r"), - pattern("\\d"), - str("\"hello\"") }) }, -}, { - pattern("\\s"), -}, {}}; - -} // namespace tree_sitter_examples diff --git a/spec/fixtures/parsers/anonymous_tokens.c b/spec/fixtures/parsers/anonymous_tokens.c deleted file mode 100644 index bc6ede0f..00000000 --- a/spec/fixtures/parsers/anonymous_tokens.c +++ /dev/null @@ -1,153 +0,0 @@ -#include "tree_sitter/parser.h" - -#define STATE_COUNT 3 -#define SYMBOL_COUNT 7 - -enum { - sym_program = ts_builtin_sym_start, - anon_sym_LF, - anon_sym_CR, - aux_sym_SLASH_BSLASHd_SLASH, - anon_sym_DQUOTEhello_DQUOTE, -}; - -static const char *ts_symbol_names[] = { - [sym_program] = "program", - [ts_builtin_sym_error] = "ERROR", - [ts_builtin_sym_end] = "END", - [anon_sym_LF] = "\n", - [anon_sym_CR] = "\r", - [aux_sym_SLASH_BSLASHd_SLASH] = "/\\d/", - [anon_sym_DQUOTEhello_DQUOTE] = "\"hello\"", -}; - -static const TSSymbolMetadata ts_symbol_metadata[SYMBOL_COUNT] = { - [sym_program] = {.visible = true, .named = true, .structural = true, .extra = false}, - [ts_builtin_sym_error] = {.visible = true, .named = true, .structural = false, .extra = false}, - [ts_builtin_sym_end] = {.visible = false, .named = false, .structural = true, .extra = false}, - [anon_sym_LF] = {.visible = true, .named = false, .structural = true, .extra = false}, - [anon_sym_CR] = {.visible = true, .named = false, .structural = true, .extra = false}, - [aux_sym_SLASH_BSLASHd_SLASH] = {.visible = false, .named = false, .structural = true, .extra = false}, - [anon_sym_DQUOTEhello_DQUOTE] = {.visible = true, .named = false, .structural = true, .extra = false}, -}; - -static TSTree *ts_lex(TSLexer *lexer, TSStateId state, bool error_mode) { - START_LEXER(); - switch (state) { - case 0: - START_TOKEN(); - if (lookahead == 0) - ADVANCE(1); - if ((lookahead == '\t') || - (lookahead == ' ')) - ADVANCE(0); - if (lookahead == '\n') - ADVANCE(2); - if (lookahead == '\r') - ADVANCE(3); - if (lookahead == '\"') - ADVANCE(4); - if ('0' <= lookahead && lookahead <= '9') - ADVANCE(11); - LEX_ERROR(); - case 1: - ACCEPT_TOKEN(ts_builtin_sym_end); - case 2: - START_TOKEN(); - ACCEPT_TOKEN(anon_sym_LF); - case 3: - START_TOKEN(); - ACCEPT_TOKEN(anon_sym_CR); - case 4: - if (lookahead == 'h') - ADVANCE(5); - LEX_ERROR(); - case 5: - if (lookahead == 'e') - ADVANCE(6); - LEX_ERROR(); - case 6: - if (lookahead == 'l') - ADVANCE(7); - LEX_ERROR(); - case 7: - if (lookahead == 'l') - ADVANCE(8); - LEX_ERROR(); - case 8: - if (lookahead == 'o') - ADVANCE(9); - LEX_ERROR(); - case 9: - if (lookahead == '\"') - ADVANCE(10); - LEX_ERROR(); - case 10: - ACCEPT_TOKEN(anon_sym_DQUOTEhello_DQUOTE); - case 11: - ACCEPT_TOKEN(aux_sym_SLASH_BSLASHd_SLASH); - case 12: - START_TOKEN(); - if ((lookahead == '\t') || - (lookahead == ' ')) - ADVANCE(12); - if (lookahead == '\n') - ADVANCE(2); - if (lookahead == '\r') - ADVANCE(3); - if (lookahead == '\"') - ADVANCE(4); - if ('0' <= lookahead && lookahead <= '9') - ADVANCE(11); - LEX_ERROR(); - case 13: - START_TOKEN(); - if (lookahead == 0) - ADVANCE(1); - if ((lookahead == '\t') || - (lookahead == '\n') || - (lookahead == '\r') || - (lookahead == ' ')) - ADVANCE(13); - LEX_ERROR(); - default: - LEX_ERROR(); - } -} - -static TSStateId ts_lex_states[STATE_COUNT] = { - [0] = 12, - [1] = 13, - [2] = 13, -}; - -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wmissing-field-initializers" - -static unsigned short ts_parse_table[STATE_COUNT][SYMBOL_COUNT] = { - [0] = { - [sym_program] = 2, - [anon_sym_LF] = 4, - [anon_sym_CR] = 4, - [aux_sym_SLASH_BSLASHd_SLASH] = 4, - [anon_sym_DQUOTEhello_DQUOTE] = 4, - }, - [1] = { - [ts_builtin_sym_end] = 6, - }, - [2] = { - [ts_builtin_sym_end] = 8, - }, -}; - -static TSParseActionEntry ts_parse_actions[] = { - [0] = {.count = 1}, ERROR(), - [2] = {.count = 1}, SHIFT(1, 0), - [4] = {.count = 1}, SHIFT(2, 0), - [6] = {.count = 1}, ACCEPT_INPUT(), - [8] = {.count = 1}, REDUCE(sym_program, 1, 0), -}; - -#pragma GCC diagnostic pop - -EXPORT_LANGUAGE(ts_language_anonymous_tokens); diff --git a/spec/integration/compile_grammar_spec.cc b/spec/integration/compile_grammar_spec.cc index 80fe0763..783474f2 100644 --- a/spec/integration/compile_grammar_spec.cc +++ b/spec/integration/compile_grammar_spec.cc @@ -25,12 +25,10 @@ describe("compile_grammar", []() { } )JSON"); - const TSLanguage *language = load_language("test_language", result); + ts_document_set_language(document, load_language("test_language", result)); - ts_document_set_language(document, language); ts_document_set_input_string(document, "the-value"); ts_document_parse(document); - TSNode root_node = ts_document_root_node(document); AssertThat(ts_node_string(root_node, document), Equals("(first_rule)")); }); @@ -47,17 +45,53 @@ describe("compile_grammar", []() { } )JSON"); - const TSLanguage *language = load_language("test_language", result); + ts_document_set_language(document, load_language("test_language", result)); - ts_document_set_language(document, language); ts_document_set_input_string(document, ""); ts_document_parse(document); - TSNode root_node = ts_document_root_node(document); AssertThat(ts_node_string(root_node, document), Equals("(first_rule)")); }); }); + describe("when the grammar contains anonymous tokens with escaped characters", [&]() { + it("escapes the escaped characters properly in the generated parser", [&]() { + TSCompileResult result = ts_compile_grammar(R"JSON( + { + "name": "test_language", + "rules": { + "first_rule": { + "type": "CHOICE", + "members": [ + {"type": "STRING", "value": "\n"}, + {"type": "STRING", "value": "\r"}, + {"type": "STRING", "value": "'hello'"}, + {"type": "PATTERN", "value": "\\d+"} + ] + } + } + } + )JSON"); + + ts_document_set_language(document, load_language("test_language", result)); + + ts_document_set_input_string(document, "1234"); + ts_document_parse(document); + TSNode root_node = ts_document_root_node(document); + AssertThat(ts_node_string(root_node, document), Equals("(first_rule)")); + + ts_document_set_input_string(document, "\n"); + ts_document_parse(document); + root_node = ts_document_root_node(document); + AssertThat(ts_node_string(root_node, document), Equals("(first_rule)")); + + ts_document_set_input_string(document, "'hello'"); + ts_document_parse(document); + root_node = ts_document_root_node(document); + AssertThat(ts_node_string(root_node, document), Equals("(first_rule)")); + }); + }); + describe("the grammar in the README", [&]() { it("works", [&]() { TSCompileResult result = ts_compile_grammar(R"JSON(