From 4131e1c16e9d70d9190663fc7645acf82ac97649 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 31 Jan 2017 11:36:51 -0800 Subject: [PATCH] Return an error when external token name matches non-terminal rule --- include/tree_sitter/compiler.h | 3 +- .../prepare_grammar/extract_tokens_spec.cc | 42 +++++++++++++++---- .../prepare_grammar/extract_tokens.cc | 18 ++++++-- src/compiler/rules/built_in_symbols.cc | 2 +- 4 files changed, 53 insertions(+), 12 deletions(-) diff --git a/include/tree_sitter/compiler.h b/include/tree_sitter/compiler.h index b362e535..1c287fd5 100644 --- a/include/tree_sitter/compiler.h +++ b/include/tree_sitter/compiler.h @@ -10,7 +10,8 @@ typedef enum { TSCompileErrorTypeInvalidGrammar, TSCompileErrorTypeInvalidRegex, TSCompileErrorTypeUndefinedSymbol, - TSCompileErrorTypeInvalidUbiquitousToken, + TSCompileErrorTypeInvalidExtraToken, + TSCompileErrorTypeInvalidExternalToken, TSCompileErrorTypeLexConflict, TSCompileErrorTypeParseConflict, TSCompileErrorTypeEpsilonRule, diff --git a/spec/compiler/prepare_grammar/extract_tokens_spec.cc b/spec/compiler/prepare_grammar/extract_tokens_spec.cc index 30a731c8..3aa576df 100644 --- a/spec/compiler/prepare_grammar/extract_tokens_spec.cc +++ b/spec/compiler/prepare_grammar/extract_tokens_spec.cc @@ -130,11 +130,20 @@ describe("extract_tokens", []() { }); it("renumbers the grammar's expected conflict symbols based on any moved rules", [&]() { - auto result = extract_tokens(InternedGrammar{{ - Variable("rule_A", VariableTypeNamed, str("ok")), - Variable("rule_B", VariableTypeNamed, repeat(i_sym(0))), - Variable("rule_C", VariableTypeNamed, repeat(seq({ i_sym(0), i_sym(0) }))), - }, { str(" ") }, { { Symbol(1, Symbol::NonTerminal), Symbol(2, Symbol::NonTerminal) } }}); + auto result = extract_tokens(InternedGrammar{ + { + Variable("rule_A", VariableTypeNamed, str("ok")), + Variable("rule_B", VariableTypeNamed, repeat(i_sym(0))), + Variable("rule_C", VariableTypeNamed, repeat(seq({ i_sym(0), i_sym(0) }))), + }, + { + str(" ") + }, + { + { Symbol(1, Symbol::NonTerminal), Symbol(2, Symbol::NonTerminal) } + }, + {} + }); InitialSyntaxGrammar &syntax_grammar = get<0>(result); @@ -201,7 +210,7 @@ describe("extract_tokens", []() { AssertThat(get<2>(result), !Equals(CompileError::none())); AssertThat(get<2>(result), Equals( - CompileError(TSCompileErrorTypeInvalidUbiquitousToken, + CompileError(TSCompileErrorTypeInvalidExtraToken, "Not a token: rule_B"))); }); @@ -213,11 +222,30 @@ describe("extract_tokens", []() { AssertThat(get<2>(result), !Equals(CompileError::none())); AssertThat(get<2>(result), Equals(CompileError( - TSCompileErrorTypeInvalidUbiquitousToken, + TSCompileErrorTypeInvalidExtraToken, "Not a token: (choice (non-terminal 1) (blank))" ))); }); }); + + it("returns an error if an external token has the same name as a non-terminal rule", [&]() { + auto result = extract_tokens(InternedGrammar{ + { + Variable("rule_A", VariableTypeNamed, seq({ str("x"), i_sym(1) })), + Variable("rule_B", VariableTypeNamed, seq({ str("y"), str("z") })), + }, + {}, + {}, + { + ExternalToken {"rule_A", VariableTypeNamed, Symbol(0, Symbol::NonTerminal)} + } + }); + + AssertThat(get<2>(result), Equals(CompileError( + TSCompileErrorTypeInvalidExternalToken, + "Name 'rule_A' cannot be used for both an external token and a non-terminal rule" + ))); + }); }); END_TEST diff --git a/src/compiler/prepare_grammar/extract_tokens.cc b/src/compiler/prepare_grammar/extract_tokens.cc index e84d028d..9d161ca8 100644 --- a/src/compiler/prepare_grammar/extract_tokens.cc +++ b/src/compiler/prepare_grammar/extract_tokens.cc @@ -91,8 +91,7 @@ class TokenExtractor : public rules::IdentityRuleFn { }; static CompileError extra_token_error(const string &message) { - return CompileError(TSCompileErrorTypeInvalidUbiquitousToken, - "Not a token: " + message); + return CompileError(TSCompileErrorTypeInvalidExtraToken, "Not a token: " + message); } tuple extract_tokens( @@ -187,10 +186,23 @@ tuple extract_tokens( } for (const ExternalToken &external_token : grammar.external_tokens) { + Symbol internal_token = symbol_replacer.replace_symbol(external_token.corresponding_internal_token); + + if (internal_token.is_non_terminal()) { + return make_tuple( + syntax_grammar, + lexical_grammar, + CompileError( + TSCompileErrorTypeInvalidExternalToken, + "Name '" + external_token.name + "' cannot be used for both an external token and a non-terminal rule" + ) + ); + } + syntax_grammar.external_tokens.push_back({ external_token.name, external_token.type, - symbol_replacer.replace_symbol(external_token.corresponding_internal_token) + internal_token }); } diff --git a/src/compiler/rules/built_in_symbols.cc b/src/compiler/rules/built_in_symbols.cc index b3f7cd66..0fe45f68 100644 --- a/src/compiler/rules/built_in_symbols.cc +++ b/src/compiler/rules/built_in_symbols.cc @@ -12,7 +12,7 @@ Symbol START() { } Symbol NONE() { - return Symbol(-3, Symbol::NonTerminal); + return Symbol(-3, Symbol::Type(-1)); } } // namespace rules