Allow anonymous tokens to be used in grammars' external token lists

This commit is contained in:
Max Brunsfeld 2017-03-17 16:31:29 -07:00
parent e2baf0930b
commit ed8fbff175
24 changed files with 282 additions and 183 deletions

View file

@ -6,11 +6,6 @@
using namespace rules;
using prepare_grammar::InitialSyntaxGrammar;
using prepare_grammar::expand_repeats;
using Variable = InitialSyntaxGrammar::Variable;
// Structural equality for grammar variables, used by the test framework's
// Equals() assertions: two variables match when their name, rule, and
// variable type are all equal.
bool operator==(const Variable &left, const Variable &right) {
return left.name == right.name && left.rule == right.rule && left.type == right.type;
}
START_TEST

View file

@ -11,14 +11,12 @@ using namespace rules;
using prepare_grammar::extract_tokens;
using prepare_grammar::InternedGrammar;
using prepare_grammar::InitialSyntaxGrammar;
using InternedVariable = InternedGrammar::Variable;
using InitialSyntaxVariable = InitialSyntaxGrammar::Variable;
describe("extract_tokens", []() {
it("moves strings, patterns, and sub-rules marked as tokens into the lexical grammar", [&]() {
auto result = extract_tokens(InternedGrammar{
{
InternedVariable{
Variable{
"rule_A",
VariableTypeNamed,
Repeat{Rule::seq({
@ -34,17 +32,17 @@ describe("extract_tokens", []() {
}),
})}
},
InternedVariable{
Variable{
"rule_B",
VariableTypeNamed,
Pattern{"h+"}
},
InternedVariable{
Variable{
"rule_C",
VariableTypeNamed,
Rule::choice({ String{"i"}, Blank{} })
},
InternedVariable{
Variable{
"rule_D",
VariableTypeNamed,
Repeat{Symbol::non_terminal(3)}
@ -61,8 +59,8 @@ describe("extract_tokens", []() {
AssertThat(error, Equals(CompileError::none()));
AssertThat(syntax_grammar.variables, Equals(vector<InitialSyntaxVariable>{
InitialSyntaxVariable{
AssertThat(syntax_grammar.variables, Equals(vector<Variable>{
Variable{
"rule_A",
VariableTypeNamed,
Repeat{Rule::seq({
@ -88,13 +86,13 @@ describe("extract_tokens", []() {
})}
},
InitialSyntaxVariable{
Variable{
"rule_C",
VariableTypeNamed,
Rule::choice({Symbol::terminal(4), Blank{}})
},
InitialSyntaxVariable{
Variable{
"rule_D",
VariableTypeNamed,
Repeat{Symbol::non_terminal(2)}
@ -168,8 +166,8 @@ describe("extract_tokens", []() {
InitialSyntaxGrammar &syntax_grammar = get<0>(result);
LexicalGrammar &lexical_grammar = get<1>(result);
AssertThat(syntax_grammar.variables, Equals(vector<InitialSyntaxVariable> {
InitialSyntaxVariable{
AssertThat(syntax_grammar.variables, Equals(vector<Variable> {
Variable{
"rule_A",
VariableTypeNamed,
Rule::seq({
@ -192,17 +190,17 @@ describe("extract_tokens", []() {
it("does not move entire rules into the lexical grammar if their content is used elsewhere in the grammar", [&]() {
auto result = extract_tokens(InternedGrammar{{
InternedVariable{
Variable{
"rule_A",
VariableTypeNamed,
Rule::seq({ Symbol::non_terminal(1), String{"ab"} })
},
InternedVariable{
Variable{
"rule_B",
VariableTypeNamed,
String{"cd"}
},
InternedVariable{
Variable{
"rule_C",
VariableTypeNamed,
Rule::seq({ String{"ef"}, String{"cd"} })
@ -212,18 +210,18 @@ describe("extract_tokens", []() {
InitialSyntaxGrammar &syntax_grammar = get<0>(result);
LexicalGrammar &lexical_grammar = get<1>(result);
AssertThat(syntax_grammar.variables, Equals(vector<InitialSyntaxVariable>({
InitialSyntaxVariable{
AssertThat(syntax_grammar.variables, Equals(vector<Variable>({
Variable{
"rule_A",
VariableTypeNamed,
Rule::seq({ Symbol::non_terminal(1), Symbol::terminal(0) })
},
InitialSyntaxVariable{
Variable{
"rule_B",
VariableTypeNamed,
Symbol::terminal(1)
},
InitialSyntaxVariable{
Variable{
"rule_C",
VariableTypeNamed,
Rule::seq({ Symbol::terminal(2), Symbol::terminal(1) })
@ -255,17 +253,17 @@ describe("extract_tokens", []() {
it("renumbers the grammar's expected conflict symbols based on any moved rules", [&]() {
auto result = extract_tokens(InternedGrammar{
{
InternedVariable{
Variable{
"rule_A",
VariableTypeNamed,
String{"ok"}
},
InternedVariable{
Variable{
"rule_B",
VariableTypeNamed,
Repeat{Symbol::non_terminal(0)}
},
InternedVariable{
Variable{
"rule_C",
VariableTypeNamed,
Repeat{Seq{Symbol::non_terminal(0), Symbol::non_terminal(0)}}
@ -292,7 +290,7 @@ describe("extract_tokens", []() {
it("adds inline extra tokens to the lexical grammar's separators", [&]() {
auto result = extract_tokens(InternedGrammar{
{
InternedVariable{"rule_A", VariableTypeNamed, String{"x"}},
Variable{"rule_A", VariableTypeNamed, String{"x"}},
},
{
String{"y"},
@ -314,8 +312,8 @@ describe("extract_tokens", []() {
it("handles inline extra tokens that match tokens in the grammar", [&]() {
auto result = extract_tokens(InternedGrammar{
{
InternedVariable{"rule_A", VariableTypeNamed, String{"x"}},
InternedVariable{"rule_B", VariableTypeNamed, String{"y"}},
Variable{"rule_A", VariableTypeNamed, String{"x"}},
Variable{"rule_B", VariableTypeNamed, String{"y"}},
},
{
String{"y"},
@ -332,17 +330,17 @@ describe("extract_tokens", []() {
it("updates extra symbols according to the new symbol numbers", [&]() {
auto result = extract_tokens(InternedGrammar{
{
InternedVariable{
Variable{
"rule_A",
VariableTypeNamed,
Rule::seq({ String{"w"}, String{"x"}, Symbol::non_terminal(1) })
},
InternedVariable{
Variable{
"rule_B",
VariableTypeNamed,
String{"y"}
},
InternedVariable{
Variable{
"rule_C",
VariableTypeNamed,
String{"z"}
@ -367,12 +365,12 @@ describe("extract_tokens", []() {
it("returns an error if any extra tokens are non-token symbols", [&]() {
auto result = extract_tokens(InternedGrammar{
{
InternedVariable{
Variable{
"rule_A",
VariableTypeNamed,
Rule::seq({ String{"x"}, Symbol::non_terminal(1) })
},
InternedVariable{
Variable{
"rule_B",
VariableTypeNamed,
Rule::seq({ String{"y"}, String{"z"} })
@ -428,7 +426,7 @@ describe("extract_tokens", []() {
{},
{},
{
ExternalToken {"rule_A", VariableTypeNamed, Symbol::non_terminal(0)}
Variable{"rule_A", VariableTypeNamed, Symbol::non_terminal(0)}
}
});

View file

@ -22,7 +22,7 @@ describe("intern_symbols", []() {
auto result = intern_symbols(grammar);
AssertThat(result.second, Equals(CompileError::none()));
AssertThat(result.first.variables, Equals(vector<prepare_grammar::InternedGrammar::Variable>{
AssertThat(result.first.variables, Equals(vector<Variable>{
{"x", VariableTypeNamed, Rule::choice({ Symbol::non_terminal(1), Symbol::non_terminal(2) })},
{"y", VariableTypeNamed, Symbol::non_terminal(2)},
{"_z", VariableTypeHidden, String{"stuff"}},
@ -74,28 +74,28 @@ describe("intern_symbols", []() {
{},
{},
{
ExternalToken{
Variable{
"w",
VariableTypeNamed,
NONE()
NamedSymbol{"w"}
},
ExternalToken{
Variable{
"z",
VariableTypeNamed,
NONE()
NamedSymbol{"z"}
},
}
};
auto result = intern_symbols(grammar);
AssertThat(result.first.external_tokens, Equals(vector<ExternalToken>{
ExternalToken{
AssertThat(result.first.external_tokens, Equals(vector<Variable>{
Variable{
"w",
VariableTypeNamed,
rules::NONE()
Symbol::external(0)
},
ExternalToken{
Variable{
"z",
VariableTypeNamed,
Symbol::non_terminal(2)

View file

@ -0,0 +1,41 @@
=========================================
single-line statements - internal tokens
=========================================
a b
---
(statement (variable) (variable))
=========================================
multi-line statements - internal tokens
=========================================
a
b
---
(statement (variable) (variable))
=========================================
single-line statements - external tokens
=========================================
'hello' 'world'
---
(statement (string) (string))
=========================================
multi-line statements - external tokens
=========================================
'hello'
'world'
---
(statement (string) (string))

View file

@ -0,0 +1,35 @@
{
"name": "external_and_internal_anonymous_tokens",
"externals": [
{"type": "SYMBOL", "name": "string"},
{"type": "STRING", "value": "\n"}
],
"extras": [
{"type": "PATTERN", "value": "\\s"}
],
"rules": {
"statement": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "_expression"},
{"type": "SYMBOL", "name": "_expression"},
{"type": "STRING", "value": "\n"}
]
},
"_expression": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "string"},
{"type": "SYMBOL", "name": "variable"},
{"type": "SYMBOL", "name": "number"}
]
},
"variable": {"type": "PATTERN", "value": "\\a+"},
"number": {"type": "PATTERN", "value": "\\d+"}
}
}

View file

@ -0,0 +1 @@
This grammar is just like the `external_and_internal_tokens` grammar, except that the shared external token is *anonymous*; it's specified as a string in the grammar.

View file

@ -0,0 +1,23 @@
// External scanner for the `external_and_internal_anonymous_tokens` fixture
// grammar. It reuses the scan logic of the `external_and_internal_tokens`
// scanner (pulled in via the #include below) and only re-exports the required
// entry points under this grammar's `tree_sitter_<grammar>_external_scanner_*`
// naming scheme.
#include "../external_and_internal_tokens/scanner.c"
// This scanner keeps no per-parse state, so the payload is always NULL and
// create/destroy/reset are no-ops.
void *tree_sitter_external_and_internal_anonymous_tokens_external_scanner_create() { return NULL; }
void tree_sitter_external_and_internal_anonymous_tokens_external_scanner_destroy(void *payload) {}
void tree_sitter_external_and_internal_anonymous_tokens_external_scanner_reset(void *payload) {}
// With no state to persist, serialization writes nothing and always succeeds;
// deserialization is likewise a no-op.
bool tree_sitter_external_and_internal_anonymous_tokens_external_scanner_serialize(void *payload, TSExternalTokenState state) { return true; }
void tree_sitter_external_and_internal_anonymous_tokens_external_scanner_deserialize(void *payload, TSExternalTokenState state) {}
// Delegate scanning to the shared scanner's implementation, forwarding the
// payload, lexer, and the whitelist of valid external tokens unchanged.
bool tree_sitter_external_and_internal_anonymous_tokens_external_scanner_scan(
void *payload,
TSLexer *lexer,
const bool *whitelist
) {
return tree_sitter_external_and_internal_tokens_external_scanner_scan(
payload,
lexer,
whitelist
);
}

View file

@ -2,8 +2,8 @@
"name": "external_and_internal_tokens",
"externals": [
"string",
"line_break"
{"type": "SYMBOL", "name": "string"},
{"type": "SYMBOL", "name": "line_break"}
],
"extras": [

View file

@ -2,7 +2,7 @@
"name": "external_extra_tokens",
"externals": [
"comment"
{"type": "SYMBOL", "name": "comment"}
],
"extras": [

View file

@ -2,9 +2,9 @@
"name": "external_tokens",
"externals": [
"_percent_string",
"_percent_string_start",
"_percent_string_end"
{"type": "SYMBOL", "name": "_percent_string"},
{"type": "SYMBOL", "name": "_percent_string_start"},
{"type": "SYMBOL", "name": "_percent_string_end"}
],
"extras": [

View file

@ -132,7 +132,7 @@ ostream &operator<<(ostream &stream, const Rule &rule) {
} // namespace rules
ostream &operator<<(ostream &stream, const InputGrammar::Variable &variable) {
ostream &operator<<(ostream &stream, const Variable &variable) {
return stream << "(Variable " << variable.name << " " << variable.rule << ")";
}
@ -165,18 +165,6 @@ ostream &operator<<(ostream &stream, const PrecedenceRange &range) {
}
}
namespace prepare_grammar {
// Stream-printing helpers for the variable types local to the prepare_grammar
// pipeline, so failed test assertions can print readable "(Variable name rule)"
// descriptions instead of raw bytes.
ostream &operator<<(ostream &stream, const prepare_grammar::InternedGrammar::Variable &variable) {
return stream << "(Variable " << variable.name << " " << variable.rule << ")";
}
ostream &operator<<(ostream &stream, const prepare_grammar::InitialSyntaxGrammar::Variable &variable) {
return stream << "(Variable " << variable.name << " " << variable.rule << ")";
}
} // namespace prepare_grammar
namespace build_tables {
ostream &operator<<(ostream &stream, const LexItem &item) {

View file

@ -111,6 +111,7 @@ ostream &operator<<(ostream &, const CompileError &);
ostream &operator<<(ostream &, const ExternalToken &);
ostream &operator<<(ostream &, const ProductionStep &);
ostream &operator<<(ostream &, const PrecedenceRange &);
ostream &operator<<(ostream &, const Variable &);
ostream &operator<<(ostream &, const LexicalVariable &);
namespace rules {
@ -130,13 +131,6 @@ ostream &operator<<(ostream &stream, const Rule &rule);
} // namespace rules
namespace prepare_grammar {
ostream &operator<<(ostream &, const InitialSyntaxGrammar::Variable &);
ostream &operator<<(ostream &, const InternedGrammar::Variable &);
} // namespace prepare_grammar
namespace build_tables {
class LexItem;