Allow anonymous tokens to be used in grammars' external token lists
This commit is contained in:
parent
e2baf0930b
commit
ed8fbff175
24 changed files with 282 additions and 183 deletions
|
|
@ -38,7 +38,7 @@ class ParseTableBuilder {
|
|||
set<string> conflicts;
|
||||
ParseItemSetBuilder item_set_builder;
|
||||
set<const Production *> fragile_productions;
|
||||
vector<set<Symbol::Index>> incompatible_token_indices_by_index;
|
||||
vector<set<Symbol>> incompatible_tokens_by_index;
|
||||
bool allow_any_conflict;
|
||||
|
||||
public:
|
||||
|
|
@ -109,10 +109,13 @@ class ParseTableBuilder {
|
|||
void build_error_parse_state() {
|
||||
ParseState error_state;
|
||||
|
||||
for (Symbol::Index i = 0; i < lexical_grammar.variables.size(); i++) {
|
||||
for (unsigned i = 0; i < lexical_grammar.variables.size(); i++) {
|
||||
Symbol token = Symbol::terminal(i);
|
||||
bool has_non_reciprocal_conflict = false;
|
||||
for (Symbol::Index incompatible_index : incompatible_token_indices_by_index[i]) {
|
||||
if (!incompatible_token_indices_by_index[incompatible_index].count(i)) {
|
||||
|
||||
for (Symbol incompatible_token : incompatible_tokens_by_index[i]) {
|
||||
if (incompatible_token.is_terminal() &&
|
||||
!incompatible_tokens_by_index[incompatible_token.index].count(token)) {
|
||||
has_non_reciprocal_conflict = true;
|
||||
break;
|
||||
}
|
||||
|
|
@ -302,28 +305,25 @@ class ParseTableBuilder {
|
|||
}
|
||||
|
||||
void compute_unmergable_token_pairs() {
|
||||
incompatible_token_indices_by_index.resize(lexical_grammar.variables.size());
|
||||
incompatible_tokens_by_index.resize(lexical_grammar.variables.size());
|
||||
|
||||
// First, assume that all tokens are mutually incompatible.
|
||||
for (Symbol::Index i = 0, n = lexical_grammar.variables.size(); i < n; i++) {
|
||||
auto &incompatible_indices = incompatible_token_indices_by_index[i];
|
||||
for (Symbol::Index j = 0; j < n; j++) {
|
||||
if (j != i) incompatible_indices.insert(j);
|
||||
}
|
||||
}
|
||||
|
||||
// For the remaining possibly-incompatible pairs of tokens, check if they
|
||||
// are actually incompatible by actually generating lexical states that
|
||||
// contain them both.
|
||||
auto lex_table_builder = LexTableBuilder::create(lexical_grammar);
|
||||
for (Symbol::Index i = 0, n = lexical_grammar.variables.size(); i < n; i++) {
|
||||
auto &incompatible_indices = incompatible_token_indices_by_index[i];
|
||||
auto iter = incompatible_indices.begin();
|
||||
while (iter != incompatible_indices.end()) {
|
||||
if (lex_table_builder->detect_conflict(i, *iter)) {
|
||||
++iter;
|
||||
} else {
|
||||
iter = incompatible_indices.erase(iter);
|
||||
for (unsigned i = 0, n = lexical_grammar.variables.size(); i < n; i++) {
|
||||
Symbol token = Symbol::terminal(i);
|
||||
auto &incompatible_indices = incompatible_tokens_by_index[i];
|
||||
|
||||
for (unsigned j = 0; j < n; j++) {
|
||||
if (i == j) continue;
|
||||
if (lex_table_builder->detect_conflict(i, j)) {
|
||||
incompatible_indices.insert(Symbol::terminal(j));
|
||||
}
|
||||
}
|
||||
|
||||
for (const ExternalToken &external_token : grammar.external_tokens) {
|
||||
if (external_token.corresponding_internal_token == token) {
|
||||
for (unsigned j = 0; j < grammar.external_tokens.size(); j++) {
|
||||
incompatible_indices.insert(Symbol::external(j));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -419,15 +419,14 @@ class ParseTableBuilder {
|
|||
for (auto &entry : state.terminal_entries) {
|
||||
Symbol lookahead = entry.first;
|
||||
const vector<ParseAction> &actions = entry.second.actions;
|
||||
auto &incompatible_token_indices = incompatible_token_indices_by_index[lookahead.index];
|
||||
auto &incompatible_tokens = incompatible_tokens_by_index[lookahead.index];
|
||||
|
||||
const auto &other_entry = other.terminal_entries.find(lookahead);
|
||||
if (other_entry == other.terminal_entries.end()) {
|
||||
if (lookahead.is_external()) return false;
|
||||
if (!lookahead.is_built_in()) {
|
||||
for (Symbol::Index incompatible_index : incompatible_token_indices) {
|
||||
Symbol incompatible_symbol = Symbol::terminal(incompatible_index);
|
||||
if (other.terminal_entries.count(incompatible_symbol)) return false;
|
||||
for (const Symbol &incompatible_token : incompatible_tokens) {
|
||||
if (other.terminal_entries.count(incompatible_token)) return false;
|
||||
}
|
||||
}
|
||||
if (actions.back().type != ParseActionTypeReduce)
|
||||
|
|
@ -444,14 +443,13 @@ class ParseTableBuilder {
|
|||
for (auto &entry : other.terminal_entries) {
|
||||
Symbol lookahead = entry.first;
|
||||
const vector<ParseAction> &actions = entry.second.actions;
|
||||
auto &incompatible_token_indices = incompatible_token_indices_by_index[lookahead.index];
|
||||
auto &incompatible_tokens = incompatible_tokens_by_index[lookahead.index];
|
||||
|
||||
if (!state.terminal_entries.count(lookahead)) {
|
||||
if (lookahead.is_external()) return false;
|
||||
if (!lookahead.is_built_in()) {
|
||||
for (Symbol::Index incompatible_index : incompatible_token_indices) {
|
||||
Symbol incompatible_symbol = Symbol::terminal(incompatible_index);
|
||||
if (state.terminal_entries.count(incompatible_symbol)) return false;
|
||||
for (const Symbol &incompatible_token : incompatible_tokens) {
|
||||
if (state.terminal_entries.count(incompatible_token)) return false;
|
||||
}
|
||||
}
|
||||
if (actions.back().type != ParseActionTypeReduce)
|
||||
|
|
|
|||
|
|
@ -526,7 +526,7 @@ class CCodeGenerator {
|
|||
// Helper functions
|
||||
|
||||
string external_token_id(Symbol::Index index) {
|
||||
return "ts_external_token_" + syntax_grammar.external_tokens[index].name;
|
||||
return "ts_external_token_" + sanitize_name(syntax_grammar.external_tokens[index].name);
|
||||
}
|
||||
|
||||
string symbol_id(const Symbol &symbol) {
|
||||
|
|
|
|||
|
|
@ -16,29 +16,21 @@ enum VariableType {
|
|||
VariableTypeNamed,
|
||||
};
|
||||
|
||||
struct ExternalToken {
|
||||
struct Variable {
|
||||
std::string name;
|
||||
VariableType type;
|
||||
rules::Symbol corresponding_internal_token;
|
||||
rules::Rule rule;
|
||||
|
||||
inline bool operator==(const ExternalToken &other) const {
|
||||
return name == other.name &&
|
||||
type == other.type &&
|
||||
corresponding_internal_token == other.corresponding_internal_token;
|
||||
inline bool operator==(const Variable &other) const {
|
||||
return name == other.name && rule == other.rule && type == other.type;
|
||||
}
|
||||
};
|
||||
|
||||
struct InputGrammar {
|
||||
struct Variable {
|
||||
std::string name;
|
||||
VariableType type;
|
||||
rules::Rule rule;
|
||||
};
|
||||
|
||||
std::vector<Variable> variables;
|
||||
std::vector<rules::Rule> extra_tokens;
|
||||
std::vector<std::unordered_set<rules::NamedSymbol>> expected_conflicts;
|
||||
std::vector<ExternalToken> external_tokens;
|
||||
std::vector<Variable> external_tokens;
|
||||
};
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -228,7 +228,7 @@ ParseGrammarResult parse_grammar(const string &input) {
|
|||
error_message = result.error_message;
|
||||
goto error;
|
||||
}
|
||||
grammar.variables.push_back(InputGrammar::Variable{
|
||||
grammar.variables.push_back(Variable{
|
||||
string(entry_json.name),
|
||||
VariableTypeNamed,
|
||||
result.rule
|
||||
|
|
@ -293,18 +293,21 @@ ParseGrammarResult parse_grammar(const string &input) {
|
|||
}
|
||||
|
||||
for (size_t i = 0, length = external_tokens_json.u.array.length; i < length; i++) {
|
||||
json_value *token_name_json = external_tokens_json.u.array.values[i];
|
||||
if (token_name_json->type != json_string) {
|
||||
error_message = "External token values must be strings";
|
||||
json_value *external_token_json = external_tokens_json.u.array.values[i];
|
||||
auto result = parse_rule(external_token_json);
|
||||
if (!result.error_message.empty()) {
|
||||
error_message = "Invalid external token: " + result.error_message;
|
||||
goto error;
|
||||
}
|
||||
|
||||
string token_name = token_name_json->u.string.ptr;
|
||||
grammar.external_tokens.push_back({
|
||||
token_name,
|
||||
VariableTypeNamed,
|
||||
rules::NONE()
|
||||
});
|
||||
grammar.external_tokens.push_back(result.rule.match(
|
||||
[](rules::NamedSymbol named_symbol) {
|
||||
return Variable{named_symbol.value, VariableTypeNamed, named_symbol};
|
||||
},
|
||||
[](auto rule) {
|
||||
return Variable{"", VariableTypeAnonymous, rule};
|
||||
}
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -85,7 +85,7 @@ class ExpandRepeats {
|
|||
return apply(rule);
|
||||
}
|
||||
|
||||
vector<InitialSyntaxGrammar::Variable> aux_rules;
|
||||
vector<Variable> aux_rules;
|
||||
};
|
||||
|
||||
InitialSyntaxGrammar expand_repeats(const InitialSyntaxGrammar &grammar) {
|
||||
|
|
|
|||
|
|
@ -156,7 +156,7 @@ class TokenExtractor {
|
|||
}
|
||||
|
||||
vector<size_t> token_usage_counts;
|
||||
vector<InternedGrammar::Variable> tokens;
|
||||
vector<Variable> tokens;
|
||||
};
|
||||
|
||||
tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
|
||||
|
|
@ -167,8 +167,8 @@ tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
|
|||
SymbolReplacer symbol_replacer;
|
||||
TokenExtractor extractor;
|
||||
|
||||
// First, extract all of the grammar's tokens into the lexical grammar.
|
||||
vector<InitialSyntaxGrammar::Variable> processed_variables;
|
||||
// Extract all of the grammar's tokens into the lexical grammar.
|
||||
vector<Variable> processed_variables;
|
||||
for (const auto &variable : grammar.variables) {
|
||||
processed_variables.push_back({
|
||||
variable.name,
|
||||
|
|
@ -177,6 +177,15 @@ tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
|
|||
});
|
||||
}
|
||||
|
||||
vector<Variable> processed_external_tokens;
|
||||
for (const auto &external_token : grammar.external_tokens) {
|
||||
processed_external_tokens.push_back({
|
||||
external_token.name,
|
||||
external_token.type,
|
||||
extractor.apply(external_token.rule)
|
||||
});
|
||||
}
|
||||
|
||||
for (const auto &extracted_token : extractor.tokens) {
|
||||
auto expansion = expand_token(extracted_token.rule);
|
||||
if (expansion.error) return make_tuple(
|
||||
|
|
@ -269,12 +278,22 @@ tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
|
|||
if (error) return make_tuple(syntax_grammar, lexical_grammar, error);
|
||||
}
|
||||
|
||||
for (const ExternalToken &external_token : grammar.external_tokens) {
|
||||
Symbol internal_token = symbol_replacer.replace_symbol(
|
||||
external_token.corresponding_internal_token
|
||||
);
|
||||
for (const auto &external_token : processed_external_tokens) {
|
||||
Rule new_rule = symbol_replacer.apply(external_token.rule);
|
||||
|
||||
if (internal_token.is_non_terminal()) {
|
||||
if (!new_rule.is<Symbol>()) {
|
||||
return make_tuple(
|
||||
syntax_grammar,
|
||||
lexical_grammar,
|
||||
CompileError(
|
||||
TSCompileErrorTypeInvalidExternalToken,
|
||||
"Non-symbol rule expressions can't be used as external tokens"
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
Symbol symbol = new_rule.get_unchecked<Symbol>();
|
||||
if (symbol.is_non_terminal()) {
|
||||
return make_tuple(
|
||||
syntax_grammar,
|
||||
lexical_grammar,
|
||||
|
|
@ -285,11 +304,19 @@ tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
|
|||
);
|
||||
}
|
||||
|
||||
syntax_grammar.external_tokens.push_back(ExternalToken{
|
||||
external_token.name,
|
||||
external_token.type,
|
||||
internal_token
|
||||
});
|
||||
if (symbol.is_external()) {
|
||||
syntax_grammar.external_tokens.push_back(ExternalToken{
|
||||
external_token.name,
|
||||
external_token.type,
|
||||
rules::NONE()
|
||||
});
|
||||
} else {
|
||||
syntax_grammar.external_tokens.push_back(ExternalToken{
|
||||
lexical_grammar.variables[symbol.index].name,
|
||||
external_token.type,
|
||||
symbol
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return make_tuple(syntax_grammar, lexical_grammar, CompileError::none());
|
||||
|
|
|
|||
|
|
@ -89,7 +89,7 @@ class FlattenRule {
|
|||
}
|
||||
};
|
||||
|
||||
SyntaxVariable flatten_rule(const InitialSyntaxGrammar::Variable &variable) {
|
||||
SyntaxVariable flatten_rule(const Variable &variable) {
|
||||
vector<Production> productions;
|
||||
|
||||
for (const Rule &rule_component : extract_choices(variable.rule)) {
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@
|
|||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
|
||||
SyntaxVariable flatten_rule(const InitialSyntaxGrammar::Variable &variable);
|
||||
SyntaxVariable flatten_rule(const Variable &variable);
|
||||
std::pair<SyntaxGrammar, CompileError> flatten_grammar(const InitialSyntaxGrammar &);
|
||||
|
||||
} // namespace prepare_grammar
|
||||
|
|
|
|||
|
|
@ -5,22 +5,13 @@
|
|||
#include <vector>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/grammar.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
|
||||
struct InitialSyntaxGrammar {
|
||||
struct Variable {
|
||||
std::string name;
|
||||
VariableType type;
|
||||
rules::Rule rule;
|
||||
|
||||
inline bool operator==(const Variable &other) const {
|
||||
return name == other.name && type == other.type && rule == other.rule;
|
||||
}
|
||||
};
|
||||
|
||||
std::vector<Variable> variables;
|
||||
std::set<rules::Symbol> extra_tokens;
|
||||
std::set<std::set<rules::Symbol>> expected_conflicts;
|
||||
|
|
|
|||
|
|
@ -21,14 +21,21 @@ class SymbolInterner {
|
|||
public:
|
||||
Rule apply(const Rule &rule) {
|
||||
return rule.match(
|
||||
[&](const rules::Blank &blank) -> Rule { return blank; },
|
||||
[&](const rules::Blank &blank) -> Rule {
|
||||
return blank;
|
||||
},
|
||||
|
||||
[&](const rules::NamedSymbol &symbol) {
|
||||
return intern_symbol(symbol);
|
||||
},
|
||||
|
||||
[&](const rules::String &string) { return string; },
|
||||
[&](const rules::Pattern &pattern) { return pattern; },
|
||||
[&](const rules::String &string) {
|
||||
return string;
|
||||
},
|
||||
|
||||
[&](const rules::Pattern &pattern) {
|
||||
return pattern;
|
||||
},
|
||||
|
||||
[&](const rules::Choice &choice) {
|
||||
vector<rules::Rule> elements;
|
||||
|
|
@ -58,12 +65,18 @@ class SymbolInterner {
|
|||
}
|
||||
|
||||
Symbol intern_symbol(rules::NamedSymbol named_symbol) {
|
||||
for (size_t i = 0; i < grammar.variables.size(); i++)
|
||||
if (grammar.variables[i].name == named_symbol.value)
|
||||
for (size_t i = 0; i < grammar.variables.size(); i++) {
|
||||
if (grammar.variables[i].name == named_symbol.value) {
|
||||
return Symbol::non_terminal(i);
|
||||
for (size_t i = 0; i < grammar.external_tokens.size(); i++)
|
||||
if (grammar.external_tokens[i].name == named_symbol.value)
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < grammar.external_tokens.size(); i++) {
|
||||
if (grammar.external_tokens[i].name == named_symbol.value) {
|
||||
return Symbol::external(i);
|
||||
}
|
||||
}
|
||||
|
||||
missing_rule_name = named_symbol.value;
|
||||
return rules::NONE();
|
||||
}
|
||||
|
|
@ -81,23 +94,21 @@ CompileError missing_rule_error(string rule_name) {
|
|||
pair<InternedGrammar, CompileError> intern_symbols(const InputGrammar &grammar) {
|
||||
InternedGrammar result;
|
||||
|
||||
SymbolInterner interner(grammar);
|
||||
|
||||
for (auto &external_token : grammar.external_tokens) {
|
||||
Symbol corresponding_internal_token = rules::NONE();
|
||||
for (size_t i = 0, n = grammar.variables.size(); i < n; i++) {
|
||||
if (grammar.variables[i].name == external_token.name) {
|
||||
corresponding_internal_token = Symbol::non_terminal(i);
|
||||
break;
|
||||
}
|
||||
auto new_rule = interner.apply(external_token.rule);
|
||||
if (!interner.missing_rule_name.empty()) {
|
||||
return { result, missing_rule_error(interner.missing_rule_name) };
|
||||
}
|
||||
|
||||
result.external_tokens.push_back(ExternalToken{
|
||||
result.external_tokens.push_back(Variable{
|
||||
external_token.name,
|
||||
external_token.name[0] == '_' ? VariableTypeHidden : VariableTypeNamed,
|
||||
corresponding_internal_token
|
||||
external_token.name[0] == '_' ? VariableTypeHidden : external_token.type,
|
||||
new_rule
|
||||
});
|
||||
}
|
||||
|
||||
SymbolInterner interner(grammar);
|
||||
|
||||
for (auto &variable : grammar.variables) {
|
||||
auto new_rule = interner.apply(variable.rule);
|
||||
|
|
@ -105,7 +116,7 @@ pair<InternedGrammar, CompileError> intern_symbols(const InputGrammar &grammar)
|
|||
return { result, missing_rule_error(interner.missing_rule_name) };
|
||||
}
|
||||
|
||||
result.variables.push_back(InternedGrammar::Variable{
|
||||
result.variables.push_back(Variable{
|
||||
variable.name,
|
||||
variable.name[0] == '_' ? VariableTypeHidden : VariableTypeNamed,
|
||||
new_rule
|
||||
|
|
@ -131,7 +142,7 @@ pair<InternedGrammar, CompileError> intern_symbols(const InputGrammar &grammar)
|
|||
result.expected_conflicts.insert(entry);
|
||||
}
|
||||
|
||||
return { result, CompileError::none() };
|
||||
return {result, CompileError::none()};
|
||||
}
|
||||
|
||||
} // namespace prepare_grammar
|
||||
|
|
|
|||
|
|
@ -11,20 +11,10 @@ namespace tree_sitter {
|
|||
namespace prepare_grammar {
|
||||
|
||||
struct InternedGrammar {
|
||||
struct Variable {
|
||||
std::string name;
|
||||
VariableType type;
|
||||
rules::Rule rule;
|
||||
|
||||
bool operator==(const Variable &other) const {
|
||||
return name == other.name && type == other.type && rule == other.rule;
|
||||
}
|
||||
};
|
||||
|
||||
std::vector<Variable> variables;
|
||||
std::vector<rules::Rule> extra_tokens;
|
||||
std::set<std::set<rules::Symbol>> expected_conflicts;
|
||||
std::vector<ExternalToken> external_tokens;
|
||||
std::vector<Variable> external_tokens;
|
||||
};
|
||||
|
||||
} // namespace prepare_grammar
|
||||
|
|
|
|||
|
|
@ -30,6 +30,18 @@ struct SyntaxVariable {
|
|||
|
||||
using ConflictSet = std::set<rules::Symbol>;
|
||||
|
||||
struct ExternalToken {
|
||||
std::string name;
|
||||
VariableType type;
|
||||
rules::Symbol corresponding_internal_token;
|
||||
|
||||
inline bool operator==(const ExternalToken &other) const {
|
||||
return name == other.name &&
|
||||
type == other.type &&
|
||||
corresponding_internal_token == other.corresponding_internal_token;
|
||||
}
|
||||
};
|
||||
|
||||
struct SyntaxGrammar {
|
||||
std::vector<SyntaxVariable> variables;
|
||||
std::set<rules::Symbol> extra_tokens;
|
||||
|
|
|
|||
|
|
@ -6,11 +6,6 @@
|
|||
using namespace rules;
|
||||
using prepare_grammar::InitialSyntaxGrammar;
|
||||
using prepare_grammar::expand_repeats;
|
||||
using Variable = InitialSyntaxGrammar::Variable;
|
||||
|
||||
bool operator==(const Variable &left, const Variable &right) {
|
||||
return left.name == right.name && left.rule == right.rule && left.type == right.type;
|
||||
}
|
||||
|
||||
START_TEST
|
||||
|
||||
|
|
|
|||
|
|
@ -11,14 +11,12 @@ using namespace rules;
|
|||
using prepare_grammar::extract_tokens;
|
||||
using prepare_grammar::InternedGrammar;
|
||||
using prepare_grammar::InitialSyntaxGrammar;
|
||||
using InternedVariable = InternedGrammar::Variable;
|
||||
using InitialSyntaxVariable = InitialSyntaxGrammar::Variable;
|
||||
|
||||
describe("extract_tokens", []() {
|
||||
it("moves strings, patterns, and sub-rules marked as tokens into the lexical grammar", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
InternedVariable{
|
||||
Variable{
|
||||
"rule_A",
|
||||
VariableTypeNamed,
|
||||
Repeat{Rule::seq({
|
||||
|
|
@ -34,17 +32,17 @@ describe("extract_tokens", []() {
|
|||
}),
|
||||
})}
|
||||
},
|
||||
InternedVariable{
|
||||
Variable{
|
||||
"rule_B",
|
||||
VariableTypeNamed,
|
||||
Pattern{"h+"}
|
||||
},
|
||||
InternedVariable{
|
||||
Variable{
|
||||
"rule_C",
|
||||
VariableTypeNamed,
|
||||
Rule::choice({ String{"i"}, Blank{} })
|
||||
},
|
||||
InternedVariable{
|
||||
Variable{
|
||||
"rule_D",
|
||||
VariableTypeNamed,
|
||||
Repeat{Symbol::non_terminal(3)}
|
||||
|
|
@ -61,8 +59,8 @@ describe("extract_tokens", []() {
|
|||
|
||||
AssertThat(error, Equals(CompileError::none()));
|
||||
|
||||
AssertThat(syntax_grammar.variables, Equals(vector<InitialSyntaxVariable>{
|
||||
InitialSyntaxVariable{
|
||||
AssertThat(syntax_grammar.variables, Equals(vector<Variable>{
|
||||
Variable{
|
||||
"rule_A",
|
||||
VariableTypeNamed,
|
||||
Repeat{Rule::seq({
|
||||
|
|
@ -88,13 +86,13 @@ describe("extract_tokens", []() {
|
|||
})}
|
||||
},
|
||||
|
||||
InitialSyntaxVariable{
|
||||
Variable{
|
||||
"rule_C",
|
||||
VariableTypeNamed,
|
||||
Rule::choice({Symbol::terminal(4), Blank{}})
|
||||
},
|
||||
|
||||
InitialSyntaxVariable{
|
||||
Variable{
|
||||
"rule_D",
|
||||
VariableTypeNamed,
|
||||
Repeat{Symbol::non_terminal(2)}
|
||||
|
|
@ -168,8 +166,8 @@ describe("extract_tokens", []() {
|
|||
InitialSyntaxGrammar &syntax_grammar = get<0>(result);
|
||||
LexicalGrammar &lexical_grammar = get<1>(result);
|
||||
|
||||
AssertThat(syntax_grammar.variables, Equals(vector<InitialSyntaxVariable> {
|
||||
InitialSyntaxVariable{
|
||||
AssertThat(syntax_grammar.variables, Equals(vector<Variable> {
|
||||
Variable{
|
||||
"rule_A",
|
||||
VariableTypeNamed,
|
||||
Rule::seq({
|
||||
|
|
@ -192,17 +190,17 @@ describe("extract_tokens", []() {
|
|||
|
||||
it("does not move entire rules into the lexical grammar if their content is used elsewhere in the grammar", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
InternedVariable{
|
||||
Variable{
|
||||
"rule_A",
|
||||
VariableTypeNamed,
|
||||
Rule::seq({ Symbol::non_terminal(1), String{"ab"} })
|
||||
},
|
||||
InternedVariable{
|
||||
Variable{
|
||||
"rule_B",
|
||||
VariableTypeNamed,
|
||||
String{"cd"}
|
||||
},
|
||||
InternedVariable{
|
||||
Variable{
|
||||
"rule_C",
|
||||
VariableTypeNamed,
|
||||
Rule::seq({ String{"ef"}, String{"cd"} })
|
||||
|
|
@ -212,18 +210,18 @@ describe("extract_tokens", []() {
|
|||
InitialSyntaxGrammar &syntax_grammar = get<0>(result);
|
||||
LexicalGrammar &lexical_grammar = get<1>(result);
|
||||
|
||||
AssertThat(syntax_grammar.variables, Equals(vector<InitialSyntaxVariable>({
|
||||
InitialSyntaxVariable{
|
||||
AssertThat(syntax_grammar.variables, Equals(vector<Variable>({
|
||||
Variable{
|
||||
"rule_A",
|
||||
VariableTypeNamed,
|
||||
Rule::seq({ Symbol::non_terminal(1), Symbol::terminal(0) })
|
||||
},
|
||||
InitialSyntaxVariable{
|
||||
Variable{
|
||||
"rule_B",
|
||||
VariableTypeNamed,
|
||||
Symbol::terminal(1)
|
||||
},
|
||||
InitialSyntaxVariable{
|
||||
Variable{
|
||||
"rule_C",
|
||||
VariableTypeNamed,
|
||||
Rule::seq({ Symbol::terminal(2), Symbol::terminal(1) })
|
||||
|
|
@ -255,17 +253,17 @@ describe("extract_tokens", []() {
|
|||
it("renumbers the grammar's expected conflict symbols based on any moved rules", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
InternedVariable{
|
||||
Variable{
|
||||
"rule_A",
|
||||
VariableTypeNamed,
|
||||
String{"ok"}
|
||||
},
|
||||
InternedVariable{
|
||||
Variable{
|
||||
"rule_B",
|
||||
VariableTypeNamed,
|
||||
Repeat{Symbol::non_terminal(0)}
|
||||
},
|
||||
InternedVariable{
|
||||
Variable{
|
||||
"rule_C",
|
||||
VariableTypeNamed,
|
||||
Repeat{Seq{Symbol::non_terminal(0), Symbol::non_terminal(0)}}
|
||||
|
|
@ -292,7 +290,7 @@ describe("extract_tokens", []() {
|
|||
it("adds inline extra tokens to the lexical grammar's separators", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
InternedVariable{"rule_A", VariableTypeNamed, String{"x"}},
|
||||
Variable{"rule_A", VariableTypeNamed, String{"x"}},
|
||||
},
|
||||
{
|
||||
String{"y"},
|
||||
|
|
@ -314,8 +312,8 @@ describe("extract_tokens", []() {
|
|||
it("handles inline extra tokens that match tokens in the grammar", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
InternedVariable{"rule_A", VariableTypeNamed, String{"x"}},
|
||||
InternedVariable{"rule_B", VariableTypeNamed, String{"y"}},
|
||||
Variable{"rule_A", VariableTypeNamed, String{"x"}},
|
||||
Variable{"rule_B", VariableTypeNamed, String{"y"}},
|
||||
},
|
||||
{
|
||||
String{"y"},
|
||||
|
|
@ -332,17 +330,17 @@ describe("extract_tokens", []() {
|
|||
it("updates extra symbols according to the new symbol numbers", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
InternedVariable{
|
||||
Variable{
|
||||
"rule_A",
|
||||
VariableTypeNamed,
|
||||
Rule::seq({ String{"w"}, String{"x"}, Symbol::non_terminal(1) })
|
||||
},
|
||||
InternedVariable{
|
||||
Variable{
|
||||
"rule_B",
|
||||
VariableTypeNamed,
|
||||
String{"y"}
|
||||
},
|
||||
InternedVariable{
|
||||
Variable{
|
||||
"rule_C",
|
||||
VariableTypeNamed,
|
||||
String{"z"}
|
||||
|
|
@ -367,12 +365,12 @@ describe("extract_tokens", []() {
|
|||
it("returns an error if any extra tokens are non-token symbols", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
InternedVariable{
|
||||
Variable{
|
||||
"rule_A",
|
||||
VariableTypeNamed,
|
||||
Rule::seq({ String{"x"}, Symbol::non_terminal(1) })
|
||||
},
|
||||
InternedVariable{
|
||||
Variable{
|
||||
"rule_B",
|
||||
VariableTypeNamed,
|
||||
Rule::seq({ String{"y"}, String{"z"} })
|
||||
|
|
@ -428,7 +426,7 @@ describe("extract_tokens", []() {
|
|||
{},
|
||||
{},
|
||||
{
|
||||
ExternalToken {"rule_A", VariableTypeNamed, Symbol::non_terminal(0)}
|
||||
Variable{"rule_A", VariableTypeNamed, Symbol::non_terminal(0)}
|
||||
}
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ describe("intern_symbols", []() {
|
|||
auto result = intern_symbols(grammar);
|
||||
|
||||
AssertThat(result.second, Equals(CompileError::none()));
|
||||
AssertThat(result.first.variables, Equals(vector<prepare_grammar::InternedGrammar::Variable>{
|
||||
AssertThat(result.first.variables, Equals(vector<Variable>{
|
||||
{"x", VariableTypeNamed, Rule::choice({ Symbol::non_terminal(1), Symbol::non_terminal(2) })},
|
||||
{"y", VariableTypeNamed, Symbol::non_terminal(2)},
|
||||
{"_z", VariableTypeHidden, String{"stuff"}},
|
||||
|
|
@ -74,28 +74,28 @@ describe("intern_symbols", []() {
|
|||
{},
|
||||
{},
|
||||
{
|
||||
ExternalToken{
|
||||
Variable{
|
||||
"w",
|
||||
VariableTypeNamed,
|
||||
NONE()
|
||||
NamedSymbol{"w"}
|
||||
},
|
||||
ExternalToken{
|
||||
Variable{
|
||||
"z",
|
||||
VariableTypeNamed,
|
||||
NONE()
|
||||
NamedSymbol{"z"}
|
||||
},
|
||||
}
|
||||
};
|
||||
|
||||
auto result = intern_symbols(grammar);
|
||||
|
||||
AssertThat(result.first.external_tokens, Equals(vector<ExternalToken>{
|
||||
ExternalToken{
|
||||
AssertThat(result.first.external_tokens, Equals(vector<Variable>{
|
||||
Variable{
|
||||
"w",
|
||||
VariableTypeNamed,
|
||||
rules::NONE()
|
||||
Symbol::external(0)
|
||||
},
|
||||
ExternalToken{
|
||||
Variable{
|
||||
"z",
|
||||
VariableTypeNamed,
|
||||
Symbol::non_terminal(2)
|
||||
|
|
|
|||
41
test/fixtures/test_grammars/external_and_internal_anonymous_tokens/corpus.txt
vendored
Normal file
41
test/fixtures/test_grammars/external_and_internal_anonymous_tokens/corpus.txt
vendored
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
=========================================
|
||||
single-line statements - internal tokens
|
||||
=========================================
|
||||
|
||||
a b
|
||||
|
||||
---
|
||||
|
||||
(statement (variable) (variable))
|
||||
|
||||
=========================================
|
||||
multi-line statements - internal tokens
|
||||
=========================================
|
||||
|
||||
a
|
||||
b
|
||||
|
||||
---
|
||||
|
||||
(statement (variable) (variable))
|
||||
|
||||
=========================================
|
||||
single-line statements - external tokens
|
||||
=========================================
|
||||
|
||||
'hello' 'world'
|
||||
|
||||
---
|
||||
|
||||
(statement (string) (string))
|
||||
|
||||
=========================================
|
||||
multi-line statements - external tokens
|
||||
=========================================
|
||||
|
||||
'hello'
|
||||
'world'
|
||||
|
||||
---
|
||||
|
||||
(statement (string) (string))
|
||||
35
test/fixtures/test_grammars/external_and_internal_anonymous_tokens/grammar.json
vendored
Normal file
35
test/fixtures/test_grammars/external_and_internal_anonymous_tokens/grammar.json
vendored
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
{
|
||||
"name": "external_and_internal_anonymous_tokens",
|
||||
|
||||
"externals": [
|
||||
{"type": "SYMBOL", "name": "string"},
|
||||
{"type": "STRING", "value": "\n"}
|
||||
],
|
||||
|
||||
"extras": [
|
||||
{"type": "PATTERN", "value": "\\s"}
|
||||
],
|
||||
|
||||
"rules": {
|
||||
"statement": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "_expression"},
|
||||
{"type": "SYMBOL", "name": "_expression"},
|
||||
{"type": "STRING", "value": "\n"}
|
||||
]
|
||||
},
|
||||
|
||||
"_expression": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "string"},
|
||||
{"type": "SYMBOL", "name": "variable"},
|
||||
{"type": "SYMBOL", "name": "number"}
|
||||
]
|
||||
},
|
||||
|
||||
"variable": {"type": "PATTERN", "value": "\\a+"},
|
||||
"number": {"type": "PATTERN", "value": "\\d+"}
|
||||
}
|
||||
}
|
||||
1
test/fixtures/test_grammars/external_and_internal_anonymous_tokens/readme.md
vendored
Normal file
1
test/fixtures/test_grammars/external_and_internal_anonymous_tokens/readme.md
vendored
Normal file
|
|
@ -0,0 +1 @@
|
|||
This grammar is just like the `external_and_internal_tokens` grammar, except that the shared external token is *anonymous*; it's specified as a string in the grammar.
|
||||
23
test/fixtures/test_grammars/external_and_internal_anonymous_tokens/scanner.c
vendored
Normal file
23
test/fixtures/test_grammars/external_and_internal_anonymous_tokens/scanner.c
vendored
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
#include "../external_and_internal_tokens/scanner.c"
|
||||
|
||||
/* Scanner constructor stub: allocates nothing and returns a NULL payload. */
void *tree_sitter_external_and_internal_anonymous_tokens_external_scanner_create() {
  return NULL;
}
|
||||
|
||||
/* Scanner destructor stub: a no-op; the payload is neither read nor released. */
void tree_sitter_external_and_internal_anonymous_tokens_external_scanner_destroy(void *payload) {
  (void)payload;
}
|
||||
|
||||
/* Scanner reset stub: a no-op; the payload is left untouched. */
void tree_sitter_external_and_internal_anonymous_tokens_external_scanner_reset(void *payload) {
  (void)payload;
}
|
||||
|
||||
/* Serialize stub: writes nothing into `state` and returns true unconditionally. */
bool tree_sitter_external_and_internal_anonymous_tokens_external_scanner_serialize(
  void *payload,
  TSExternalTokenState state
) {
  return true;
}
|
||||
|
||||
/* Deserialize stub: a no-op; neither `payload` nor `state` is read. */
void tree_sitter_external_and_internal_anonymous_tokens_external_scanner_deserialize(
  void *payload,
  TSExternalTokenState state
) {
}
|
||||
|
||||
bool tree_sitter_external_and_internal_anonymous_tokens_external_scanner_scan(
|
||||
void *payload,
|
||||
TSLexer *lexer,
|
||||
const bool *whitelist
|
||||
) {
|
||||
return tree_sitter_external_and_internal_tokens_external_scanner_scan(
|
||||
payload,
|
||||
lexer,
|
||||
whitelist
|
||||
);
|
||||
}
|
||||
|
|
@ -2,8 +2,8 @@
|
|||
"name": "external_and_internal_tokens",
|
||||
|
||||
"externals": [
|
||||
"string",
|
||||
"line_break"
|
||||
{"type": "SYMBOL", "name": "string"},
|
||||
{"type": "SYMBOL", "name": "line_break"}
|
||||
],
|
||||
|
||||
"extras": [
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
"name": "external_extra_tokens",
|
||||
|
||||
"externals": [
|
||||
"comment"
|
||||
{"type": "SYMBOL", "name": "comment"}
|
||||
],
|
||||
|
||||
"extras": [
|
||||
|
|
|
|||
|
|
@ -2,9 +2,9 @@
|
|||
"name": "external_tokens",
|
||||
|
||||
"externals": [
|
||||
"_percent_string",
|
||||
"_percent_string_start",
|
||||
"_percent_string_end"
|
||||
{"type": "SYMBOL", "name": "_percent_string"},
|
||||
{"type": "SYMBOL", "name": "_percent_string_start"},
|
||||
{"type": "SYMBOL", "name": "_percent_string_end"}
|
||||
],
|
||||
|
||||
"extras": [
|
||||
|
|
|
|||
|
|
@ -132,7 +132,7 @@ ostream &operator<<(ostream &stream, const Rule &rule) {
|
|||
|
||||
} // namespace rules
|
||||
|
||||
ostream &operator<<(ostream &stream, const InputGrammar::Variable &variable) {
|
||||
ostream &operator<<(ostream &stream, const Variable &variable) {
|
||||
return stream << "(Variable " << variable.name << " " << variable.rule << ")";
|
||||
}
|
||||
|
||||
|
|
@ -165,18 +165,6 @@ ostream &operator<<(ostream &stream, const PrecedenceRange &range) {
|
|||
}
|
||||
}
|
||||
|
||||
namespace prepare_grammar {
|
||||
|
||||
ostream &operator<<(ostream &stream, const prepare_grammar::InternedGrammar::Variable &variable) {
|
||||
return stream << "(Variable " << variable.name << " " << variable.rule << ")";
|
||||
}
|
||||
|
||||
ostream &operator<<(ostream &stream, const prepare_grammar::InitialSyntaxGrammar::Variable &variable) {
|
||||
return stream << "(Variable " << variable.name << " " << variable.rule << ")";
|
||||
}
|
||||
|
||||
} // namespace prepare_grammar
|
||||
|
||||
namespace build_tables {
|
||||
|
||||
ostream &operator<<(ostream &stream, const LexItem &item) {
|
||||
|
|
|
|||
|
|
@ -111,6 +111,7 @@ ostream &operator<<(ostream &, const CompileError &);
|
|||
ostream &operator<<(ostream &, const ExternalToken &);
|
||||
ostream &operator<<(ostream &, const ProductionStep &);
|
||||
ostream &operator<<(ostream &, const PrecedenceRange &);
|
||||
ostream &operator<<(ostream &, const Variable &);
|
||||
ostream &operator<<(ostream &, const LexicalVariable &);
|
||||
|
||||
namespace rules {
|
||||
|
|
@ -130,13 +131,6 @@ ostream &operator<<(ostream &stream, const Rule &rule);
|
|||
|
||||
} // namespace rules
|
||||
|
||||
namespace prepare_grammar {
|
||||
|
||||
ostream &operator<<(ostream &, const InitialSyntaxGrammar::Variable &);
|
||||
ostream &operator<<(ostream &, const InternedGrammar::Variable &);
|
||||
|
||||
} // namespace prepare_grammar
|
||||
|
||||
namespace build_tables {
|
||||
|
||||
class LexItem;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue