Rename ubiquitous_tokens -> extra_tokens in compiler API
They were already called this in the runtime code. 'Extra' is just easier to say.
This commit is contained in:
parent
f065eb0480
commit
1c6ad5f7e4
22 changed files with 51 additions and 51 deletions
|
|
@ -74,10 +74,10 @@ int main() {
|
|||
{ "variable", pattern("[a-zA-Z]+\\w*") },
|
||||
{ "comment", pattern("//.*") },
|
||||
|
||||
}).ubiquitous_tokens({
|
||||
}).extra_tokens({
|
||||
|
||||
// Things that can appear anywhere in the language are expressed as
|
||||
// 'ubiquitous tokens'.
|
||||
// 'extra tokens'.
|
||||
sym("comment"),
|
||||
pattern("\\s+")
|
||||
});
|
||||
|
|
|
|||
|
|
@ -29,15 +29,15 @@ rule_ptr token(const rule_ptr &rule);
|
|||
|
||||
class Grammar {
|
||||
const std::vector<std::pair<std::string, rule_ptr>> rules_;
|
||||
std::vector<rule_ptr> ubiquitous_tokens_;
|
||||
std::vector<rule_ptr> extra_tokens_;
|
||||
std::vector<std::vector<std::string>> expected_conflicts_;
|
||||
|
||||
public:
|
||||
explicit Grammar(const std::vector<std::pair<std::string, rule_ptr>> &);
|
||||
Grammar &ubiquitous_tokens(const std::vector<rule_ptr> &);
|
||||
Grammar &extra_tokens(const std::vector<rule_ptr> &);
|
||||
Grammar &expected_conflicts(const std::vector<std::vector<std::string>> &);
|
||||
const std::vector<std::pair<std::string, rule_ptr>> &rules() const;
|
||||
const std::vector<rule_ptr> &ubiquitous_tokens() const;
|
||||
const std::vector<rule_ptr> &extra_tokens() const;
|
||||
const std::vector<std::vector<std::string>> &expected_conflicts() const;
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -141,8 +141,8 @@ describe("extract_tokens", []() {
|
|||
})));
|
||||
});
|
||||
|
||||
describe("handling ubiquitous tokens", [&]() {
|
||||
it("adds inline ubiquitous tokens to the lexical grammar's separators", [&]() {
|
||||
describe("handling extra tokens", [&]() {
|
||||
it("adds inline extra tokens to the lexical grammar's separators", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
Variable("rule_A", VariableTypeNamed, str("x")),
|
||||
}, {
|
||||
|
|
@ -156,10 +156,10 @@ describe("extract_tokens", []() {
|
|||
AssertThat(get<1>(result).separators[0], EqualsPointer(str("y")));
|
||||
AssertThat(get<1>(result).separators[1], EqualsPointer(pattern("\\s+")));
|
||||
|
||||
AssertThat(get<0>(result).ubiquitous_tokens, IsEmpty());
|
||||
AssertThat(get<0>(result).extra_tokens, IsEmpty());
|
||||
});
|
||||
|
||||
it("handles inline ubiquitous tokens that match tokens in the grammar", [&]() {
|
||||
it("handles inline extra tokens that match tokens in the grammar", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
Variable("rule_A", VariableTypeNamed, str("x")),
|
||||
Variable("rule_B", VariableTypeNamed, str("y")),
|
||||
|
|
@ -169,10 +169,10 @@ describe("extract_tokens", []() {
|
|||
|
||||
AssertThat(get<2>(result), Equals<const GrammarError *>(nullptr));
|
||||
AssertThat(get<1>(result).separators.size(), Equals<size_t>(0));
|
||||
AssertThat(get<0>(result).ubiquitous_tokens, Equals(set<Symbol>({ Symbol(1, true) })));
|
||||
AssertThat(get<0>(result).extra_tokens, Equals(set<Symbol>({ Symbol(1, true) })));
|
||||
});
|
||||
|
||||
it("updates ubiquitous symbols according to the new symbol numbers", [&]() {
|
||||
it("updates extra symbols according to the new symbol numbers", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
Variable("rule_A", VariableTypeNamed, seq({ str("w"), str("x"), i_sym(1) })),
|
||||
Variable("rule_B", VariableTypeNamed, str("y")),
|
||||
|
|
@ -183,14 +183,14 @@ describe("extract_tokens", []() {
|
|||
|
||||
AssertThat(get<2>(result), Equals<const GrammarError *>(nullptr));
|
||||
|
||||
AssertThat(get<0>(result).ubiquitous_tokens, Equals(set<Symbol>({
|
||||
AssertThat(get<0>(result).extra_tokens, Equals(set<Symbol>({
|
||||
{ Symbol(3, true) },
|
||||
})));
|
||||
|
||||
AssertThat(get<1>(result).separators, IsEmpty());
|
||||
});
|
||||
|
||||
it("returns an error if any ubiquitous tokens are non-token symbols", [&]() {
|
||||
it("returns an error if any extra tokens are non-token symbols", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
Variable("rule_A", VariableTypeNamed, seq({ str("x"), i_sym(1) })),
|
||||
Variable("rule_B", VariableTypeNamed, seq({ str("y"), str("z") })),
|
||||
|
|
@ -202,7 +202,7 @@ describe("extract_tokens", []() {
|
|||
"Not a token: rule_B")));
|
||||
});
|
||||
|
||||
it("returns an error if any ubiquitous tokens are non-token rules", [&]() {
|
||||
it("returns an error if any extra tokens are non-token rules", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
Variable("rule_A", VariableTypeNamed, str("x")),
|
||||
Variable("rule_B", VariableTypeNamed, str("y")),
|
||||
|
|
|
|||
|
|
@ -38,18 +38,18 @@ describe("intern_symbols", []() {
|
|||
});
|
||||
});
|
||||
|
||||
it("translates the grammar's optional 'ubiquitous_tokens' to numerical symbols", [&]() {
|
||||
it("translates the grammar's optional 'extra_tokens' to numerical symbols", [&]() {
|
||||
auto grammar = Grammar({
|
||||
{ "x", choice({ sym("y"), sym("z") }) },
|
||||
{ "y", sym("z") },
|
||||
{ "z", str("stuff") }
|
||||
}).ubiquitous_tokens({ sym("z") });
|
||||
}).extra_tokens({ sym("z") });
|
||||
|
||||
auto result = intern_symbols(grammar);
|
||||
|
||||
AssertThat(result.second, Equals((GrammarError *)nullptr));
|
||||
AssertThat(result.first.ubiquitous_tokens.size(), Equals<size_t>(1));
|
||||
AssertThat(*result.first.ubiquitous_tokens.begin(), EqualsPointer(i_sym(2)));
|
||||
AssertThat(result.first.extra_tokens.size(), Equals<size_t>(1));
|
||||
AssertThat(*result.first.extra_tokens.begin(), EqualsPointer(i_sym(2)));
|
||||
});
|
||||
});
|
||||
|
||||
|
|
|
|||
2
spec/fixtures/grammars/anonymous_tokens.cc
vendored
2
spec/fixtures/grammars/anonymous_tokens.cc
vendored
|
|
@ -9,7 +9,7 @@ extern const Grammar anonymous_tokens = Grammar({
|
|||
str("\r"),
|
||||
pattern("\\d"),
|
||||
str("\"hello\"") }) },
|
||||
}).ubiquitous_tokens({
|
||||
}).extra_tokens({
|
||||
pattern("\\s"),
|
||||
});
|
||||
|
||||
|
|
|
|||
2
spec/fixtures/grammars/arithmetic.cc
vendored
2
spec/fixtures/grammars/arithmetic.cc
vendored
|
|
@ -34,7 +34,7 @@ extern const Grammar arithmetic = Grammar({
|
|||
{ "variable", pattern("[a-zA-Z\u03b1-\u03c9]+\\d*") },
|
||||
|
||||
{ "comment", pattern("#.*") },
|
||||
}).ubiquitous_tokens({
|
||||
}).extra_tokens({
|
||||
sym("comment"),
|
||||
pattern("\\s"),
|
||||
});
|
||||
|
|
|
|||
2
spec/fixtures/grammars/c.cc
vendored
2
spec/fixtures/grammars/c.cc
vendored
|
|
@ -251,7 +251,7 @@ extern const Grammar c = Grammar({
|
|||
pattern("[^\\*]"),
|
||||
pattern("\\*[^/]") })),
|
||||
str("*/") }) })) },
|
||||
}).ubiquitous_tokens({
|
||||
}).extra_tokens({
|
||||
sym("comment"),
|
||||
pattern("[ \t\r\n]"),
|
||||
}).expected_conflicts({
|
||||
|
|
|
|||
2
spec/fixtures/grammars/cpp.cc
vendored
2
spec/fixtures/grammars/cpp.cc
vendored
|
|
@ -211,7 +211,7 @@ extern const Grammar cpp = Grammar({
|
|||
{ "number", pattern("\\d+(\\.\\d+)?") },
|
||||
|
||||
{ "comment", pattern("//[^\n]*") },
|
||||
}).ubiquitous_tokens({
|
||||
}).extra_tokens({
|
||||
sym("comment"),
|
||||
pattern("[ \t\r\n]"),
|
||||
}).expected_conflicts({
|
||||
|
|
|
|||
2
spec/fixtures/grammars/golang.cc
vendored
2
spec/fixtures/grammars/golang.cc
vendored
|
|
@ -203,7 +203,7 @@ extern const Grammar golang = Grammar({
|
|||
|
||||
{ "comment", pattern("//[^\n]*") },
|
||||
|
||||
}).ubiquitous_tokens({
|
||||
}).extra_tokens({
|
||||
sym("comment"),
|
||||
sym("_line_break"),
|
||||
pattern("[ \t\r]"),
|
||||
|
|
|
|||
2
spec/fixtures/grammars/javascript.cc
vendored
2
spec/fixtures/grammars/javascript.cc
vendored
|
|
@ -349,7 +349,7 @@ extern const Grammar javascript = Grammar({
|
|||
str(")"),
|
||||
sym("statement_block") }) },
|
||||
|
||||
}).ubiquitous_tokens({
|
||||
}).extra_tokens({
|
||||
sym("comment"),
|
||||
sym("_line_break"),
|
||||
pattern("[ \t\r]"),
|
||||
|
|
|
|||
2
spec/fixtures/grammars/json.cc
vendored
2
spec/fixtures/grammars/json.cc
vendored
|
|
@ -22,7 +22,7 @@ extern const Grammar json = Grammar({
|
|||
{ "null", str("null") },
|
||||
{ "true", str("true") },
|
||||
{ "false", str("false") },
|
||||
}).ubiquitous_tokens({
|
||||
}).extra_tokens({
|
||||
pattern("\\s"),
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -151,9 +151,9 @@ describe("Parser", [&]() {
|
|||
});
|
||||
});
|
||||
|
||||
describe("handling ubiquitous tokens", [&]() {
|
||||
describe("handling extra tokens", [&]() {
|
||||
// In the javascript example grammar, ASI works by using newlines as
|
||||
// terminators in statements, but also as ubiquitous tokens.
|
||||
// terminators in statements, but also as extra tokens.
|
||||
before_each([&]() {
|
||||
ts_document_set_language(doc, ts_language_javascript());
|
||||
});
|
||||
|
|
@ -180,7 +180,7 @@ describe("Parser", [&]() {
|
|||
});
|
||||
});
|
||||
|
||||
describe("when several ubiquitous tokens appear in a row", [&]() {
|
||||
describe("when several extra tokens appear in a row", [&]() {
|
||||
it("is incorporated into the tree", [&]() {
|
||||
set_text(
|
||||
"fn()\n\n"
|
||||
|
|
@ -337,7 +337,7 @@ describe("Parser", [&]() {
|
|||
});
|
||||
});
|
||||
|
||||
describe("into a node containing a ubiquitous token", [&]() {
|
||||
describe("into a node containing an extra token", [&]() {
|
||||
before_each([&]() {
|
||||
set_text("123 *\n"
|
||||
"# a-comment\n"
|
||||
|
|
|
|||
|
|
@ -134,22 +134,22 @@ class ParseTableBuilder {
|
|||
|
||||
void add_shift_extra_actions(ParseStateId state_id) {
|
||||
ParseAction action = ParseAction::ShiftExtra();
|
||||
for (const Symbol &ubiquitous_symbol : grammar.ubiquitous_tokens)
|
||||
add_action(state_id, ubiquitous_symbol, action, null_item_set);
|
||||
for (const Symbol &extra_symbol : grammar.extra_tokens)
|
||||
add_action(state_id, extra_symbol, action, null_item_set);
|
||||
}
|
||||
|
||||
void add_reduce_extra_actions(ParseStateId state_id) {
|
||||
const ParseState &state = parse_table.states[state_id];
|
||||
|
||||
for (const Symbol &ubiquitous_symbol : grammar.ubiquitous_tokens) {
|
||||
const auto &actions_for_symbol = state.actions.find(ubiquitous_symbol);
|
||||
for (const Symbol &extra_symbol : grammar.extra_tokens) {
|
||||
const auto &actions_for_symbol = state.actions.find(extra_symbol);
|
||||
if (actions_for_symbol == state.actions.end())
|
||||
continue;
|
||||
|
||||
for (const ParseAction &action : actions_for_symbol->second)
|
||||
if (action.type == ParseActionTypeShift && !action.extra) {
|
||||
size_t dest_state_id = action.state_index;
|
||||
ParseAction reduce_extra = ParseAction::ReduceExtra(ubiquitous_symbol);
|
||||
ParseAction reduce_extra = ParseAction::ReduceExtra(extra_symbol);
|
||||
for (const auto &pair : state.actions)
|
||||
add_action(dest_state_id, pair.first, reduce_extra, null_item_set);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -172,7 +172,7 @@ class CCodeGenerator {
|
|||
|
||||
add(", ");
|
||||
|
||||
if (syntax_grammar.ubiquitous_tokens.count(symbol))
|
||||
if (syntax_grammar.extra_tokens.count(symbol))
|
||||
add(".extra = true");
|
||||
else
|
||||
add(".extra = false");
|
||||
|
|
|
|||
|
|
@ -9,22 +9,22 @@ using std::string;
|
|||
using std::vector;
|
||||
|
||||
Grammar::Grammar(const vector<pair<string, rule_ptr>> &rules)
|
||||
: rules_(rules), ubiquitous_tokens_({}) {}
|
||||
: rules_(rules), extra_tokens_({}) {}
|
||||
|
||||
const vector<pair<string, rule_ptr>> &Grammar::rules() const {
|
||||
return rules_;
|
||||
}
|
||||
|
||||
const vector<rule_ptr> &Grammar::ubiquitous_tokens() const {
|
||||
return ubiquitous_tokens_;
|
||||
const vector<rule_ptr> &Grammar::extra_tokens() const {
|
||||
return extra_tokens_;
|
||||
}
|
||||
|
||||
const vector<vector<string>> &Grammar::expected_conflicts() const {
|
||||
return expected_conflicts_;
|
||||
}
|
||||
|
||||
Grammar &Grammar::ubiquitous_tokens(const vector<rule_ptr> &ubiquitous_tokens) {
|
||||
ubiquitous_tokens_ = ubiquitous_tokens;
|
||||
Grammar &Grammar::extra_tokens(const vector<rule_ptr> &extra_tokens) {
|
||||
extra_tokens_ = extra_tokens;
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -62,7 +62,7 @@ class ExpandRepeats : public rules::IdentityRuleFn {
|
|||
InitialSyntaxGrammar expand_repeats(const InitialSyntaxGrammar &grammar) {
|
||||
InitialSyntaxGrammar result;
|
||||
result.variables = grammar.variables;
|
||||
result.ubiquitous_tokens = grammar.ubiquitous_tokens;
|
||||
result.extra_tokens = grammar.extra_tokens;
|
||||
result.expected_conflicts = grammar.expected_conflicts;
|
||||
|
||||
ExpandRepeats expander(result.variables.size());
|
||||
|
|
|
|||
|
|
@ -147,18 +147,18 @@ tuple<InitialSyntaxGrammar, LexicalGrammar, const GrammarError *> extract_tokens
|
|||
}
|
||||
|
||||
/*
|
||||
* The grammar's ubiquitous tokens can be either token rules or symbols
|
||||
* The grammar's extra tokens can be either token rules or symbols
|
||||
* pointing to token rules. If they are symbols, then they'll be handled by
|
||||
* the parser; add them to the syntax grammar's extra tokens. If they
|
||||
* are anonymous rules, they can be handled by the lexer; add them to the
|
||||
* lexical grammar's separator rules.
|
||||
*/
|
||||
for (const rule_ptr &rule : grammar.ubiquitous_tokens) {
|
||||
for (const rule_ptr &rule : grammar.extra_tokens) {
|
||||
int i = 0;
|
||||
bool used_elsewhere_in_grammar = false;
|
||||
for (const Variable &variable : lexical_grammar.variables) {
|
||||
if (variable.rule->operator==(*rule)) {
|
||||
syntax_grammar.ubiquitous_tokens.insert(Symbol(i, true));
|
||||
syntax_grammar.extra_tokens.insert(Symbol(i, true));
|
||||
used_elsewhere_in_grammar = true;
|
||||
}
|
||||
i++;
|
||||
|
|
@ -183,7 +183,7 @@ tuple<InitialSyntaxGrammar, LexicalGrammar, const GrammarError *> extract_tokens
|
|||
syntax_grammar, lexical_grammar,
|
||||
ubiq_token_err(syntax_grammar.variables[new_symbol.index].name));
|
||||
|
||||
syntax_grammar.ubiquitous_tokens.insert(new_symbol);
|
||||
syntax_grammar.extra_tokens.insert(new_symbol);
|
||||
}
|
||||
|
||||
return make_tuple(syntax_grammar, lexical_grammar, nullptr);
|
||||
|
|
|
|||
|
|
@ -75,7 +75,7 @@ class FlattenRule : public rules::RuleFn<void> {
|
|||
SyntaxGrammar flatten_grammar(const InitialSyntaxGrammar &grammar) {
|
||||
SyntaxGrammar result;
|
||||
result.expected_conflicts = grammar.expected_conflicts;
|
||||
result.ubiquitous_tokens = grammar.ubiquitous_tokens;
|
||||
result.extra_tokens = grammar.extra_tokens;
|
||||
|
||||
for (const Variable &variable : grammar.variables) {
|
||||
vector<Production> productions;
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ namespace prepare_grammar {
|
|||
|
||||
struct InitialSyntaxGrammar {
|
||||
std::vector<Variable> variables;
|
||||
std::set<rules::Symbol> ubiquitous_tokens;
|
||||
std::set<rules::Symbol> extra_tokens;
|
||||
std::set<ConflictSet> expected_conflicts;
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -61,11 +61,11 @@ pair<InternedGrammar, const GrammarError *> intern_symbols(const Grammar &gramma
|
|||
new_rule));
|
||||
}
|
||||
|
||||
for (auto &rule : grammar.ubiquitous_tokens()) {
|
||||
for (auto &rule : grammar.extra_tokens()) {
|
||||
auto new_rule = interner.apply(rule);
|
||||
if (!interner.missing_rule_name.empty())
|
||||
return { result, missing_rule_error(interner.missing_rule_name) };
|
||||
result.ubiquitous_tokens.push_back(new_rule);
|
||||
result.extra_tokens.push_back(new_rule);
|
||||
}
|
||||
|
||||
for (auto &names : grammar.expected_conflicts()) {
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ namespace prepare_grammar {
|
|||
|
||||
struct InternedGrammar {
|
||||
std::vector<Variable> variables;
|
||||
std::vector<rule_ptr> ubiquitous_tokens;
|
||||
std::vector<rule_ptr> extra_tokens;
|
||||
std::set<ConflictSet> expected_conflicts;
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@ struct SyntaxGrammar {
|
|||
const std::vector<Production> &productions(const rules::Symbol &) const;
|
||||
|
||||
std::vector<SyntaxVariable> variables;
|
||||
std::set<rules::Symbol> ubiquitous_tokens;
|
||||
std::set<rules::Symbol> extra_tokens;
|
||||
std::set<ConflictSet> expected_conflicts;
|
||||
};
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue