Collapse rules that contain only a single token
This commit is contained in:
parent
04d18b56ed
commit
0985fa3008
7 changed files with 70 additions and 61 deletions
|
|
@ -34,6 +34,25 @@ describe("preparing a grammar", []() {
|
|||
rules::character('b') }) }
|
||||
})));
|
||||
});
|
||||
|
||||
// Spec: a rule whose body contains no symbols is moved out of the first
// grammar returned by prepare_grammar and into the second one.
it("turns entire rules into tokens when they contain no symbols", [&]() {
|
||||
// Input: rule1 only references rule2; rule2 is a sequence of two characters.
auto result = prepare_grammar(Grammar({
|
||||
{ "rule1", sym("rule2") },
|
||||
{ "rule2", seq({
|
||||
character('a'),
|
||||
character('b') }) }
|
||||
}));
|
||||
|
||||
// The first grammar keeps only rule1, still referring to rule2 by name.
AssertThat(result.first, Equals(Grammar({
|
||||
{ "rule1", sym("rule2") }
|
||||
})));
|
||||
|
||||
// The second grammar holds rule2 under its original name, unchanged.
// NOTE(review): the leading "" is presumably the start rule name — confirm
// against the Grammar constructor.
AssertThat(result.second, Equals(Grammar("", {
|
||||
{ "rule2", seq({
|
||||
character('a'),
|
||||
character('b') }) }
|
||||
})));
|
||||
});
|
||||
});
|
||||
|
||||
END_TEST
|
||||
76
spec/fixtures/parsers/arithmetic.c
vendored
76
spec/fixtures/parsers/arithmetic.c
vendored
|
|
@ -4,15 +4,13 @@
|
|||
typedef enum {
|
||||
ts_symbol_expression,
|
||||
ts_symbol_term,
|
||||
ts_symbol_number,
|
||||
ts_symbol_factor,
|
||||
ts_symbol_variable,
|
||||
ts_symbol_6,
|
||||
ts_symbol_5,
|
||||
ts_symbol_4,
|
||||
ts_symbol_3,
|
||||
ts_symbol_2,
|
||||
ts_symbol_1,
|
||||
ts_symbol_2,
|
||||
ts_symbol_number,
|
||||
ts_symbol_variable,
|
||||
ts_symbol___END__
|
||||
} ts_symbol;
|
||||
|
||||
|
|
@ -20,23 +18,23 @@ static void ts_lex(TSParser *parser) {
|
|||
START_LEXER();
|
||||
switch (LEX_STATE()) {
|
||||
case 0:
|
||||
if (isdigit(LOOKAHEAD_CHAR()))
|
||||
ADVANCE(3);
|
||||
if (LOOKAHEAD_CHAR() == '(')
|
||||
ADVANCE(2);
|
||||
if (isalnum(LOOKAHEAD_CHAR()))
|
||||
ADVANCE(3);
|
||||
if (isdigit(LOOKAHEAD_CHAR()))
|
||||
ADVANCE(2);
|
||||
if (LOOKAHEAD_CHAR() == '(')
|
||||
ADVANCE(1);
|
||||
LEX_ERROR();
|
||||
case 1:
|
||||
if (isalnum(LOOKAHEAD_CHAR()))
|
||||
ADVANCE(1);
|
||||
ACCEPT_TOKEN(ts_symbol_1);
|
||||
case 2:
|
||||
ACCEPT_TOKEN(ts_symbol_2);
|
||||
case 3:
|
||||
if (isdigit(LOOKAHEAD_CHAR()))
|
||||
ADVANCE(2);
|
||||
ACCEPT_TOKEN(ts_symbol_number);
|
||||
case 3:
|
||||
if (isalnum(LOOKAHEAD_CHAR()))
|
||||
ADVANCE(3);
|
||||
ACCEPT_TOKEN(ts_symbol_4);
|
||||
ACCEPT_TOKEN(ts_symbol_variable);
|
||||
case 4:
|
||||
LEX_ERROR();
|
||||
case 5:
|
||||
|
|
@ -44,19 +42,19 @@ static void ts_lex(TSParser *parser) {
|
|||
ADVANCE(6);
|
||||
LEX_ERROR();
|
||||
case 6:
|
||||
ACCEPT_TOKEN(ts_symbol_6);
|
||||
ACCEPT_TOKEN(ts_symbol_4);
|
||||
case 7:
|
||||
if (LOOKAHEAD_CHAR() == '*')
|
||||
ADVANCE(8);
|
||||
LEX_ERROR();
|
||||
case 8:
|
||||
ACCEPT_TOKEN(ts_symbol_5);
|
||||
ACCEPT_TOKEN(ts_symbol_3);
|
||||
case 9:
|
||||
if (LOOKAHEAD_CHAR() == ')')
|
||||
ADVANCE(10);
|
||||
LEX_ERROR();
|
||||
case 10:
|
||||
ACCEPT_TOKEN(ts_symbol_3);
|
||||
ACCEPT_TOKEN(ts_symbol_2);
|
||||
default:
|
||||
LEX_ERROR();
|
||||
}
|
||||
|
|
@ -69,8 +67,6 @@ TSTree ts_parse_arithmetic(const char *input) {
|
|||
case 0:
|
||||
SET_LEX_STATE(0);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_4:
|
||||
SHIFT(13);
|
||||
case ts_symbol_variable:
|
||||
SHIFT(8);
|
||||
case ts_symbol_factor:
|
||||
|
|
@ -78,8 +74,6 @@ TSTree ts_parse_arithmetic(const char *input) {
|
|||
case ts_symbol_number:
|
||||
SHIFT(8);
|
||||
case ts_symbol_1:
|
||||
SHIFT(12);
|
||||
case ts_symbol_2:
|
||||
SHIFT(9);
|
||||
case ts_symbol_term:
|
||||
SHIFT(2);
|
||||
|
|
@ -99,7 +93,7 @@ TSTree ts_parse_arithmetic(const char *input) {
|
|||
case 2:
|
||||
SET_LEX_STATE(5);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_6:
|
||||
case ts_symbol_4:
|
||||
SHIFT(3);
|
||||
default:
|
||||
REDUCE(ts_symbol_expression, 1);
|
||||
|
|
@ -107,20 +101,16 @@ TSTree ts_parse_arithmetic(const char *input) {
|
|||
case 3:
|
||||
SET_LEX_STATE(0);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_4:
|
||||
SHIFT(13);
|
||||
case ts_symbol_1:
|
||||
SHIFT(12);
|
||||
case ts_symbol_term:
|
||||
SHIFT(4);
|
||||
case ts_symbol_2:
|
||||
SHIFT(9);
|
||||
case ts_symbol_variable:
|
||||
SHIFT(8);
|
||||
case ts_symbol_factor:
|
||||
SHIFT(5);
|
||||
case ts_symbol_1:
|
||||
SHIFT(9);
|
||||
case ts_symbol_number:
|
||||
SHIFT(8);
|
||||
case ts_symbol_term:
|
||||
SHIFT(4);
|
||||
default:
|
||||
PARSE_ERROR();
|
||||
}
|
||||
|
|
@ -133,7 +123,7 @@ TSTree ts_parse_arithmetic(const char *input) {
|
|||
case 5:
|
||||
SET_LEX_STATE(7);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_5:
|
||||
case ts_symbol_3:
|
||||
SHIFT(6);
|
||||
default:
|
||||
REDUCE(ts_symbol_term, 1);
|
||||
|
|
@ -141,12 +131,8 @@ TSTree ts_parse_arithmetic(const char *input) {
|
|||
case 6:
|
||||
SET_LEX_STATE(0);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_4:
|
||||
SHIFT(13);
|
||||
case ts_symbol_2:
|
||||
SHIFT(9);
|
||||
case ts_symbol_1:
|
||||
SHIFT(12);
|
||||
SHIFT(9);
|
||||
case ts_symbol_number:
|
||||
SHIFT(8);
|
||||
case ts_symbol_variable:
|
||||
|
|
@ -171,8 +157,6 @@ TSTree ts_parse_arithmetic(const char *input) {
|
|||
case 9:
|
||||
SET_LEX_STATE(0);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_4:
|
||||
SHIFT(13);
|
||||
case ts_symbol_variable:
|
||||
SHIFT(8);
|
||||
case ts_symbol_factor:
|
||||
|
|
@ -180,8 +164,6 @@ TSTree ts_parse_arithmetic(const char *input) {
|
|||
case ts_symbol_number:
|
||||
SHIFT(8);
|
||||
case ts_symbol_1:
|
||||
SHIFT(12);
|
||||
case ts_symbol_2:
|
||||
SHIFT(9);
|
||||
case ts_symbol_term:
|
||||
SHIFT(2);
|
||||
|
|
@ -193,7 +175,7 @@ TSTree ts_parse_arithmetic(const char *input) {
|
|||
case 10:
|
||||
SET_LEX_STATE(9);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_3:
|
||||
case ts_symbol_2:
|
||||
SHIFT(11);
|
||||
default:
|
||||
PARSE_ERROR();
|
||||
|
|
@ -204,18 +186,6 @@ TSTree ts_parse_arithmetic(const char *input) {
|
|||
default:
|
||||
REDUCE(ts_symbol_factor, 3);
|
||||
}
|
||||
case 12:
|
||||
SET_LEX_STATE(4);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
default:
|
||||
REDUCE(ts_symbol_variable, 1);
|
||||
}
|
||||
case 13:
|
||||
SET_LEX_STATE(4);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
default:
|
||||
REDUCE(ts_symbol_number, 1);
|
||||
}
|
||||
default:
|
||||
PARSE_ERROR();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,8 +11,22 @@ namespace tree_sitter {
|
|||
class TokenExtractor : rules::Visitor {
|
||||
public:
|
||||
rules::rule_ptr value;
|
||||
size_t anonymous_token_count = 0;
|
||||
unordered_map<string, const rules::rule_ptr> tokens;
|
||||
|
||||
// Runs apply() on one top-level grammar rule called `name`.
// When apply() yields a Symbol that is not equal to the input rule, the
// entry stored in `tokens` under that symbol's name is re-registered under
// `name`, the old entry is erased, anonymous_token_count is decremented and
// an empty rule_ptr is returned. In every other case the result of apply()
// is returned unchanged.
// NOTE(review): presumably "a different Symbol came back" means the whole
// rule was replaced by a single extracted token — confirm against apply().
rules::rule_ptr initial_apply(string name, const rules::rule_ptr rule) {
|
||||
auto result = apply(rule);
|
||||
// Null when the result is not a rules::Symbol.
auto symbol = std::dynamic_pointer_cast<const rules::Symbol>(result);
|
||||
if (symbol && *symbol != *rule) {
|
||||
// NOTE(review): operator[] default-inserts when the key is missing, so
// this assumes symbol->name is already present in tokens — confirm.
tokens.insert({ name, tokens[symbol->name] });
|
||||
// NOTE(review): add_token can hand back the name of an existing entry;
// if that name is also referenced from another rule, erasing it here
// (and decrementing the counter) may be wrong — verify.
tokens.erase(symbol->name);
|
||||
anonymous_token_count--;
|
||||
// Empty pointer signals that the rule now lives in `tokens`.
return rules::rule_ptr();
|
||||
} else {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
rules::rule_ptr apply(const rules::rule_ptr rule) {
|
||||
if (search_for_symbols(rule)) {
|
||||
rule->accept(*this);
|
||||
|
|
@ -24,10 +38,10 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
string add_token(const rules::rule_ptr &rule) {
|
||||
for (auto pair : tokens) {
|
||||
if (*pair.second == *rule) return pair.first;
|
||||
}
|
||||
string name = to_string(tokens.size() + 1);
|
||||
for (auto pair : tokens)
|
||||
if (*pair.second == *rule)
|
||||
return pair.first;
|
||||
string name = to_string(++anonymous_token_count);
|
||||
tokens.insert({ name, rule });
|
||||
return name;
|
||||
}
|
||||
|
|
@ -50,8 +64,10 @@ namespace tree_sitter {
|
|||
unordered_map<string, const rules::rule_ptr> rules;
|
||||
|
||||
for (auto pair : input_grammar.rules) {
|
||||
auto new_rule = extractor.apply(pair.second);
|
||||
rules.insert({ pair.first, new_rule });
|
||||
string name = pair.first;
|
||||
auto new_rule = extractor.initial_apply(name, pair.second);
|
||||
if (new_rule.get())
|
||||
rules.insert({ name, new_rule });
|
||||
}
|
||||
|
||||
return {
|
||||
|
|
|
|||
|
|
@ -16,7 +16,6 @@ namespace tree_sitter {
|
|||
rules(rules),
|
||||
start_rule_name(start_rule_name) {}
|
||||
|
||||
|
||||
const rules::rule_ptr Grammar::rule(const string &name) const {
|
||||
auto iter = rules.find(name);
|
||||
return (iter == rules.end()) ?
|
||||
|
|
@ -38,6 +37,7 @@ namespace tree_sitter {
|
|||
for (auto pair : rules) {
|
||||
auto other_pair = other.rules.find(pair.first);
|
||||
if (other_pair == other.rules.end()) return false;
|
||||
auto orr = other_pair->second->to_string();;
|
||||
if (!other_pair->second->operator==(*pair.second)) return false;
|
||||
}
|
||||
return true;
|
||||
|
|
|
|||
|
|
@ -31,7 +31,6 @@ namespace tree_sitter {
|
|||
void add_shift_actions(const ItemSet &item_set, size_t state_index) {
|
||||
auto x = item_set.sym_transitions(grammar);
|
||||
for (auto transition : x) {
|
||||
|
||||
rules::Symbol symbol = *transition.first;
|
||||
ItemSet item_set = *transition.second;
|
||||
size_t new_state_index = add_parse_state(item_set);
|
||||
|
|
|
|||
|
|
@ -6,6 +6,10 @@ using std::string;
|
|||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
// Inequality for rules: the logical negation of operator== on the same
// operands.
bool Rule::operator!=(const Rule &other) const {
|
||||
return !this->operator==(other);
|
||||
}
|
||||
|
||||
// Writes rule.to_string() to the stream and returns the stream so that
// insertions can be chained.
ostream& operator<<(ostream& stream, const Rule &rule) {
|
||||
return stream << rule.to_string();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ namespace tree_sitter {
|
|||
class Rule {
|
||||
public:
|
||||
virtual bool operator==(const Rule& other) const = 0;
|
||||
bool operator!=(const Rule& other) const;
|
||||
virtual size_t hash_code() const = 0;
|
||||
virtual rule_ptr copy() const = 0;
|
||||
virtual std::string to_string() const = 0;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue