diff --git a/include/tree_sitter/compiler.h b/include/tree_sitter/compiler.h index 93678b86..29303cb6 100644 --- a/include/tree_sitter/compiler.h +++ b/include/tree_sitter/compiler.h @@ -21,6 +21,7 @@ namespace tree_sitter { rule_ptr str(const std::string &value); rule_ptr err(const rule_ptr &rule); rule_ptr prec(int precedence, rule_ptr rule); + rule_ptr token(rule_ptr rule); } class Grammar { diff --git a/spec/compiler/build_tables/item_set_transitions_spec.cc b/spec/compiler/build_tables/item_set_transitions_spec.cc index 3ec84e1e..0b494280 100644 --- a/spec/compiler/build_tables/item_set_transitions_spec.cc +++ b/spec/compiler/build_tables/item_set_transitions_spec.cc @@ -34,14 +34,14 @@ describe("syntactic item set transitions", [&]() { { "A", blank() }, { "B", i_token(21) }, }, {}); - + it("computes the closure of the new item sets", [&]() { ParseItemSet set1({ ParseItem(Symbol(0), seq({ i_token(22), i_sym(1) }), 3, Symbol(23, SymbolOptionToken)), }); - + SymTransitions sym_transitions; - + AssertThat(sym_transitions(set1, grammar), Equals(map({ { Symbol(22, SymbolOptionToken), ParseItemSet({ ParseItem(Symbol(0), i_sym(1), 4, Symbol(23, SymbolOptionToken)), diff --git a/spec/compiler/prepare_grammar/extract_tokens_spec.cc b/spec/compiler/prepare_grammar/extract_tokens_spec.cc index 58f4442b..42c3f6dd 100644 --- a/spec/compiler/prepare_grammar/extract_tokens_spec.cc +++ b/spec/compiler/prepare_grammar/extract_tokens_spec.cc @@ -36,6 +36,22 @@ describe("extracting tokens from a grammar", []() { }))); }); + it("moves other rules marked as tokens into the lexical grammar", [&]() { + pair result = extract_tokens(PreparedGrammar({ + { "rule0", seq({ + token(choice({ str("a"), str("b") })), + i_sym(0) }) } + }, {})); + + AssertThat(result.first, Equals(PreparedGrammar({ + { "rule0", seq({ i_aux_token(0), i_sym(0) }) } + }, {}))); + + AssertThat(result.second, Equals(PreparedGrammar({}, { + { "token0", token(choice({ str("a"), str("b") })) }, + }))); + }); + it("does not extract blanks into tokens", [&]() { pair result = extract_tokens(PreparedGrammar({ { "rule1", choice({ i_sym(0), blank() }) }, @@ -66,6 +82,7 @@ describe("extracting tokens from a grammar", []() { auto result = extract_tokens(PreparedGrammar({ { "rule0", i_sym(1) }, { "rule1", pattern("a|b") }, + { "rule2", token(seq({ str("a"), str("b") })) }, }, {})); AssertThat(result.first, Equals(PreparedGrammar({ @@ -74,6 +91,7 @@ describe("extracting tokens from a grammar", []() { AssertThat(result.second, Equals(PreparedGrammar({ { "rule1", pattern("a|b") }, + { "rule2", token(seq({ str("a"), str("b") })) }, }, {}))); }); diff --git a/src/compiler/build_tables/get_metadata.cc b/src/compiler/build_tables/get_metadata.cc index f1fbfd25..c1b4d175 100644 --- a/src/compiler/build_tables/get_metadata.cc +++ b/src/compiler/build_tables/get_metadata.cc @@ -8,8 +8,7 @@ namespace tree_sitter { rules::MetadataKey metadata_key; int apply_to(const rules::Metadata *rule) { - auto pair = rule->value.find(metadata_key); - return (pair != rule->value.end()) ? pair->second : 0; + return rule->value_for(metadata_key); } public: diff --git a/src/compiler/prepare_grammar/extract_tokens.cc b/src/compiler/prepare_grammar/extract_tokens.cc index 01103bcf..c4df2a29 100644 --- a/src/compiler/prepare_grammar/extract_tokens.cc +++ b/src/compiler/prepare_grammar/extract_tokens.cc @@ -11,6 +11,7 @@ #include "compiler/rules/blank.h" #include "compiler/rules/symbol.h" #include "compiler/rules/string.h" +#include "compiler/rules/metadata.h" #include "compiler/rules/pattern.h" namespace tree_sitter { @@ -28,6 +29,7 @@ namespace tree_sitter { class IsToken : public rules::RuleFn { bool apply_to(const rules::String *rule) { return true; } bool apply_to(const rules::Pattern *rule) { return true; } + bool apply_to(const rules::Metadata *rule) { return rule->value_for(rules::IS_TOKEN); } }; class SymbolInliner : public rules::IdentityRuleFn { @@ -69,16 +71,28 @@ namespace tree_sitter { return index; } + rule_ptr apply_to_token(const rules::rule_ptr rule) { + size_t index = add_token(rule); + return make_shared(index, rules::SymbolOption(rules::SymbolOptionToken|rules::SymbolOptionAuxiliary)); + } + rule_ptr default_apply(const rules::Rule *rule) { auto result = rule->copy(); if (IsToken().apply(result)) { - size_t index = add_token(result); - return make_shared(index, rules::SymbolOption(rules::SymbolOptionToken|rules::SymbolOptionAuxiliary)); + return apply_to_token(result); } else { return result; } } + rule_ptr apply_to(const rules::Metadata *rule) { + if (rule->value_for(rules::IS_TOKEN)) { + return apply_to_token(rule->copy()); + } else { + return make_shared(apply(rule->rule), rule->value); + } + } + public: vector> tokens; }; diff --git a/src/compiler/rules/metadata.cc b/src/compiler/rules/metadata.cc index 98f1f7b9..b45a5ceb 100644 --- a/src/compiler/rules/metadata.cc +++ b/src/compiler/rules/metadata.cc @@ -26,7 +26,14 @@ namespace tree_sitter { } rule_ptr Metadata::copy() const { - return make_shared(rule, value); + return make_shared(rule->copy(), value); + } + + int Metadata::value_for(MetadataKey key) const { + auto pair = value.find(key); + return (pair != value.end()) ? + pair->second : + 0; } std::string Metadata::to_string() const { diff --git a/src/compiler/rules/metadata.h b/src/compiler/rules/metadata.h index 916279b9..97c7b761 100644 --- a/src/compiler/rules/metadata.h +++ b/src/compiler/rules/metadata.h @@ -9,7 +9,8 @@ namespace tree_sitter { namespace rules { typedef enum { START_TOKEN, - PRECEDENCE + PRECEDENCE, + IS_TOKEN, } MetadataKey; class Metadata : public Rule { @@ -21,6 +22,7 @@ namespace tree_sitter { rule_ptr copy() const; std::string to_string() const; void accept(Visitor *visitor) const; + int value_for(MetadataKey key) const; const rule_ptr rule; const std::map value; diff --git a/src/compiler/rules/rules.cc b/src/compiler/rules/rules.cc index 8db093c0..9752ec4d 100644 --- a/src/compiler/rules/rules.cc +++ b/src/compiler/rules/rules.cc @@ -23,6 +23,10 @@ namespace tree_sitter { using std::map; namespace rules { + static rule_ptr metadata(rule_ptr rule, map values) { + return std::make_shared(rule, values); + } + rule_ptr blank() { return make_shared(); } @@ -56,9 +60,11 @@ namespace tree_sitter { } rule_ptr prec(int precedence, rule_ptr rule) { - return std::make_shared(rule, map({ - { PRECEDENCE, precedence } - })); + return metadata(rule, { { PRECEDENCE, precedence } }); + } + + rule_ptr token(rule_ptr rule) { + return metadata(rule, { { IS_TOKEN, 1 } }); } } }