Add token helper for building token rules
Tokens can now be structured using any of the rule functions, not just `str` and `pattern`.
This commit is contained in:
parent
d685edf015
commit
6d40dcf881
8 changed files with 59 additions and 12 deletions
|
|
@ -21,6 +21,7 @@ namespace tree_sitter {
|
|||
rule_ptr str(const std::string &value);
|
||||
rule_ptr err(const rule_ptr &rule);
|
||||
rule_ptr prec(int precedence, rule_ptr rule);
|
||||
rule_ptr token(rule_ptr rule);
|
||||
}
|
||||
|
||||
class Grammar {
|
||||
|
|
|
|||
|
|
@ -34,14 +34,14 @@ describe("syntactic item set transitions", [&]() {
|
|||
{ "A", blank() },
|
||||
{ "B", i_token(21) },
|
||||
}, {});
|
||||
|
||||
|
||||
it("computes the closure of the new item sets", [&]() {
|
||||
ParseItemSet set1({
|
||||
ParseItem(Symbol(0), seq({ i_token(22), i_sym(1) }), 3, Symbol(23, SymbolOptionToken)),
|
||||
});
|
||||
|
||||
|
||||
SymTransitions sym_transitions;
|
||||
|
||||
|
||||
AssertThat(sym_transitions(set1, grammar), Equals(map<Symbol, ParseItemSet>({
|
||||
{ Symbol(22, SymbolOptionToken), ParseItemSet({
|
||||
ParseItem(Symbol(0), i_sym(1), 4, Symbol(23, SymbolOptionToken)),
|
||||
|
|
|
|||
|
|
@ -36,6 +36,22 @@ describe("extracting tokens from a grammar", []() {
|
|||
})));
|
||||
});
|
||||
|
||||
it("moves other rules marked as tokens into the lexical grammar", [&]() {
    // Input grammar: a single rule whose first sequence member is wrapped in token().
    auto result = extract_tokens(PreparedGrammar({
        { "rule0", seq({
            token(choice({ str("a"), str("b") })),
            i_sym(0) }) }
    }, {}));

    // The token() sub-rule should be replaced by an auxiliary token symbol...
    const PreparedGrammar expected_remaining({
        { "rule0", seq({ i_aux_token(0), i_sym(0) }) }
    }, {});

    // ...and show up, still wrapped in token(), as an auxiliary rule of the second grammar.
    const PreparedGrammar expected_lexical({}, {
        { "token0", token(choice({ str("a"), str("b") })) },
    });

    AssertThat(result.first, Equals(expected_remaining));
    AssertThat(result.second, Equals(expected_lexical));
});
|
||||
|
||||
it("does not extract blanks into tokens", [&]() {
|
||||
pair<PreparedGrammar, PreparedGrammar> result = extract_tokens(PreparedGrammar({
|
||||
{ "rule1", choice({ i_sym(0), blank() }) },
|
||||
|
|
@ -66,6 +82,7 @@ describe("extracting tokens from a grammar", []() {
|
|||
auto result = extract_tokens(PreparedGrammar({
|
||||
{ "rule0", i_sym(1) },
|
||||
{ "rule1", pattern("a|b") },
|
||||
{ "rule2", token(seq({ str("a"), str("b") })) },
|
||||
}, {}));
|
||||
|
||||
AssertThat(result.first, Equals(PreparedGrammar({
|
||||
|
|
@ -74,6 +91,7 @@ describe("extracting tokens from a grammar", []() {
|
|||
|
||||
AssertThat(result.second, Equals(PreparedGrammar({
|
||||
{ "rule1", pattern("a|b") },
|
||||
{ "rule2", token(seq({ str("a"), str("b") })) },
|
||||
}, {})));
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -8,8 +8,7 @@ namespace tree_sitter {
|
|||
rules::MetadataKey metadata_key;
|
||||
|
||||
// Returns the integer stored on `rule` under this function's `metadata_key`.
// Delegates to Metadata::value_for rather than repeating the map lookup here;
// value_for yields 0 when the key is absent, the same fallback the inline
// lookup used.
int apply_to(const rules::Metadata *rule) {
    return rule->value_for(metadata_key);
}
|
||||
|
||||
public:
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@
|
|||
#include "compiler/rules/blank.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/string.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/rules/pattern.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
|
@ -28,6 +29,7 @@ namespace tree_sitter {
|
|||
// Rule function that reports whether a rule should be treated as a token.
// Strings and patterns always are; a Metadata node is when its IS_TOKEN value
// is non-zero. Any other rule kind gets whatever rules::RuleFn<bool> does by
// default (not visible in this file section).
class IsToken : public rules::RuleFn<bool> {
    bool apply_to(const rules::String *) { return true; }
    bool apply_to(const rules::Pattern *) { return true; }
    bool apply_to(const rules::Metadata *metadata) {
        return metadata->value_for(rules::IS_TOKEN) != 0;
    }
};
|
||||
|
||||
class SymbolInliner : public rules::IdentityRuleFn {
|
||||
|
|
@ -69,16 +71,28 @@ namespace tree_sitter {
|
|||
return index;
|
||||
}
|
||||
|
||||
// Registers `rule` through add_token and returns the symbol that stands in
// for it. The symbol carries the index add_token returned and is flagged as
// both a token and auxiliary.
rule_ptr apply_to_token(const rules::rule_ptr rule) {
    size_t token_index = add_token(rule);
    const auto symbol_flags =
        rules::SymbolOption(rules::SymbolOptionToken|rules::SymbolOptionAuxiliary);
    return make_shared<rules::Symbol>(token_index, symbol_flags);
}
|
||||
|
||||
// Fallback for rule kinds that have no dedicated apply_to overload.
// Copies the rule; if IsToken classifies the copy as a token, the copy is
// registered and replaced by its auxiliary token symbol via apply_to_token
// (the single place that builds such symbols). Otherwise the copy is
// returned unchanged.
rule_ptr default_apply(const rules::Rule *rule) {
    auto result = rule->copy();
    if (IsToken().apply(result)) {
        return apply_to_token(result);
    } else {
        return result;
    }
}
|
||||
|
||||
// Handles Metadata nodes.
// A node whose IS_TOKEN value is non-zero is extracted whole: a copy of it
// (metadata wrapper included) is registered as a token and the resulting
// symbol is returned. Any other Metadata node is rebuilt around the
// transformed child, keeping its key/value map.
rule_ptr apply_to(const rules::Metadata *rule) {
    const bool marked_as_token = rule->value_for(rules::IS_TOKEN) != 0;
    if (!marked_as_token) {
        rule_ptr transformed_child = apply(rule->rule);
        return make_shared<rules::Metadata>(transformed_child, rule->value);
    }
    return apply_to_token(rule->copy());
}
|
||||
|
||||
public:
|
||||
vector<pair<string, rule_ptr>> tokens;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -26,7 +26,14 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
// Returns a new Metadata node with the same key/value map.
// The wrapped rule is copied as well (via its own copy()), so the result does
// not share its child with this node.
rule_ptr Metadata::copy() const {
    return make_shared<Metadata>(rule->copy(), value);
}
|
||||
|
||||
// Looks up `key` in this node's value map.
// Returns the stored integer, or 0 when the key is not present.
int Metadata::value_for(MetadataKey key) const {
    const auto entry = value.find(key);
    if (entry == value.end())
        return 0;
    return entry->second;
}
|
||||
|
||||
std::string Metadata::to_string() const {
|
||||
|
|
|
|||
|
|
@ -9,7 +9,8 @@ namespace tree_sitter {
|
|||
namespace rules {
|
||||
// Keys for the integer annotations stored in a Metadata rule's `value` map.
// Each enumerator must appear exactly once; the implicit numbering
// (0, 1, 2, ...) follows declaration order.
typedef enum {
    START_TOKEN,
    PRECEDENCE,  // value is the precedence passed to prec()
    IS_TOKEN,    // set to 1 by token(); non-zero marks the wrapped rule as a token
} MetadataKey;
|
||||
|
||||
class Metadata : public Rule {
|
||||
|
|
@ -21,6 +22,7 @@ namespace tree_sitter {
|
|||
rule_ptr copy() const;
|
||||
std::string to_string() const;
|
||||
void accept(Visitor *visitor) const;
|
||||
int value_for(MetadataKey key) const;
|
||||
|
||||
const rule_ptr rule;
|
||||
const std::map<MetadataKey, int> value;
|
||||
|
|
|
|||
|
|
@ -23,6 +23,10 @@ namespace tree_sitter {
|
|||
using std::map;
|
||||
|
||||
namespace rules {
|
||||
// File-local helper: wraps `rule` in a Metadata node carrying `values`.
static rule_ptr metadata(rule_ptr rule, map<MetadataKey, int> values) {
    rule_ptr wrapped = std::make_shared<Metadata>(rule, values);
    return wrapped;
}
|
||||
|
||||
// Builds a new Blank rule.
rule_ptr blank() {
    rule_ptr blank_rule = make_shared<Blank>();
    return blank_rule;
}
|
||||
|
|
@ -56,9 +60,11 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
// Wraps `rule` in a Metadata node whose PRECEDENCE value is `precedence`.
// Built through the shared metadata() helper, the same way token() is.
rule_ptr prec(int precedence, rule_ptr rule) {
    return metadata(rule, { { PRECEDENCE, precedence } });
}
|
||||
|
||||
// Marks `rule` as a token by wrapping it in a Metadata node with IS_TOKEN set to 1.
rule_ptr token(rule_ptr rule) {
    const map<MetadataKey, int> token_flag = { { IS_TOKEN, 1 } };
    return metadata(rule, token_flag);
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue