Fix transitions for sequences whose left side can be blank
This commit is contained in:
parent
5776846227
commit
8208aae060
6 changed files with 98 additions and 34 deletions
|
|
@ -1,8 +1,8 @@
|
|||
#include "spec_helper.h"
|
||||
#include "rule_transitions.h"
|
||||
|
||||
using build_tables::rule_transitions;
|
||||
using namespace rules;
|
||||
using namespace build_tables;
|
||||
|
||||
START_TEST
|
||||
|
||||
|
|
@ -68,6 +68,22 @@ describe("rule transitions", []() {
|
|||
})));
|
||||
});
|
||||
|
||||
it("handles sequences whose left sides can be blank", [&]() {
|
||||
AssertThat(
|
||||
rule_transitions(seq({
|
||||
choice({
|
||||
sym("x"),
|
||||
blank(),
|
||||
}),
|
||||
seq({
|
||||
sym("x"),
|
||||
sym("y")
|
||||
})
|
||||
})), Equals(transition_map<Rule, Rule>({
|
||||
{ sym("x"), choice({ seq({ sym("x"), sym("y") }), sym("y"), }) }
|
||||
})));
|
||||
});
|
||||
|
||||
it("handles choices with common starting symbols", [&]() {
|
||||
AssertThat(
|
||||
rule_transitions(
|
||||
|
|
@ -127,4 +143,18 @@ describe("rule transitions", []() {
|
|||
});
|
||||
});
|
||||
|
||||
describe("checking if rules can be blank", [&]() {
|
||||
it("handles sequences", [&]() {
|
||||
rule_ptr rule = seq({
|
||||
choice({
|
||||
str("x"),
|
||||
blank(),
|
||||
}),
|
||||
str("y"),
|
||||
});
|
||||
|
||||
AssertThat(rule_can_be_blank(rule), Equals(false));
|
||||
});
|
||||
});
|
||||
|
||||
END_TEST
|
||||
|
|
|
|||
|
|
@ -8,12 +8,12 @@ START_TEST
|
|||
describe("compiling grammars", []() {
|
||||
string test_parser_dir = src_dir() + "/spec/fixtures/parsers";
|
||||
|
||||
it("works for the arithmetic grammar", [&]() {
|
||||
it("compiles the arithmetic grammar", [&]() {
|
||||
Grammar grammar = test_grammars::arithmetic();
|
||||
ofstream(test_parser_dir + "/arithmetic.c") << compile(grammar, "arithmetic");
|
||||
});
|
||||
|
||||
it("works for the json grammar", [&]() {
|
||||
it("compiles the json grammar", [&]() {
|
||||
Grammar grammar = test_grammars::json();
|
||||
ofstream(test_parser_dir + "/json.c") << compile(grammar, "json");
|
||||
});
|
||||
|
|
|
|||
40
spec/fixtures/parsers/json.c
vendored
40
spec/fixtures/parsers/json.c
vendored
|
|
@ -83,22 +83,26 @@ static void ts_lex(TSParser *parser) {
|
|||
ADVANCE(13);
|
||||
LEX_ERROR(1, EXPECT({"<word>"}));
|
||||
case 13:
|
||||
if (LOOKAHEAD_CHAR() == '\"')
|
||||
ADVANCE(14);
|
||||
if (isalnum(LOOKAHEAD_CHAR()))
|
||||
ADVANCE(13);
|
||||
ACCEPT_TOKEN(ts_symbol_string);
|
||||
LEX_ERROR(2, EXPECT({"<word>", "'\"'"}));
|
||||
case 14:
|
||||
if (LOOKAHEAD_CHAR() == '}')
|
||||
ADVANCE(15);
|
||||
LEX_ERROR(1, EXPECT({"'}'"}));
|
||||
ACCEPT_TOKEN(ts_symbol_string);
|
||||
case 15:
|
||||
ACCEPT_TOKEN(ts_symbol_7);
|
||||
if (LOOKAHEAD_CHAR() == '}')
|
||||
ADVANCE(16);
|
||||
LEX_ERROR(1, EXPECT({"'}'"}));
|
||||
case 16:
|
||||
if (LOOKAHEAD_CHAR() == ':')
|
||||
ADVANCE(17);
|
||||
LEX_ERROR(1, EXPECT({"':'"}));
|
||||
ACCEPT_TOKEN(ts_symbol_7);
|
||||
case 17:
|
||||
ACCEPT_TOKEN(ts_symbol_5);
|
||||
if (LOOKAHEAD_CHAR() == ':')
|
||||
ADVANCE(18);
|
||||
LEX_ERROR(1, EXPECT({"':'"}));
|
||||
case 18:
|
||||
ACCEPT_TOKEN(ts_symbol_5);
|
||||
case 19:
|
||||
if (LOOKAHEAD_CHAR() == '\"')
|
||||
ADVANCE(12);
|
||||
LEX_ERROR(1, EXPECT({"'\"'"}));
|
||||
|
|
@ -204,7 +208,7 @@ static TSParseResult ts_parse(const char *input) {
|
|||
PARSE_ERROR(1, EXPECT({"2"}));
|
||||
}
|
||||
case 7:
|
||||
SET_LEX_STATE(18);
|
||||
SET_LEX_STATE(19);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_string:
|
||||
SHIFT(8);
|
||||
|
|
@ -212,7 +216,7 @@ static TSParseResult ts_parse(const char *input) {
|
|||
PARSE_ERROR(1, EXPECT({"string"}));
|
||||
}
|
||||
case 8:
|
||||
SET_LEX_STATE(16);
|
||||
SET_LEX_STATE(17);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_5:
|
||||
SHIFT(9);
|
||||
|
|
@ -292,7 +296,7 @@ static TSParseResult ts_parse(const char *input) {
|
|||
PARSE_ERROR(1, EXPECT({"2"}));
|
||||
}
|
||||
case 15:
|
||||
SET_LEX_STATE(18);
|
||||
SET_LEX_STATE(19);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_string:
|
||||
SHIFT(16);
|
||||
|
|
@ -300,7 +304,7 @@ static TSParseResult ts_parse(const char *input) {
|
|||
PARSE_ERROR(1, EXPECT({"string"}));
|
||||
}
|
||||
case 16:
|
||||
SET_LEX_STATE(16);
|
||||
SET_LEX_STATE(17);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_5:
|
||||
SHIFT(17);
|
||||
|
|
@ -336,7 +340,7 @@ static TSParseResult ts_parse(const char *input) {
|
|||
PARSE_ERROR(1, EXPECT({"6"}));
|
||||
}
|
||||
case 19:
|
||||
SET_LEX_STATE(14);
|
||||
SET_LEX_STATE(15);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_7:
|
||||
SHIFT(20);
|
||||
|
|
@ -368,7 +372,7 @@ static TSParseResult ts_parse(const char *input) {
|
|||
PARSE_ERROR(1, EXPECT({"6"}));
|
||||
}
|
||||
case 23:
|
||||
SET_LEX_STATE(14);
|
||||
SET_LEX_STATE(15);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_7:
|
||||
SHIFT(24);
|
||||
|
|
@ -408,7 +412,7 @@ static TSParseResult ts_parse(const char *input) {
|
|||
PARSE_ERROR(1, EXPECT({"__END__"}));
|
||||
}
|
||||
case 28:
|
||||
SET_LEX_STATE(18);
|
||||
SET_LEX_STATE(19);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_string:
|
||||
SHIFT(29);
|
||||
|
|
@ -416,7 +420,7 @@ static TSParseResult ts_parse(const char *input) {
|
|||
PARSE_ERROR(1, EXPECT({"string"}));
|
||||
}
|
||||
case 29:
|
||||
SET_LEX_STATE(16);
|
||||
SET_LEX_STATE(17);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_5:
|
||||
SHIFT(30);
|
||||
|
|
@ -452,7 +456,7 @@ static TSParseResult ts_parse(const char *input) {
|
|||
PARSE_ERROR(1, EXPECT({"6"}));
|
||||
}
|
||||
case 32:
|
||||
SET_LEX_STATE(14);
|
||||
SET_LEX_STATE(15);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_7:
|
||||
SHIFT(33);
|
||||
|
|
|
|||
|
|
@ -14,9 +14,14 @@ describe("json", []() {
|
|||
});
|
||||
|
||||
it("parses strings", [&]() {
|
||||
TSDocumentSetText(document, "\"foo\"");
|
||||
TSDocumentSetText(document, "\"string\"");
|
||||
AssertThat(string(TSDocumentToString(document)), Equals("(value (string))"));
|
||||
});
|
||||
|
||||
it("parses objects", [&]() {
|
||||
TSDocumentSetText(document, "{\"key1\":1}");
|
||||
AssertThat(string(TSDocumentToString(document)), Equals("(value (object (4) (string) (5) (value (number)) (6) (7)))"));
|
||||
});
|
||||
});
|
||||
|
||||
END_TEST
|
||||
|
|
@ -13,10 +13,6 @@ namespace tree_sitter {
|
|||
public:
|
||||
transition_map<Rule, Rule> value;
|
||||
|
||||
void visit(const Blank *rule) {
|
||||
value = transition_map<Rule, Rule>({{ blank(), blank() }});
|
||||
}
|
||||
|
||||
void visit(const Character *rule) {
|
||||
value = transition_map<Rule, Rule>({{ rule->copy(), blank() }});
|
||||
}
|
||||
|
|
@ -39,6 +35,11 @@ namespace tree_sitter {
|
|||
else
|
||||
return seq({ left_rule, rule->right });
|
||||
});
|
||||
if (rule_can_be_blank(rule->left)) {
|
||||
value.merge(rule_transitions(rule->right), [&](rule_ptr left, rule_ptr right) -> rule_ptr {
|
||||
return choice({ left, right });
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
void visit(const Repeat *rule) {
|
||||
|
|
@ -59,17 +60,41 @@ namespace tree_sitter {
|
|||
}
|
||||
};
|
||||
|
||||
bool rule_can_be_blank(const rule_ptr &rule) {
|
||||
for (auto pair : rule_transitions(rule))
|
||||
if (is_blank(pair.first))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
transition_map<Rule, Rule> rule_transitions(const rule_ptr &rule) {
|
||||
TransitionsVisitor visitor;
|
||||
rule->accept(visitor);
|
||||
return visitor.value;
|
||||
}
|
||||
|
||||
class EpsilonVisitor : public rules::Visitor {
|
||||
public:
|
||||
bool value;
|
||||
|
||||
void default_visit(const Rule *) {
|
||||
value = false;
|
||||
}
|
||||
|
||||
void visit(const Blank *) {
|
||||
value = true;
|
||||
}
|
||||
|
||||
void visit(const Choice *rule) {
|
||||
value = rule_can_be_blank(rule->left) || rule_can_be_blank(rule->right);
|
||||
}
|
||||
|
||||
void visit(const Seq *rule) {
|
||||
value = rule_can_be_blank(rule->left) && rule_can_be_blank(rule->right);
|
||||
}
|
||||
|
||||
void visit(const Repeat *rule) {
|
||||
value = rule_can_be_blank(rule->content);
|
||||
}
|
||||
};
|
||||
|
||||
bool rule_can_be_blank(const rule_ptr &rule) {
|
||||
EpsilonVisitor visitor;
|
||||
rule->accept(visitor);
|
||||
return visitor.value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ const char * TSParseErrorToString(const TSParseError *error, const char *input_s
|
|||
result = string("Unexpected token ") + symbol_names[error->lookahead_sym] + ". ";
|
||||
break;
|
||||
case TSParseErrorTypeLexical:
|
||||
result = string("Unexpected character '") + (input_string + error->position) + "'. ";
|
||||
result = string("Unexpected character '") + input_string[error->position] + "'. ";
|
||||
break;
|
||||
default:
|
||||
return EMPTY;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue