Fix transitions for sequences whose left side can be blank

This commit is contained in:
Max Brunsfeld 2014-01-23 23:56:37 -08:00
parent 5776846227
commit 8208aae060
6 changed files with 98 additions and 34 deletions

View file

@ -1,8 +1,8 @@
#include "spec_helper.h"
#include "rule_transitions.h"
using build_tables::rule_transitions;
using namespace rules;
using namespace build_tables;
START_TEST
@ -68,6 +68,22 @@ describe("rule transitions", []() {
})));
});
// The first element of the outer sequence is `x | blank`, the second is
// `x y`. Both can begin with "x", so the expected map has a single "x"
// transition whose target is a choice between the two remainders:
// `x y` and `y`.
it("handles sequences whose left sides can be blank", [&]() {
    auto optional_x = choice({ sym("x"), blank() });
    auto x_then_y = seq({ sym("x"), sym("y") });

    transition_map<Rule, Rule> expected({
        { sym("x"), choice({ seq({ sym("x"), sym("y") }), sym("y") }) }
    });

    AssertThat(rule_transitions(seq({ optional_x, x_then_y })), Equals(expected));
});
it("handles choices with common starting symbols", [&]() {
AssertThat(
rule_transitions(
@ -127,4 +143,18 @@ describe("rule transitions", []() {
});
});
describe("checking if rules can be blank", [&]() {
    // Only the left element (`"x" | blank`) is optional; the right element
    // is the string "y", so the sequence is expected not to be blank.
    it("handles sequences", [&]() {
        auto optional_x = choice({ str("x"), blank() });
        rule_ptr sequence = seq({ optional_x, str("y") });
        AssertThat(rule_can_be_blank(sequence), Equals(false));
    });
});
END_TEST

View file

@ -8,12 +8,12 @@ START_TEST
describe("compiling grammars", []() {
string test_parser_dir = src_dir() + "/spec/fixtures/parsers";
it("works for the arithmetic grammar", [&]() {
it("compiles the arithmetic grammar", [&]() {
Grammar grammar = test_grammars::arithmetic();
ofstream(test_parser_dir + "/arithmetic.c") << compile(grammar, "arithmetic");
});
it("works for the json grammar", [&]() {
it("compiles the json grammar", [&]() {
Grammar grammar = test_grammars::json();
ofstream(test_parser_dir + "/json.c") << compile(grammar, "json");
});

View file

@ -83,22 +83,26 @@ static void ts_lex(TSParser *parser) {
ADVANCE(13);
LEX_ERROR(1, EXPECT({"<word>"}));
case 13:
if (LOOKAHEAD_CHAR() == '\"')
ADVANCE(14);
if (isalnum(LOOKAHEAD_CHAR()))
ADVANCE(13);
ACCEPT_TOKEN(ts_symbol_string);
LEX_ERROR(2, EXPECT({"<word>", "'\"'"}));
case 14:
if (LOOKAHEAD_CHAR() == '}')
ADVANCE(15);
LEX_ERROR(1, EXPECT({"'}'"}));
ACCEPT_TOKEN(ts_symbol_string);
case 15:
ACCEPT_TOKEN(ts_symbol_7);
if (LOOKAHEAD_CHAR() == '}')
ADVANCE(16);
LEX_ERROR(1, EXPECT({"'}'"}));
case 16:
if (LOOKAHEAD_CHAR() == ':')
ADVANCE(17);
LEX_ERROR(1, EXPECT({"':'"}));
ACCEPT_TOKEN(ts_symbol_7);
case 17:
ACCEPT_TOKEN(ts_symbol_5);
if (LOOKAHEAD_CHAR() == ':')
ADVANCE(18);
LEX_ERROR(1, EXPECT({"':'"}));
case 18:
ACCEPT_TOKEN(ts_symbol_5);
case 19:
if (LOOKAHEAD_CHAR() == '\"')
ADVANCE(12);
LEX_ERROR(1, EXPECT({"'\"'"}));
@ -204,7 +208,7 @@ static TSParseResult ts_parse(const char *input) {
PARSE_ERROR(1, EXPECT({"2"}));
}
case 7:
SET_LEX_STATE(18);
SET_LEX_STATE(19);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_string:
SHIFT(8);
@ -212,7 +216,7 @@ static TSParseResult ts_parse(const char *input) {
PARSE_ERROR(1, EXPECT({"string"}));
}
case 8:
SET_LEX_STATE(16);
SET_LEX_STATE(17);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_5:
SHIFT(9);
@ -292,7 +296,7 @@ static TSParseResult ts_parse(const char *input) {
PARSE_ERROR(1, EXPECT({"2"}));
}
case 15:
SET_LEX_STATE(18);
SET_LEX_STATE(19);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_string:
SHIFT(16);
@ -300,7 +304,7 @@ static TSParseResult ts_parse(const char *input) {
PARSE_ERROR(1, EXPECT({"string"}));
}
case 16:
SET_LEX_STATE(16);
SET_LEX_STATE(17);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_5:
SHIFT(17);
@ -336,7 +340,7 @@ static TSParseResult ts_parse(const char *input) {
PARSE_ERROR(1, EXPECT({"6"}));
}
case 19:
SET_LEX_STATE(14);
SET_LEX_STATE(15);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_7:
SHIFT(20);
@ -368,7 +372,7 @@ static TSParseResult ts_parse(const char *input) {
PARSE_ERROR(1, EXPECT({"6"}));
}
case 23:
SET_LEX_STATE(14);
SET_LEX_STATE(15);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_7:
SHIFT(24);
@ -408,7 +412,7 @@ static TSParseResult ts_parse(const char *input) {
PARSE_ERROR(1, EXPECT({"__END__"}));
}
case 28:
SET_LEX_STATE(18);
SET_LEX_STATE(19);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_string:
SHIFT(29);
@ -416,7 +420,7 @@ static TSParseResult ts_parse(const char *input) {
PARSE_ERROR(1, EXPECT({"string"}));
}
case 29:
SET_LEX_STATE(16);
SET_LEX_STATE(17);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_5:
SHIFT(30);
@ -452,7 +456,7 @@ static TSParseResult ts_parse(const char *input) {
PARSE_ERROR(1, EXPECT({"6"}));
}
case 32:
SET_LEX_STATE(14);
SET_LEX_STATE(15);
switch (LOOKAHEAD_SYM()) {
case ts_symbol_7:
SHIFT(33);

View file

@ -14,9 +14,14 @@ describe("json", []() {
});
it("parses strings", [&]() {
TSDocumentSetText(document, "\"foo\"");
TSDocumentSetText(document, "\"string\"");
AssertThat(string(TSDocumentToString(document)), Equals("(value (string))"));
});
it("parses objects", [&]() {
TSDocumentSetText(document, "{\"key1\":1}");
AssertThat(string(TSDocumentToString(document)), Equals("(value (object (4) (string) (5) (value (number)) (6) (7)))"));
});
});
END_TEST

View file

@ -13,10 +13,6 @@ namespace tree_sitter {
public:
transition_map<Rule, Rule> value;
void visit(const Blank *rule) {
value = transition_map<Rule, Rule>({{ blank(), blank() }});
}
// A character rule yields exactly one transition: from a copy of the
// character itself to a blank rule.
void visit(const Character *rule) {
    auto consumed = rule->copy();
    auto remainder = blank();
    value = transition_map<Rule, Rule>({{ consumed, remainder }});
}
@ -39,6 +35,11 @@ namespace tree_sitter {
else
return seq({ left_rule, rule->right });
});
if (rule_can_be_blank(rule->left)) {
value.merge(rule_transitions(rule->right), [&](rule_ptr left, rule_ptr right) -> rule_ptr {
return choice({ left, right });
});
}
}
void visit(const Repeat *rule) {
@ -59,17 +60,41 @@ namespace tree_sitter {
}
};
bool rule_can_be_blank(const rule_ptr &rule) {
for (auto pair : rule_transitions(rule))
if (is_blank(pair.first))
return true;
return false;
}
// Runs a TransitionsVisitor over `rule` and returns the transition map the
// visitor stored in its `value` member.
transition_map<Rule, Rule> rule_transitions(const rule_ptr &rule) {
    TransitionsVisitor transitions;
    rule->accept(transitions);
    return transitions.value;
}
// Visitor that records in `value` whether the visited rule can be blank.
//
//   Blank   -> true
//   Choice  -> true if either alternative can be blank
//   Seq     -> true only if both halves can be blank
//   Repeat  -> whatever its content yields
//   default_visit -> false
//
// NOTE(review): presumably rules::Visitor routes every rule type without a
// dedicated overload here to default_visit -- confirm against the base class.
class EpsilonVisitor : public rules::Visitor {
public:
    // Initialized so that reading `value` is well-defined even if dispatch
    // reaches a visit method that assigns nothing.
    bool value = false;

    void default_visit(const Rule *) {
        value = false;
    }

    void visit(const Blank *) {
        value = true;
    }

    void visit(const Choice *rule) {
        value = rule_can_be_blank(rule->left) || rule_can_be_blank(rule->right);
    }

    void visit(const Seq *rule) {
        value = rule_can_be_blank(rule->left) && rule_can_be_blank(rule->right);
    }

    void visit(const Repeat *rule) {
        value = rule_can_be_blank(rule->content);
    }
};
bool rule_can_be_blank(const rule_ptr &rule) {
EpsilonVisitor visitor;
rule->accept(visitor);
return visitor.value;
}
}
}

View file

@ -11,7 +11,7 @@ const char * TSParseErrorToString(const TSParseError *error, const char *input_s
result = string("Unexpected token ") + symbol_names[error->lookahead_sym] + ". ";
break;
case TSParseErrorTypeLexical:
result = string("Unexpected character '") + (input_string + error->position) + "'. ";
result = string("Unexpected character '") + input_string[error->position] + "'. ";
break;
default:
return EMPTY;