diff --git a/spec/compiler/build_tables/rule_transitions_spec.cpp b/spec/compiler/build_tables/rule_transitions_spec.cpp index d5393bb8..b31343e8 100644 --- a/spec/compiler/build_tables/rule_transitions_spec.cpp +++ b/spec/compiler/build_tables/rule_transitions_spec.cpp @@ -1,8 +1,8 @@ #include "spec_helper.h" #include "rule_transitions.h" -using build_tables::rule_transitions; using namespace rules; +using namespace build_tables; START_TEST @@ -68,6 +68,22 @@ describe("rule transitions", []() { }))); }); + it("handles sequences whose left sides can be blank", [&]() { + AssertThat( + rule_transitions(seq({ + choice({ + sym("x"), + blank(), + }), + seq({ + sym("x"), + sym("y") + }) + })), Equals(transition_map({ + { sym("x"), choice({ seq({ sym("x"), sym("y") }), sym("y"), }) } + }))); + }); + it("handles choices with common starting symbols", [&]() { AssertThat( rule_transitions( @@ -127,4 +143,18 @@ describe("rule transitions", []() { }); }); +describe("checking if rules can be blank", [&]() { + it("handles sequences", [&]() { + rule_ptr rule = seq({ + choice({ + str("x"), + blank(), + }), + str("y"), + }); + + AssertThat(rule_can_be_blank(rule), Equals(false)); + }); +}); + END_TEST diff --git a/spec/compiler/compile_fixtures.cpp b/spec/compiler/compile_fixtures.cpp index 7d8394da..1851d1d9 100644 --- a/spec/compiler/compile_fixtures.cpp +++ b/spec/compiler/compile_fixtures.cpp @@ -8,12 +8,12 @@ START_TEST describe("compiling grammars", []() { string test_parser_dir = src_dir() + "/spec/fixtures/parsers"; - it("works for the arithmetic grammar", [&]() { + it("compiles the arithmetic grammar", [&]() { Grammar grammar = test_grammars::arithmetic(); ofstream(test_parser_dir + "/arithmetic.c") << compile(grammar, "arithmetic"); }); - it("works for the json grammar", [&]() { + it("compiles the json grammar", [&]() { Grammar grammar = test_grammars::json(); ofstream(test_parser_dir + "/json.c") << compile(grammar, "json"); }); diff --git a/spec/fixtures/parsers/json.c b/spec/fixtures/parsers/json.c index e686f794..bcb3fce0 100644 --- a/spec/fixtures/parsers/json.c +++ b/spec/fixtures/parsers/json.c @@ -83,22 +83,26 @@ static void ts_lex(TSParser *parser) { ADVANCE(13); LEX_ERROR(1, EXPECT({""})); case 13: + if (LOOKAHEAD_CHAR() == '\"') + ADVANCE(14); if (isalnum(LOOKAHEAD_CHAR())) ADVANCE(13); - ACCEPT_TOKEN(ts_symbol_string); + LEX_ERROR(2, EXPECT({"", "'\"'"})); case 14: - if (LOOKAHEAD_CHAR() == '}') - ADVANCE(15); - LEX_ERROR(1, EXPECT({"'}'"})); + ACCEPT_TOKEN(ts_symbol_string); case 15: - ACCEPT_TOKEN(ts_symbol_7); + if (LOOKAHEAD_CHAR() == '}') + ADVANCE(16); + LEX_ERROR(1, EXPECT({"'}'"})); case 16: - if (LOOKAHEAD_CHAR() == ':') - ADVANCE(17); - LEX_ERROR(1, EXPECT({"':'"})); + ACCEPT_TOKEN(ts_symbol_7); case 17: - ACCEPT_TOKEN(ts_symbol_5); + if (LOOKAHEAD_CHAR() == ':') + ADVANCE(18); + LEX_ERROR(1, EXPECT({"':'"})); case 18: + ACCEPT_TOKEN(ts_symbol_5); + case 19: if (LOOKAHEAD_CHAR() == '\"') ADVANCE(12); LEX_ERROR(1, EXPECT({"'\"'"})); @@ -204,7 +208,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(1, EXPECT({"2"})); } case 7: - SET_LEX_STATE(18); + SET_LEX_STATE(19); switch (LOOKAHEAD_SYM()) { case ts_symbol_string: SHIFT(8); @@ -212,7 +216,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(1, EXPECT({"string"})); } case 8: - SET_LEX_STATE(16); + SET_LEX_STATE(17); switch (LOOKAHEAD_SYM()) { case ts_symbol_5: SHIFT(9); @@ -292,7 +296,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(1, EXPECT({"2"})); } case 15: - SET_LEX_STATE(18); + SET_LEX_STATE(19); switch (LOOKAHEAD_SYM()) { case ts_symbol_string: SHIFT(16); @@ -300,7 +304,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(1, EXPECT({"string"})); } case 16: - SET_LEX_STATE(16); + SET_LEX_STATE(17); switch (LOOKAHEAD_SYM()) { case ts_symbol_5: SHIFT(17); @@ -336,7 +340,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(1, EXPECT({"6"})); } case 19: - SET_LEX_STATE(14); + SET_LEX_STATE(15); switch (LOOKAHEAD_SYM()) { case ts_symbol_7: SHIFT(20); @@ -368,7 +372,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(1, EXPECT({"6"})); } case 23: - SET_LEX_STATE(14); + SET_LEX_STATE(15); switch (LOOKAHEAD_SYM()) { case ts_symbol_7: SHIFT(24); @@ -408,7 +412,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(1, EXPECT({"__END__"})); } case 28: - SET_LEX_STATE(18); + SET_LEX_STATE(19); switch (LOOKAHEAD_SYM()) { case ts_symbol_string: SHIFT(29); @@ -416,7 +420,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(1, EXPECT({"string"})); } case 29: - SET_LEX_STATE(16); + SET_LEX_STATE(17); switch (LOOKAHEAD_SYM()) { case ts_symbol_5: SHIFT(30); @@ -452,7 +456,7 @@ static TSParseResult ts_parse(const char *input) { PARSE_ERROR(1, EXPECT({"6"})); } case 32: - SET_LEX_STATE(14); + SET_LEX_STATE(15); switch (LOOKAHEAD_SYM()) { case ts_symbol_7: SHIFT(33); diff --git a/spec/runtime/json_spec.cpp b/spec/runtime/json_spec.cpp index 41ab5595..c844e6f8 100644 --- a/spec/runtime/json_spec.cpp +++ b/spec/runtime/json_spec.cpp @@ -14,9 +14,14 @@ describe("json", []() { }); it("parses strings", [&]() { - TSDocumentSetText(document, "\"foo\""); + TSDocumentSetText(document, "\"string\""); AssertThat(string(TSDocumentToString(document)), Equals("(value (string))")); }); + + it("parses objects", [&]() { + TSDocumentSetText(document, "{\"key1\":1}"); + AssertThat(string(TSDocumentToString(document)), Equals("(value (object (4) (string) (5) (value (number)) (6) (7)))")); + }); }); END_TEST \ No newline at end of file diff --git a/src/compiler/build_tables/rule_transitions.cpp b/src/compiler/build_tables/rule_transitions.cpp index ece1fd9f..f1f68647 100644 --- a/src/compiler/build_tables/rule_transitions.cpp +++ b/src/compiler/build_tables/rule_transitions.cpp @@ -13,10 +13,6 @@ namespace tree_sitter { public: transition_map value; - void visit(const Blank *rule) { - value = transition_map({{ blank(), blank() }}); - } - void visit(const Character *rule) { value = transition_map({{ rule->copy(), blank() }}); } @@ -39,6 +35,11 @@ namespace tree_sitter { else return seq({ left_rule, rule->right }); }); + if (rule_can_be_blank(rule->left)) { + value.merge(rule_transitions(rule->right), [&](rule_ptr left, rule_ptr right) -> rule_ptr { + return choice({ left, right }); + }); + } } void visit(const Repeat *rule) { @@ -59,17 +60,41 @@ namespace tree_sitter { } }; - bool rule_can_be_blank(const rule_ptr &rule) { - for (auto pair : rule_transitions(rule)) - if (is_blank(pair.first)) - return true; - return false; - } - transition_map rule_transitions(const rule_ptr &rule) { TransitionsVisitor visitor; rule->accept(visitor); return visitor.value; } + + class EpsilonVisitor : public rules::Visitor { + public: + bool value; + + void default_visit(const Rule *) { + value = false; + } + + void visit(const Blank *) { + value = true; + } + + void visit(const Choice *rule) { + value = rule_can_be_blank(rule->left) || rule_can_be_blank(rule->right); + } + + void visit(const Seq *rule) { + value = rule_can_be_blank(rule->left) && rule_can_be_blank(rule->right); + } + + void visit(const Repeat *rule) { + value = rule_can_be_blank(rule->content); + } + }; + + bool rule_can_be_blank(const rule_ptr &rule) { + EpsilonVisitor visitor; + rule->accept(visitor); + return visitor.value; + } } } diff --git a/src/runtime/parse_config.cpp b/src/runtime/parse_config.cpp index 4e33b8ed..42a6937f 100644 --- a/src/runtime/parse_config.cpp +++ b/src/runtime/parse_config.cpp @@ -11,7 +11,7 @@ const char * TSParseErrorToString(const TSParseError *error, const char *input_s result = string("Unexpected token ") + symbol_names[error->lookahead_sym] + ". "; break; case TSParseErrorTypeLexical: - result = string("Unexpected character '") + (input_string + error->position) + "'. "; + result = string("Unexpected character '") + input_string[error->position] + "'. "; break; default: return EMPTY;