From c43ec90dad01338820456a80ac349dfc3f5d5893 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 27 Feb 2014 00:38:08 -0800 Subject: [PATCH] Fix missing auxiliary tokens in lex error state --- examples/grammars/arithmetic.hpp | 2 +- examples/parsers/arithmetic.c | 52 ++++++++++++++++++++++----- spec/main.cpp | 2 +- spec/runtime/arithmetic_spec.cpp | 47 ++++++++++++++++-------- src/compiler/build_tables/perform.cpp | 4 ++- 5 files changed, 81 insertions(+), 26 deletions(-) diff --git a/examples/grammars/arithmetic.hpp b/examples/grammars/arithmetic.hpp index 0ee9f53a..ec2c5c80 100644 --- a/examples/grammars/arithmetic.hpp +++ b/examples/grammars/arithmetic.hpp @@ -26,7 +26,7 @@ namespace test_grammars { sym("number"), seq({ str("("), - sym("expression"), + err(sym("expression")), str(")") }) }) }, { "plus", str("+") }, { "times", str("*") }, diff --git a/examples/parsers/arithmetic.c b/examples/parsers/arithmetic.c index 1eb850bc..4c20090a 100644 --- a/examples/parsers/arithmetic.c +++ b/examples/parsers/arithmetic.c @@ -26,6 +26,22 @@ SYMBOL_NAMES { static const ts_symbol * ts_recover(ts_state state, ts_state *to_state, size_t *count) { switch (state) { + case 6: + RECOVER(7, 1, EXPECT({ts_aux_sym_token2})); + case 13: + RECOVER(14, 1, EXPECT({ts_aux_sym_token2})); + case 23: + RECOVER(24, 1, EXPECT({ts_aux_sym_token2})); + case 26: + RECOVER(27, 1, EXPECT({ts_aux_sym_token2})); + case 31: + RECOVER(32, 1, EXPECT({ts_aux_sym_token2})); + case 41: + RECOVER(42, 1, EXPECT({ts_aux_sym_token2})); + case 46: + RECOVER(47, 1, EXPECT({ts_aux_sym_token2})); + case 49: + RECOVER(50, 1, EXPECT({ts_aux_sym_token2})); default: RECOVER(0, 0, EXPECT({})); } @@ -101,6 +117,10 @@ LEX_FN() { ADVANCE(7); LEX_ERROR(); case ts_lex_state_error: + if (LOOKAHEAD_CHAR() == '(') + ADVANCE(9); + if (LOOKAHEAD_CHAR() == ')') + ADVANCE(4); if (LOOKAHEAD_CHAR() == '*') ADVANCE(7); if (LOOKAHEAD_CHAR() == '+') @@ -206,8 +226,10 @@ PARSE_FN() { SHIFT(16); case ts_aux_sym_token1: SHIFT(26); + case ts_builtin_sym_error: + SHIFT(7); default: - PARSE_ERROR(6, EXPECT({ts_sym_expression, ts_sym_factor, ts_sym_number, ts_sym_term, ts_sym_variable, ts_aux_sym_token1})); + PARSE_ERROR(7, EXPECT({ts_sym_expression, ts_sym_factor, ts_sym_number, ts_sym_term, ts_sym_variable, ts_aux_sym_token1, ts_builtin_sym_error})); } case 7: SET_LEX_STATE(3); @@ -288,8 +310,10 @@ PARSE_FN() { SHIFT(16); case ts_aux_sym_token1: SHIFT(26); + case ts_builtin_sym_error: + SHIFT(14); default: - PARSE_ERROR(6, EXPECT({ts_sym_expression, ts_sym_factor, ts_sym_number, ts_sym_term, ts_sym_variable, ts_aux_sym_token1})); + PARSE_ERROR(7, EXPECT({ts_sym_expression, ts_sym_factor, ts_sym_number, ts_sym_term, ts_sym_variable, ts_aux_sym_token1, ts_builtin_sym_error})); } case 14: SET_LEX_STATE(3); @@ -402,8 +426,10 @@ PARSE_FN() { SHIFT(16); case ts_aux_sym_token1: SHIFT(26); + case ts_builtin_sym_error: + SHIFT(24); default: - PARSE_ERROR(6, EXPECT({ts_sym_expression, ts_sym_factor, ts_sym_number, ts_sym_term, ts_sym_variable, ts_aux_sym_token1})); + PARSE_ERROR(7, EXPECT({ts_sym_expression, ts_sym_factor, ts_sym_number, ts_sym_term, ts_sym_variable, ts_aux_sym_token1, ts_builtin_sym_error})); } case 24: SET_LEX_STATE(3); @@ -436,8 +462,10 @@ PARSE_FN() { SHIFT(16); case ts_aux_sym_token1: SHIFT(26); + case ts_builtin_sym_error: + SHIFT(27); default: - PARSE_ERROR(6, EXPECT({ts_sym_expression, ts_sym_factor, ts_sym_number, ts_sym_term, ts_sym_variable, ts_aux_sym_token1})); + PARSE_ERROR(7, EXPECT({ts_sym_expression, ts_sym_factor, ts_sym_number, ts_sym_term, ts_sym_variable, ts_aux_sym_token1, ts_builtin_sym_error})); } case 27: SET_LEX_STATE(3); @@ -492,8 +520,10 @@ PARSE_FN() { SHIFT(16); case ts_aux_sym_token1: SHIFT(26); + case ts_builtin_sym_error: + SHIFT(32); default: - PARSE_ERROR(6, EXPECT({ts_sym_expression, ts_sym_factor, ts_sym_number, ts_sym_term, ts_sym_variable, ts_aux_sym_token1})); + PARSE_ERROR(7, EXPECT({ts_sym_expression, ts_sym_factor, ts_sym_number, ts_sym_term, ts_sym_variable, ts_aux_sym_token1, ts_builtin_sym_error})); } case 32: SET_LEX_STATE(3); @@ -606,8 +636,10 @@ PARSE_FN() { SHIFT(16); case ts_aux_sym_token1: SHIFT(26); + case ts_builtin_sym_error: + SHIFT(42); default: - PARSE_ERROR(6, EXPECT({ts_sym_expression, ts_sym_factor, ts_sym_number, ts_sym_term, ts_sym_variable, ts_aux_sym_token1})); + PARSE_ERROR(7, EXPECT({ts_sym_expression, ts_sym_factor, ts_sym_number, ts_sym_term, ts_sym_variable, ts_aux_sym_token1, ts_builtin_sym_error})); } case 42: SET_LEX_STATE(3); @@ -658,8 +690,10 @@ PARSE_FN() { SHIFT(16); case ts_aux_sym_token1: SHIFT(26); + case ts_builtin_sym_error: + SHIFT(47); default: - PARSE_ERROR(6, EXPECT({ts_sym_expression, ts_sym_factor, ts_sym_number, ts_sym_term, ts_sym_variable, ts_aux_sym_token1})); + PARSE_ERROR(7, EXPECT({ts_sym_expression, ts_sym_factor, ts_sym_number, ts_sym_term, ts_sym_variable, ts_aux_sym_token1, ts_builtin_sym_error})); } case 47: SET_LEX_STATE(3); @@ -694,8 +728,10 @@ PARSE_FN() { SHIFT(16); case ts_aux_sym_token1: SHIFT(26); + case ts_builtin_sym_error: + SHIFT(50); default: - PARSE_ERROR(6, EXPECT({ts_sym_expression, ts_sym_factor, ts_sym_number, ts_sym_term, ts_sym_variable, ts_aux_sym_token1})); + PARSE_ERROR(7, EXPECT({ts_sym_expression, ts_sym_factor, ts_sym_number, ts_sym_term, ts_sym_variable, ts_aux_sym_token1, ts_builtin_sym_error})); } case 50: SET_LEX_STATE(3); diff --git a/spec/main.cpp b/spec/main.cpp index 73ef9ab9..cb78429c 100644 --- a/spec/main.cpp +++ b/spec/main.cpp @@ -6,7 +6,7 @@ int main(int argc, char *argv[]) "", "--no-color", "--only=" - "" + "parenthesized expr" }; return bandit::run(4, const_cast(args)); } \ No newline at end of file diff --git a/spec/runtime/arithmetic_spec.cpp b/spec/runtime/arithmetic_spec.cpp index 4d4e97c2..bc341b25 100644 --- a/spec/runtime/arithmetic_spec.cpp +++ b/spec/runtime/arithmetic_spec.cpp @@ -5,44 +5,61 @@ extern ts_parse_config ts_parse_config_arithmetic; START_TEST describe("arithmetic", []() { - ts_document *document; + ts_document *doc; before_each([&]() { - document = ts_document_make(); - ts_document_set_parser(document, ts_parse_config_arithmetic); + doc = ts_document_make(); + ts_document_set_parser(doc, ts_parse_config_arithmetic); + }); + + after_each([&]() { + ts_document_free(doc); }); it("parses variables", [&]() { - ts_document_set_text(document, "x"); - AssertThat(string(ts_document_string(document)), Equals( + ts_document_set_text(doc, "x"); + AssertThat(string(ts_document_string(doc)), Equals( "(expression (term (factor (variable))))")); }); it("parses numbers", [&]() { - ts_document_set_text(document, "5"); - AssertThat(string(ts_document_string(document)), Equals( + ts_document_set_text(doc, "5"); + AssertThat(string(ts_document_string(doc)), Equals( "(expression (term (factor (number))))")); }); it("parses products of variables", [&]() { - ts_document_set_text(document, "x + y"); - AssertThat(string(ts_document_string(document)), Equals( + ts_document_set_text(doc, "x + y"); + AssertThat(string(ts_document_string(doc)), Equals( "(expression (term (factor (variable))) (plus) (term (factor (variable))))")); - ts_document_set_text(document, "x * y"); - AssertThat(string(ts_document_string(document)), Equals( + ts_document_set_text(doc, "x * y"); + AssertThat(string(ts_document_string(doc)), Equals( "(expression (term (factor (variable)) (times) (factor (variable))))")); }); it("parses complex trees", [&]() { - ts_document_set_text(document, "x * y + z * a"); - AssertThat(string(ts_document_string(document)), Equals( + ts_document_set_text(doc, "x * y + z * a"); + AssertThat(string(ts_document_string(doc)), Equals( "(expression (term (factor (variable)) (times) (factor (variable))) (plus) (term (factor (variable)) (times) (factor (variable))))")); - ts_document_set_text(document, "x * (y + z)"); - AssertThat(string(ts_document_string(document)), Equals( + ts_document_set_text(doc, "x * (y + z)"); + AssertThat(string(ts_document_string(doc)), Equals( "(expression (term (factor (variable)) (times) (factor (expression (term (factor (variable))) (plus) (term (factor (variable)))))))")); }); + + describe("error recovery", [&]() { + it("recovers from errors at the top level", [&]() { + ts_document_set_text(doc, "x * * y"); + AssertThat(string(ts_document_string(doc)), Equals("(ERROR)")); + }); + + it("recovers from errors in parenthesized expressions", [&]() { + ts_document_set_text(doc, "x + (y * + z) * 5"); + AssertThat(string(ts_document_string(doc)), Equals( + "(expression (term (factor (variable))) (plus) (term (factor (ERROR)) (times) (factor (number))))")); + }); + }); }); END_TEST \ No newline at end of file diff --git a/src/compiler/build_tables/perform.cpp b/src/compiler/build_tables/perform.cpp index 1ca06bc9..7d2abd3e 100644 --- a/src/compiler/build_tables/perform.cpp +++ b/src/compiler/build_tables/perform.cpp @@ -115,7 +115,9 @@ namespace tree_sitter { void add_error_lex_state() { LexItemSet error_item_set; for (auto &pair : lex_grammar.rules) - error_item_set.insert(LexItem(pair.first, pair.second)); + error_item_set.insert(LexItem(Symbol(pair.first), pair.second)); + for (auto &pair : lex_grammar.aux_rules) + error_item_set.insert(LexItem(Symbol(pair.first, rules::SymbolTypeAuxiliary), pair.second)); add_advance_actions(error_item_set, LexTable::ERROR_STATE_ID); add_accept_token_actions(error_item_set, LexTable::ERROR_STATE_ID); }