Add parser error messages

This commit is contained in:
Max Brunsfeld 2014-01-08 18:35:16 -08:00
parent 614e497ac4
commit 55809f702d
14 changed files with 213 additions and 83 deletions

View file

@ -2,19 +2,19 @@
#define __tree_sitter_document_h__
#include "./tree.h"
#include "./parse_config.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef struct TSDocument TSDocument;
typedef TSTree * TSDocumentParseFn(const char *);
TSDocument * TSDocumentMake();
void TSDocumentSetUp(TSDocument *document, TSDocumentParseFn fn, const char **symbol_names);
void TSDocumentSetUp(TSDocument *document, TSParseConfig config);
void TSDocumentSetText(TSDocument *document, const char *text);
TSTree * TSDocumentTree(const TSDocument *document);
char * TSDocumentToString(const TSDocument *document);
const char * TSDocumentToString(const TSDocument *document);
#ifdef __cplusplus
}

23
include/parse_config.h Normal file
View file

@ -0,0 +1,23 @@
#ifndef __tree_sitter_parse_config_h__
#define __tree_sitter_parse_config_h__
#include "tree.h"
/*
 * Error information produced by a parse.
 *   message  - text describing the failure; the parser stores NULL here when
 *              no error was recorded.
 *   position - the parser's position in the input text when the result was
 *              built.
 */
typedef struct {
const char *message;
size_t position;
} TSParseError;
/*
 * Everything a parse function hands back: the error information together
 * with the resulting syntax tree.
 */
typedef struct {
TSParseError error;
TSTree *tree;
} TSParseResult;
/* Signature of a parse function: takes the input text, returns a TSParseResult. */
typedef TSParseResult TSParseFn(const char *);
/*
 * Bundles what a document needs in order to use a particular parser:
 *   parse_fn     - the parse function to run on the document's text.
 *   symbol_names - the table of symbol names used when rendering a tree as a
 *                  string.
 */
typedef struct {
TSParseFn *parse_fn;
const char **symbol_names;
} TSParseConfig;
#endif

View file

@ -2,6 +2,7 @@
#define __tree_sitter_parser_h__
#include "tree.h"
#include "parse_config.h"
#include <stdio.h>
#ifdef __cplusplus
@ -13,6 +14,7 @@ typedef struct TSStackEntry TSStackEntry;
typedef struct TSParser {
TSTree *tree;
const char *input;
char *error_message;
size_t position;
TSTree *lookahead_node;
TSState lex_state;
@ -23,11 +25,13 @@ typedef struct TSParser {
TSParser TSParserMake(const char *input);
void TSParserShift(TSParser *parser, TSState state);
void TSParserReduce(TSParser *parser, TSSymbol symbol, int child_count);
void TSParserError(TSParser *parser);
void TSParserLexError(TSParser *parser, size_t count, const char **expected_inputs);
void TSParserError(TSParser *parser, size_t count, const char **expected_inputs);
void TSParserAcceptInput(TSParser *parser);
void TSParserAdvance(TSParser *parser, TSState lex_state);
TSState TSParserParseState(const TSParser *parser);
TSState TSParserLexState(const TSParser *parser);
TSParseResult TSParserResult(TSParser *parser);
void TSParserSetLexState(TSParser *parser, TSState state);
char TSParserLookaheadChar(const TSParser *parser);
long TSParserLookaheadSym(const TSParser *parser);
@ -77,19 +81,35 @@ TSParserLexState(parser)
#define REDUCE(symbol, child_count) \
{ TSParserReduce(parser, symbol, child_count); goto next_state; }
#define PARSE_ERROR() \
{ TSParserError(parser); goto done; }
/*
 * Records a parse error and leaves the parse loop.
 *
 * Expands to a block that stores `inputs` in a static array of strings,
 * passes that array and `count` to TSParserError, and then jumps to the
 * `done` label. The expansion site must therefore have a `parser` variable
 * in scope and a `done:` label (FINISH_PARSER provides one).
 *
 * `inputs` is a brace-enclosed initializer list; wrap it in EXPECT(...) so
 * that its commas are passed through as a single macro argument.
 */
#define PARSE_ERROR(count, inputs) \
{ \
static const char *expected_inputs[] = inputs; \
TSParserError(parser, count, expected_inputs); \
goto done; \
}
#define LEX_ERROR() \
{ TSParserError(parser); goto done; }
/*
 * Records a lexing error and leaves the lexer.
 *
 * Expands to a block that stores `inputs` in a static array of strings,
 * passes that array and `count` to TSParserLexError, and then jumps to the
 * `done` label. The expansion site must therefore have a `parser` variable
 * in scope and a `done:` label (FINISH_LEXER provides one).
 *
 * `inputs` is a brace-enclosed initializer list; wrap it in EXPECT(...) so
 * that its commas are passed through as a single macro argument.
 */
#define LEX_ERROR(count, inputs) \
{ \
static const char *expected_inputs[] = inputs; \
TSParserLexError(parser, count, expected_inputs); \
goto done; \
}
#define EXPECT(...) __VA_ARGS__
#define FINISH_PARSER() \
done: \
return parser->tree;
return TSParserResult(parser);
#define FINISH_LEXER() \
done:
/*
 * Diagnostics for the `default:` case of the generated state switches, i.e.
 * for a state number that has no case at all. Unlike LEX_ERROR/PARSE_ERROR
 * these do not record an error message on the parser; they only print.
 *
 * The conversion is "%u": the previous "%ud" was "%u" followed by a literal
 * 'd', which appended a stray "d" to every reported state number.
 */
#define LEX_PANIC() \
printf("Lex error: unexpected state %u", LEX_STATE());
#define PARSE_PANIC() \
printf("Parse error: unexpected state %u", PARSE_STATE());
#ifdef __cplusplus
}
#endif

View file

@ -5,8 +5,8 @@
using namespace tree_sitter::lr;
using namespace tree_sitter::rules;
typedef std::unordered_set<ParseAction> parse_actions;
typedef std::unordered_set<LexAction> lex_actions;
typedef unordered_set<ParseAction> parse_actions;
typedef unordered_set<LexAction> lex_actions;
START_TEST
@ -15,27 +15,25 @@ describe("building parse and lex tables", []() {
{ "expression", choice({
seq({
sym("term"),
sym("plus-token"),
sym("plus"),
sym("term") }),
sym("term") }) },
{ "term", choice({
sym("variable"),
sym("number"),
seq({
sym("left-paren-token"),
sym("left-paren"),
sym("expression"),
sym("right-paren-token")
}) }) },
{ "variable", sym("variable-token") },
{ "number", sym("number-token") }
sym("right-paren")
}) }) }
});
Grammar lex_grammar({
{ "plus-token", character('+') },
{ "variable-token", pattern("\\w+") },
{ "number-token", pattern("\\d+") },
{ "left-paren-token", character('(') },
{ "right-paren-token", character(')') }
{ "plus", str("+") },
{ "variable", pattern("\\w+") },
{ "number", pattern("\\d+") },
{ "left-paren", str("(") },
{ "right-paren", str(")") }
});
pair<ParseTable, LexTable> tables = build_tables(grammar, lex_grammar);
@ -58,9 +56,9 @@ describe("building parse and lex tables", []() {
{ "number", parse_actions({ ParseAction::Shift(5) }) },
{ "variable", parse_actions({ ParseAction::Shift(5) }) },
{ "left-paren-token", parse_actions({ ParseAction::Shift(6) }) },
{ "variable-token", parse_actions({ ParseAction::Shift(9) }) },
{ "number-token", parse_actions({ ParseAction::Shift(10) }) },
{ "left-paren", parse_actions({ ParseAction::Shift(6) }) },
{ "variable", parse_actions({ ParseAction::Shift(9) }) },
{ "number", parse_actions({ ParseAction::Shift(10) }) },
})));
AssertThat(lex_state(0).actions, Equals(unordered_map<CharMatch, lex_actions>({
@ -68,6 +66,12 @@ describe("building parse and lex tables", []() {
{ CharMatchClass(CharClassDigit), lex_actions({ LexAction::Advance(2) }) },
{ CharMatchClass(CharClassWord), lex_actions({ LexAction::Advance(3) }) },
})));
AssertThat(lex_state(0).expected_inputs(), Equals(unordered_set<CharMatch>({
CharMatchSpecific('('),
CharMatchClass(CharClassDigit),
CharMatchClass(CharClassWord),
})));
});
it("accepts when the start symbol is reduced", [&]() {
@ -78,7 +82,7 @@ describe("building parse and lex tables", []() {
it("has the right next states", [&]() {
AssertThat(parse_state(2).actions, Equals(unordered_map<string, parse_actions>({
{ "plus-token", parse_actions({ ParseAction::Shift(3) }) },
{ "plus", parse_actions({ ParseAction::Shift(3) }) },
})));
});
});

View file

@ -1,5 +1,4 @@
#include "parser.h"
#include "document.h"
#include <ctype.h>
enum ts_symbol {
@ -38,7 +37,7 @@ static void ts_lex(TSParser *parser) {
ADVANCE(2);
if (LOOKAHEAD_CHAR() == '(')
ADVANCE(1);
LEX_ERROR();
LEX_ERROR(3, EXPECT({"'('", "<digit>", "<word>"}));
case 1:
ACCEPT_TOKEN(ts_symbol_1);
case 2:
@ -50,32 +49,32 @@ static void ts_lex(TSParser *parser) {
ADVANCE(3);
ACCEPT_TOKEN(ts_symbol_variable);
case 4:
LEX_ERROR();
LEX_ERROR(0, EXPECT({}));
case 5:
if (LOOKAHEAD_CHAR() == '+')
ADVANCE(6);
LEX_ERROR();
LEX_ERROR(1, EXPECT({"'+'"}));
case 6:
ACCEPT_TOKEN(ts_symbol_4);
case 7:
if (LOOKAHEAD_CHAR() == '*')
ADVANCE(8);
LEX_ERROR();
LEX_ERROR(1, EXPECT({"'*'"}));
case 8:
ACCEPT_TOKEN(ts_symbol_3);
case 9:
if (LOOKAHEAD_CHAR() == ')')
ADVANCE(10);
LEX_ERROR();
LEX_ERROR(1, EXPECT({"')'"}));
case 10:
ACCEPT_TOKEN(ts_symbol_2);
default:
LEX_ERROR();
LEX_PANIC();
}
FINISH_LEXER();
}
static TSTree * ts_parse(const char *input) {
static TSParseResult ts_parse(const char *input) {
START_PARSER();
switch (PARSE_STATE()) {
case 0:
@ -94,7 +93,7 @@ static TSTree * ts_parse(const char *input) {
case ts_symbol_expression:
SHIFT(1);
default:
PARSE_ERROR();
PARSE_ERROR(6, EXPECT({"expression", "term", "1", "number", "factor", "variable"}));
}
case 1:
SET_LEX_STATE(4);
@ -102,7 +101,7 @@ static TSTree * ts_parse(const char *input) {
case ts_symbol___END__:
ACCEPT_INPUT();
default:
PARSE_ERROR();
PARSE_ERROR(1, EXPECT({"__END__"}));
}
case 2:
SET_LEX_STATE(5);
@ -126,7 +125,7 @@ static TSTree * ts_parse(const char *input) {
case ts_symbol_term:
SHIFT(4);
default:
PARSE_ERROR();
PARSE_ERROR(5, EXPECT({"term", "number", "1", "factor", "variable"}));
}
case 4:
SET_LEX_STATE(4);
@ -154,7 +153,7 @@ static TSTree * ts_parse(const char *input) {
case ts_symbol_factor:
SHIFT(7);
default:
PARSE_ERROR();
PARSE_ERROR(4, EXPECT({"factor", "variable", "number", "1"}));
}
case 7:
SET_LEX_STATE(4);
@ -184,7 +183,7 @@ static TSTree * ts_parse(const char *input) {
case ts_symbol_expression:
SHIFT(10);
default:
PARSE_ERROR();
PARSE_ERROR(6, EXPECT({"expression", "term", "1", "number", "factor", "variable"}));
}
case 10:
SET_LEX_STATE(9);
@ -192,7 +191,7 @@ static TSTree * ts_parse(const char *input) {
case ts_symbol_2:
SHIFT(11);
default:
PARSE_ERROR();
PARSE_ERROR(1, EXPECT({"2"}));
}
case 11:
SET_LEX_STATE(4);
@ -201,11 +200,12 @@ static TSTree * ts_parse(const char *input) {
REDUCE(ts_symbol_factor, 3);
}
default:
PARSE_ERROR();
PARSE_PANIC();
}
FINISH_PARSER();
}
void TSDocumentSetUp_arithmetic(TSDocument *document) {
TSDocumentSetUp(document, ts_parse, ts_symbol_names);
}
TSParseConfig ts_parse_config_arithmetic = {
.parse_fn = ts_parse,
.symbol_names = ts_symbol_names
};

View file

@ -1,13 +1,15 @@
#include "spec_helper.h"
#include "../fixtures/parsers/arithmetic.c"
#include "document.h"
extern TSParseConfig ts_parse_config_arithmetic;
START_TEST
describe("arithmetic", []() {
it("parses_numbers", [&]() {
TSDocument *document = TSDocumentMake();
TSDocumentSetUp_arithmetic(document);
TSDocumentSetText(document, "5");
TSDocumentSetUp(document, ts_parse_config_arithmetic);
TSDocumentSetText(document, "w");
printf("%s", TSDocumentToString(document));
});
});

View file

@ -22,7 +22,7 @@ namespace tree_sitter {
pos += replace.length();
}
}
string join(vector<string> lines, string separator) {
string result;
bool started = false;
@ -105,10 +105,10 @@ namespace tree_sitter {
}
}
string code_for_parse_actions(const unordered_set<ParseAction> &actions) {
string code_for_parse_actions(const unordered_set<ParseAction> &actions, const unordered_set<string> &expected_inputs) {
auto action = actions.begin();
if (action == actions.end()) {
return "PARSE_ERROR();";
return parse_error_call(expected_inputs);
} else {
switch (action->type) {
case ParseActionTypeAccept:
@ -122,11 +122,35 @@ namespace tree_sitter {
}
}
}
/**
 * Renders the PARSE_ERROR(...) macro invocation emitted for a parse state
 * that has no action for the lookahead symbol.
 *
 * The result has the form
 *     PARSE_ERROR(<n>, EXPECT({"<name>", "<name>", ...}));
 * where <n> is the number of expected symbols. Names appear in the set's
 * iteration order, which is unspecified for an unordered_set.
 *
 * @param expected_inputs names of the symbols the state accepts
 * @return the macro invocation as source text
 */
string parse_error_call(const unordered_set<string> &expected_inputs) {
    string result = "PARSE_ERROR(" + to_string(expected_inputs.size()) + ", EXPECT({";
    bool started = false;
    // Iterate by const reference: iterating by value copied every name.
    for (const auto &symbol_name : expected_inputs) {
        if (started) result += ", ";
        started = true;
        // Append piecewise so no temporary string is built per element.
        result += '"';
        result += symbol_name;
        result += '"';
    }
    result += "}));";
    return result;
}
/**
 * Renders the LEX_ERROR(...) macro invocation emitted for a lex state that
 * has no action for the lookahead character.
 *
 * The result has the form
 *     LEX_ERROR(<n>, EXPECT({"<match>", "<match>", ...}));
 * where <n> is the number of expected matches and each <match> is the text
 * produced by CharMatchToString. Matches appear in the set's iteration
 * order, which is unspecified for an unordered_set.
 *
 * NOTE(review): the text from CharMatchToString is placed inside a C string
 * literal without escaping; confirm it can never contain `"` or `\`.
 *
 * @param expected_inputs character matches the state accepts
 * @return the macro invocation as source text
 */
string lex_error_call(const unordered_set<CharMatch> &expected_inputs) {
    string result = "LEX_ERROR(" + to_string(expected_inputs.size()) + ", EXPECT({";
    bool started = false;
    // Iterate by const reference: iterating by value copied every match.
    for (const auto &match : expected_inputs) {
        if (started) result += ", ";
        started = true;
        result += '"';
        result += CharMatchToString(match);
        result += '"';
    }
    result += "}));";
    return result;
}
string code_for_lex_actions(const unordered_set<LexAction> &actions) {
string code_for_lex_actions(const unordered_set<LexAction> &actions, const unordered_set<CharMatch> &expected_inputs) {
auto action = actions.begin();
if (action == actions.end()) {
return "LEX_ERROR();";
return lex_error_call(expected_inputs);
} else {
switch (action->type) {
case LexActionTypeAdvance:
@ -142,8 +166,8 @@ namespace tree_sitter {
string code_for_parse_state(const ParseState &parse_state) {
string body = "";
for (auto pair : parse_state.actions)
body += _case(symbol_id(pair.first), code_for_parse_actions(pair.second));
body += _default(code_for_parse_actions(parse_state.default_actions));
body += _case(symbol_id(pair.first), code_for_parse_actions(pair.second, parse_state.expected_inputs()));
body += _default(code_for_parse_actions(parse_state.default_actions, parse_state.expected_inputs()));
return
string("SET_LEX_STATE(") + to_string(parse_state.lex_state_index) + ");\n" +
_switch("LOOKAHEAD_SYM()", body);
@ -151,9 +175,10 @@ namespace tree_sitter {
string switch_on_lookahead_char(const LexState &parse_state) {
string result = "";
auto expected_inputs = parse_state.expected_inputs();
for (auto pair : parse_state.actions)
result += _if(condition_for_char_match(pair.first), code_for_lex_actions(pair.second));
result += code_for_lex_actions(parse_state.default_actions);
result += _if(condition_for_char_match(pair.first), code_for_lex_actions(pair.second, expected_inputs));
result += code_for_lex_actions(parse_state.default_actions, expected_inputs);
return result;
}
@ -161,7 +186,7 @@ namespace tree_sitter {
string body = "";
for (int i = 0; i < parse_table.states.size(); i++)
body += _case(std::to_string(i), code_for_parse_state(parse_table.states[i]));
body += _default("PARSE_ERROR();");
body += _default("PARSE_PANIC();");
return _switch("PARSE_STATE()", body);
}
@ -169,7 +194,7 @@ namespace tree_sitter {
string body = "";
for (int i = 0; i < lex_table.states.size(); i++)
body += _case(std::to_string(i), switch_on_lookahead_char(lex_table.states[i]));
body += _default("LEX_ERROR();");
body += _default("LEX_PANIC();");
return _switch("LEX_STATE()", body);
}
@ -192,21 +217,10 @@ namespace tree_sitter {
string includes() {
return join({
"#include \"parser.h\"",
"#include \"document.h\"",
"#include <ctype.h>"
});
}
string parse_function() {
return join({
"static TSTree * ts_parse(const char *input) {",
indent("START_PARSER();"),
indent(switch_on_parse_state()),
indent("FINISH_PARSER();"),
"}"
});
}
string lex_function() {
return join({
"static void ts_lex(TSParser *parser) {",
@ -217,14 +231,25 @@ namespace tree_sitter {
});
}
string setup_function() {
string parse_function() {
return join({
"void TSDocumentSetUp_arithmetic(TSDocument *document) {",
indent("TSDocumentSetUp(document, ts_parse, ts_symbol_names);"),
"static TSParseResult ts_parse(const char *input) {",
indent("START_PARSER();"),
indent(switch_on_parse_state()),
indent("FINISH_PARSER();"),
"}"
});
}
string parse_config_struct() {
return join({
"TSParseConfig ts_parse_config_arithmetic = {",
indent(".parse_fn = ts_parse,"),
indent(".symbol_names = ts_symbol_names"),
"};"
});
}
string code() {
return join({
includes(),
@ -232,7 +257,7 @@ namespace tree_sitter {
rule_names_list(),
lex_function(),
parse_function(),
setup_function(),
parse_config_struct(),
}, "\n\n") + "\n";
}
};

View file

@ -45,6 +45,14 @@ namespace tree_sitter {
}
}
// State
unordered_set<CharMatch> LexState::expected_inputs() const {
unordered_set<CharMatch> result;
for (auto pair : actions)
result.insert(pair.first);
return result;
}
// Table
size_t LexTable::add_state() {
states.push_back(LexState());

View file

@ -50,6 +50,7 @@ namespace tree_sitter {
public:
std::unordered_map<CharMatch, std::unordered_set<LexAction>> actions;
std::unordered_set<LexAction> default_actions;
std::unordered_set<CharMatch> expected_inputs() const;
};
class LexTable {

View file

@ -3,6 +3,7 @@
using std::string;
using std::ostream;
using std::to_string;
using std::unordered_set;
namespace tree_sitter {
namespace lr {
@ -52,6 +53,13 @@ namespace tree_sitter {
// State
ParseState::ParseState() : lex_state_index(-1) {}
/**
 * Collects the symbol names this state has an action for, i.e. the keys of
 * `actions`.
 *
 * @return a set containing every key of `actions`
 */
unordered_set<string> ParseState::expected_inputs() const {
    unordered_set<string> result;
    result.reserve(actions.size());
    // Bind each entry by const reference. Iterating by value copied the key
    // string and the whole action set of every entry.
    for (const auto &entry : actions)
        result.insert(entry.first);
    return result;
}
// Table
size_t ParseTable::add_state() {
states.push_back(ParseState());

View file

@ -54,6 +54,7 @@ namespace tree_sitter {
ParseState();
std::unordered_map<std::string, std::unordered_set<ParseAction>> actions;
std::unordered_set<ParseAction> default_actions;
std::unordered_set<std::string> expected_inputs() const;
size_t lex_state_index;
};

View file

@ -1,9 +1,10 @@
#include "document.h"
struct TSDocument {
TSDocumentParseFn *parse_fn;
TSParseFn *parse_fn;
const char **symbol_names;
const char *text;
const char *error_message;
TSTree *tree;
};
@ -12,22 +13,26 @@ TSDocument * TSDocumentMake() {
return result;
}
void TSDocumentSetUp(TSDocument *document, TSDocumentParseFn fn, const char **symbol_names) {
document->parse_fn = fn;
document->symbol_names = symbol_names;
/*
 * Installs a parser configuration on the document by copying both of its
 * fields: the symbol-name table and the parse function. Nothing is parsed
 * here.
 */
void TSDocumentSetUp(TSDocument *document, TSParseConfig config) {
    document->symbol_names = config.symbol_names;
    document->parse_fn = config.parse_fn;
}
void TSDocumentSetText(TSDocument *document, const char *text) {
document->text = text;
document->tree = document->parse_fn(document->text);
TSParseResult result = document->parse_fn(text);
document->tree = result.tree;
document->error_message = result.error.message;
}
TSTree * TSDocumentTree(const TSDocument *document) {
return document->tree;
}
char * TSDocumentToString(const TSDocument *document) {
if (document->tree)
const char * TSDocumentToString(const TSDocument *document) {
if (document->error_message) {
return document->error_message;
} else if (document->tree)
return TSTreeToString(document->tree, document->symbol_names);
else
return "#<null tree>";

View file

@ -1,5 +1,6 @@
#include "parser.h"
#include <stdio.h>
#include <string.h>
static int INITIAL_STACK_SIZE = 100;
@ -12,6 +13,7 @@ TSParser TSParserMake(const char *input) {
TSParser result = {
.tree = NULL,
.input = input,
.error_message = NULL,
.position = 0,
.lookahead_node = NULL,
.lex_state = 0,
@ -41,8 +43,30 @@ void TSParserReduce(TSParser *parser, TSSymbol symbol, int child_count) {
parser->lookahead_node = TSTreeMake(symbol, child_count, children);
}
void TSParserError(TSParser *parser) {
/*
 * Builds "<prefix><input 0>, <input 1>, ..." in a freshly malloc'd buffer
 * sized to fit exactly.
 *
 *   prefix          - NUL-terminated text that starts the message.
 *   count           - number of entries in expected_inputs (may be 0).
 *   expected_inputs - the strings to list after the prefix.
 *
 * Returns the new string, or NULL if the allocation fails.
 */
static char * ts_parser_error_message(const char *prefix, size_t count, const char **expected_inputs) {
    static const char separator[] = ", ";
    const size_t separator_length = sizeof(separator) - 1;
    const size_t prefix_length = strlen(prefix);

    /* First pass: measure, so the buffer can never be too small. */
    size_t length = prefix_length;
    for (size_t i = 0; i < count; i++) {
        if (i > 0) length += separator_length;
        length += strlen(expected_inputs[i]);
    }

    char *message = (char *)malloc(length + 1);
    if (!message) return NULL;

    /* Second pass: copy the pieces back to back, leaving no gaps. */
    char *spot = message;
    memcpy(spot, prefix, prefix_length);
    spot += prefix_length;
    for (size_t i = 0; i < count; i++) {
        if (i > 0) {
            memcpy(spot, separator, separator_length);
            spot += separator_length;
        }
        const size_t input_length = strlen(expected_inputs[i]);
        memcpy(spot, expected_inputs[i], input_length);
        spot += input_length;
    }
    *spot = '\0';
    return message;
}

/*
 * Records a parse error on the parser: the unexpected lookahead symbol
 * followed by the list of symbols that would have been accepted.
 *
 * The message is heap-allocated and stored in parser->error_message. If the
 * allocation fails, error_message is set to NULL.
 *
 * This replaces a fixed 100-byte buffer that long lists could overflow and
 * whose contents were cut off after "Expected: ": the write position was
 * advanced past the terminator before each input was written, so the inputs
 * never became part of the visible string.
 */
void TSParserError(TSParser *parser, size_t count, const char **expected_inputs) {
    /* 64 bytes holds the fixed text plus any value of a long. */
    char prefix[64];
    snprintf(prefix, sizeof(prefix), "Unexpected token '%ld'. Expected: ", TSParserLookaheadSym(parser));
    parser->error_message = ts_parser_error_message(prefix, count, expected_inputs);
}

/*
 * Records a lexing error on the parser: the unexpected character at the
 * current input position followed by the list of inputs that would have
 * been accepted.
 *
 * The message is heap-allocated and stored in parser->error_message. If the
 * allocation fails, error_message is set to NULL.
 *
 * A NUL character at the current position is reported as "end of input",
 * because formatting it with %c would embed a terminator in the message.
 * Besides the buffer problems described on TSParserError, the previous
 * version advanced the write position by a hard-coded 30, which is shorter
 * than the prefix, so it overwrote the end of its own prefix.
 */
void TSParserLexError(TSParser *parser, size_t count, const char **expected_inputs) {
    char prefix[64];
    const char lookahead = parser->input[parser->position];
    if (lookahead == '\0')
        snprintf(prefix, sizeof(prefix), "Unexpected end of input. Expected: ");
    else
        snprintf(prefix, sizeof(prefix), "Unexpected character '%c'. Expected: ", lookahead);
    parser->error_message = ts_parser_error_message(prefix, count, expected_inputs);
}
void TSParserAdvance(TSParser *parser, TSState lex_state) {
@ -79,3 +103,10 @@ void TSParserAcceptInput(TSParser *parser) {
parser->tree = parser->stack[parser->stack_size - 1].node;
}
/*
 * Packages the parser's current outcome as a TSParseResult: its tree, its
 * error message (NULL when none was recorded) and its current position.
 */
TSParseResult TSParserResult(TSParser *parser) {
    TSParseResult result;
    result.error.message = parser->error_message;
    result.error.position = parser->position;
    result.tree = parser->tree;
    return result;
}

View file

@ -139,6 +139,7 @@
12EDCF89187B498C005A7A07 /* tree_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = tree_spec.cpp; sourceTree = "<group>"; };
12EDCF8B187C6251005A7A07 /* document.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = document.h; sourceTree = "<group>"; };
12EDCF8C187C6282005A7A07 /* document.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = document.c; sourceTree = "<group>"; };
12EDCF8E187DB33E005A7A07 /* parse_config.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = parse_config.h; sourceTree = "<group>"; };
12F9A64C182DD5FD00FAF50C /* spec_helper.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = spec_helper.cpp; path = spec/compiler/spec_helper.cpp; sourceTree = SOURCE_ROOT; };
12F9A64D182DD5FD00FAF50C /* spec_helper.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = spec_helper.h; path = spec/compiler/spec_helper.h; sourceTree = SOURCE_ROOT; };
12F9A64F182DD6BC00FAF50C /* grammar.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = grammar.cpp; path = src/compiler/grammar/grammar.cpp; sourceTree = SOURCE_ROOT; };
@ -401,6 +402,7 @@
12FD40D4185FED9A0041A84E /* tree.h */,
121D8B3018795CC0003CF44B /* parser.h */,
12EDCF8B187C6251005A7A07 /* document.h */,
12EDCF8E187DB33E005A7A07 /* parse_config.h */,
);
path = include;
sourceTree = "<group>";