Improve parser error messages

This commit is contained in:
Max Brunsfeld 2014-01-09 13:31:30 -08:00
parent 55809f702d
commit 2894ad7447
4 changed files with 62 additions and 35 deletions

View file

@ -14,7 +14,7 @@ typedef struct TSStackEntry TSStackEntry;
typedef struct TSParser {
TSTree *tree;
const char *input;
char *error_message;
const char *error_message;
size_t position;
TSTree *lookahead_node;
TSState lex_state;
@ -47,7 +47,7 @@ next_state:
ts_symbol_names[LOOKAHEAD_SYM()]
/*
 * Prologue for a generated lex function.
 *
 * 1. Returns immediately when LOOKAHEAD_SYM() is non-negative, i.e. a
 *    lookahead symbol is already available (presumably a negative value means
 *    "no lookahead yet" -- confirm against LOOKAHEAD_SYM's definition). The
 *    single `>= 0` test replaces the earlier `> 0` test, which it subsumes.
 * 2. Accepts the end-of-input token when the lookahead character is '\0'.
 * 3. Declares the `next_state:` label that the lexer's state dispatch jumps to.
 */
#define START_LEXER() \
    if (LOOKAHEAD_SYM() >= 0) return; \
    if (LOOKAHEAD_CHAR() == '\0') { ACCEPT_TOKEN(ts_symbol___END__); } \
    next_state:

View file

@ -6,11 +6,21 @@ extern TSParseConfig ts_parse_config_arithmetic;
START_TEST
describe("arithmetic", []() {
it("parses_numbers", [&]() {
TSDocument *document = TSDocumentMake();
TSDocument *document;
before_each([&]() {
document = TSDocumentMake();
TSDocumentSetUp(document, ts_parse_config_arithmetic);
TSDocumentSetText(document, "w");
printf("%s", TSDocumentToString(document));
});
it("parses variables", [&]() {
TSDocumentSetText(document, "x");
AssertThat(string(TSDocumentToString(document)), Equals("(expression (term (factor (variable))))"));
});
it("parses products of variables", [&]() {
TSDocumentSetText(document, "x*y");
AssertThat(string(TSDocumentToString(document)), Equals("(expression (term (factor (number)) (factor (number)))"));
});
});

View file

@ -1,6 +1,26 @@
#include "parser.h"
#include <stdio.h>
#include <string.h>
#include <string>
using std::string;
using std::to_string;
// Trace switches. Both are defined unconditionally here, so the DEBUG_PARSE
// and DEBUG_LEX macros below always expand to fprintf(stderr, ...) calls.
// Remove (or comment out) a switch to compile its trace output away.
#define TS_DEBUG_PARSE
#define TS_DEBUG_LEX
// DEBUG_LEX(fmt, ...): printf-style lexer trace written to stderr; expands to
// nothing when TS_DEBUG_LEX is not defined.
#ifdef TS_DEBUG_LEX
#define DEBUG_LEX(...) \
fprintf(stderr, __VA_ARGS__)
#else
#define DEBUG_LEX(...)
#endif
// DEBUG_PARSE(fmt, ...): printf-style parser trace written to stderr; expands
// to nothing when TS_DEBUG_PARSE is not defined.
#ifdef TS_DEBUG_PARSE
#define DEBUG_PARSE(...) \
fprintf(stderr, __VA_ARGS__)
#else
#define DEBUG_PARSE(...)
#endif
static int INITIAL_STACK_SIZE = 100;
@ -17,13 +37,14 @@ TSParser TSParserMake(const char *input) {
.position = 0,
.lookahead_node = NULL,
.lex_state = 0,
.stack = calloc(INITIAL_STACK_SIZE, sizeof(TSStackEntry)),
.stack = (TSStackEntry *)calloc(INITIAL_STACK_SIZE, sizeof(TSStackEntry)),
.stack_size = 0,
};
return result;
}
void TSParserShift(TSParser *parser, TSState parse_state) {
DEBUG_PARSE("shift %d \n", parse_state);
TSStackEntry *entry = (parser->stack + parser->stack_size);
entry->state = parse_state;
entry->node = parser->lookahead_node;
@ -34,42 +55,37 @@ void TSParserShift(TSParser *parser, TSState parse_state) {
/*
 * Performs a reduce step: removes the top `child_count` entries from the
 * parser stack and replaces them with a single new tree node.
 *
 * parser       parser whose stack is reduced.
 * symbol       symbol given to the newly created node.
 * child_count  number of stack entries that become the node's children.
 *
 * The new node is stored in parser->lookahead_node; it is not pushed onto the
 * stack here. The children array is malloc'd and handed to TSTreeMake
 * (presumably TSTreeMake takes ownership of it -- confirm in tree.c).
 */
void TSParserReduce(TSParser *parser, TSSymbol symbol, int child_count) {
    // Shrink the stack first: the popped entries now sit at indices
    // [stack_size, stack_size + child_count) and are still readable.
    parser->stack_size -= child_count;

    TSTree **children = (TSTree **)malloc(child_count * sizeof(TSTree *));
    for (int i = 0; i < child_count; i++) {
        children[i] = parser->stack[parser->stack_size + i].node;
    }

    parser->lookahead_node = TSTreeMake(symbol, child_count, children);
    DEBUG_PARSE("reduce: %ld, state: %u \n", symbol, TSParserParseState(parser));
}
/*
 * Records a parse-error message on `parser`.
 *
 * parser           parser that hit the error; its current lookahead symbol
 *                  (TSParserLookaheadSym) is reported as the unexpected token.
 * count            number of entries in `expected_inputs`.
 * expected_inputs  names of the tokens that would have been accepted.
 *
 * The message has the form
 *     Unexpected token <sym>. Expected tokens: '<a>' '<b>' ...
 * and is stored in parser->error_message as a malloc'd, NUL-terminated C
 * string (NULL if the allocation fails). Any message already stored there is
 * not freed by this function.
 */
void TSParserError(TSParser *parser, size_t count, const char **expected_inputs) {
    std::string result = "Unexpected token " + std::to_string(TSParserLookaheadSym(parser)) + ". ";
    result += "Expected tokens:";
    for (size_t i = 0; i < count; i++)
        result += std::string(" '") + expected_inputs[i] + "'";

    // size() excludes the terminating NUL, so reserve one extra byte for it;
    // copying size() + 1 bytes from c_str() brings the terminator along.
    const size_t byte_count = result.size() + 1;
    char *message = (char *)malloc(byte_count);
    if (message != NULL)
        memcpy(message, result.c_str(), byte_count);
    parser->error_message = message;
}
/*
 * Records a lex-error message on `parser`.
 *
 * parser           parser that hit the error; its current lookahead character
 *                  (TSParserLookaheadChar) is reported as the unexpected one.
 * count            number of entries in `expected_inputs`.
 * expected_inputs  descriptions of the characters that would have been
 *                  accepted.
 *
 * The message has the form
 *     Unexpected character '<c>'. Expected characters: <a> <b> ...
 * and is stored in parser->error_message as a malloc'd, NUL-terminated C
 * string (NULL if the allocation fails). Any message already stored there is
 * not freed by this function.
 */
void TSParserLexError(TSParser *parser, size_t count, const char **expected_inputs) {
    std::string result = std::string("Unexpected character '") + TSParserLookaheadChar(parser) + "'. ";
    result += "Expected characters:";
    for (size_t i = 0; i < count; i++)
        result += std::string(" ") + expected_inputs[i];

    // size() excludes the terminating NUL, so reserve one extra byte for it;
    // copying size() + 1 bytes from c_str() brings the terminator along.
    const size_t byte_count = result.size() + 1;
    char *message = (char *)malloc(byte_count);
    if (message != NULL)
        memcpy(message, result.c_str(), byte_count);
    parser->error_message = message;
}
/*
 * Moves the parser one character forward in its input and switches the lexer
 * to `lex_state`.
 *
 * The lookahead character is traced before the position changes, so the log
 * line reflects the value of TSParserLookaheadChar at the old position.
 */
void TSParserAdvance(TSParser *parser, TSState lex_state) {
    DEBUG_LEX("character: '%c' \n", TSParserLookaheadChar(parser));

    parser->lex_state = lex_state;
    parser->position += 1;
}
@ -84,6 +100,7 @@ long TSParserLookaheadSym(const TSParser *parser) {
}
/*
 * Sets the parser's lookahead to a freshly made, childless tree node for
 * `symbol`, after tracing the symbol to the lexer debug log.
 */
void TSParserSetLookaheadSym(TSParser *parser, TSSymbol symbol) {
    DEBUG_LEX("token: %ld \n", symbol);

    // A token is a leaf: zero children and no children array.
    TSTree *leaf = TSTreeMake(symbol, 0, NULL);
    parser->lookahead_node = leaf;
}

View file

@ -55,7 +55,7 @@
12FD40D8185FEEDF0041A84E /* rules_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 121492EA181E200B008E9BDA /* rules_spec.cpp */; };
12FD40D9185FEEDF0041A84E /* pattern_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12D136A0183570F5005F3369 /* pattern_spec.cpp */; };
12FD40DB185FEF0D0041A84E /* arithmetic_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12FD40DA185FEF0D0041A84E /* arithmetic_spec.cpp */; };
12FD40DD185FF12C0041A84E /* parser.c in Sources */ = {isa = PBXBuildFile; fileRef = 12FD40DC185FF12C0041A84E /* parser.c */; };
12FD40DD185FF12C0041A84E /* parser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12FD40DC185FF12C0041A84E /* parser.cpp */; };
12FD40DF1860064C0041A84E /* tree.c in Sources */ = {isa = PBXBuildFile; fileRef = 12FD40DE1860064C0041A84E /* tree.c */; };
12FD40E2186245FE0041A84E /* transitions.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12FD40E0186245FE0041A84E /* transitions.cpp */; };
12FD40E718639B910041A84E /* visitor.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12FD40E618639B910041A84E /* visitor.cpp */; };
@ -151,7 +151,7 @@
12FD40D1185EEB5E0041A84E /* runtime_specs */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = runtime_specs; sourceTree = BUILT_PRODUCTS_DIR; };
12FD40D4185FED9A0041A84E /* tree.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = tree.h; sourceTree = "<group>"; };
12FD40DA185FEF0D0041A84E /* arithmetic_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = arithmetic_spec.cpp; sourceTree = "<group>"; };
12FD40DC185FF12C0041A84E /* parser.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = parser.c; sourceTree = "<group>"; };
12FD40DC185FF12C0041A84E /* parser.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = parser.cpp; sourceTree = "<group>"; };
12FD40DE1860064C0041A84E /* tree.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = tree.c; sourceTree = "<group>"; };
12FD40E0186245FE0041A84E /* transitions.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = transitions.cpp; sourceTree = "<group>"; };
12FD40E1186245FE0041A84E /* transitions.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = transitions.h; sourceTree = "<group>"; };
@ -358,7 +358,7 @@
12FD40AD185EE5440041A84E /* runtime */ = {
isa = PBXGroup;
children = (
12FD40DC185FF12C0041A84E /* parser.c */,
12FD40DC185FF12C0041A84E /* parser.cpp */,
12FD40DE1860064C0041A84E /* tree.c */,
12EDCF8C187C6282005A7A07 /* document.c */,
);
@ -528,7 +528,7 @@
12FD40B3185EEB5E0041A84E /* seq.cpp in Sources */,
12FD40B4185EEB5E0041A84E /* table_builder.cpp in Sources */,
12FD40B6185EEB5E0041A84E /* arithmetic.cpp in Sources */,
12FD40DD185FF12C0041A84E /* parser.c in Sources */,
12FD40DD185FF12C0041A84E /* parser.cpp in Sources */,
12FD40B8185EEB5E0041A84E /* item.cpp in Sources */,
12EDCF8A187B498C005A7A07 /* tree_spec.cpp in Sources */,
12EDCF8D187C6282005A7A07 /* document.c in Sources */,