Improve parser error messages
This commit is contained in:
parent
55809f702d
commit
2894ad7447
4 changed files with 62 additions and 35 deletions
|
|
@ -14,7 +14,7 @@ typedef struct TSStackEntry TSStackEntry;
|
|||
typedef struct TSParser {
|
||||
TSTree *tree;
|
||||
const char *input;
|
||||
char *error_message;
|
||||
const char *error_message;
|
||||
size_t position;
|
||||
TSTree *lookahead_node;
|
||||
TSState lex_state;
|
||||
|
|
@ -47,7 +47,7 @@ next_state:
|
|||
ts_symbol_names[LOOKAHEAD_SYM()]
|
||||
|
||||
/*
 * Lexer prologue macro.
 *
 * Returns from the enclosing function immediately when LOOKAHEAD_SYM() is
 * already non-negative, accepts ts_symbol___END__ when the lookahead
 * character is the NUL terminator, and finally declares the `next_state`
 * label. Only the `>= 0` form of the first guard is kept; the older `> 0`
 * guard it replaced was redundant next to it.
 */
#define START_LEXER() \
    if (LOOKAHEAD_SYM() >= 0) return; \
    if (LOOKAHEAD_CHAR() == '\0') { ACCEPT_TOKEN(ts_symbol___END__); } \
    next_state:
|
||||
|
||||
|
|
|
|||
|
|
@ -6,11 +6,21 @@ extern TSParseConfig ts_parse_config_arithmetic;
|
|||
START_TEST
|
||||
|
||||
describe("arithmetic", []() {
|
||||
it("parses_numbers", [&]() {
|
||||
TSDocument *document = TSDocumentMake();
|
||||
TSDocument *document;
|
||||
|
||||
before_each([&]() {
|
||||
document = TSDocumentMake();
|
||||
TSDocumentSetUp(document, ts_parse_config_arithmetic);
|
||||
TSDocumentSetText(document, "w");
|
||||
printf("%s", TSDocumentToString(document));
|
||||
});
|
||||
|
||||
it("parses variables", [&]() {
|
||||
TSDocumentSetText(document, "x");
|
||||
AssertThat(string(TSDocumentToString(document)), Equals("(expression (term (factor (variable))))"));
|
||||
});
|
||||
|
||||
it("parses products of variables", [&]() {
|
||||
TSDocumentSetText(document, "x*y");
|
||||
AssertThat(string(TSDocumentToString(document)), Equals("(expression (term (factor (number)) (factor (number)))"));
|
||||
});
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,26 @@
|
|||
#include "parser.h"
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <string>
|
||||
|
||||
using std::string;
|
||||
using std::to_string;
|
||||
|
||||
/*
 * Debug tracing switches. Both are defined unconditionally here, so lexer and
 * parser tracing is always compiled in; remove a define to silence the
 * corresponding output.
 */
#define TS_DEBUG_PARSE
#define TS_DEBUG_LEX

/*
 * DEBUG_LEX(fmt, ...): printf-style trace written to stderr when TS_DEBUG_LEX
 * is defined; expands to nothing otherwise.
 */
#ifdef TS_DEBUG_LEX
#define DEBUG_LEX(...) \
    fprintf(stderr, __VA_ARGS__)
#else
#define DEBUG_LEX(...)
#endif

/*
 * DEBUG_PARSE(fmt, ...): printf-style trace written to stderr when
 * TS_DEBUG_PARSE is defined; expands to nothing otherwise.
 */
#ifdef TS_DEBUG_PARSE
#define DEBUG_PARSE(...) \
    fprintf(stderr, __VA_ARGS__)
#else
#define DEBUG_PARSE(...)
#endif
|
||||
|
||||
static int INITIAL_STACK_SIZE = 100;
|
||||
|
||||
|
|
@ -17,13 +37,14 @@ TSParser TSParserMake(const char *input) {
|
|||
.position = 0,
|
||||
.lookahead_node = NULL,
|
||||
.lex_state = 0,
|
||||
.stack = calloc(INITIAL_STACK_SIZE, sizeof(TSStackEntry)),
|
||||
.stack = (TSStackEntry *)calloc(INITIAL_STACK_SIZE, sizeof(TSStackEntry)),
|
||||
.stack_size = 0,
|
||||
};
|
||||
return result;
|
||||
}
|
||||
|
||||
void TSParserShift(TSParser *parser, TSState parse_state) {
|
||||
DEBUG_PARSE("shift %d \n", parse_state);
|
||||
TSStackEntry *entry = (parser->stack + parser->stack_size);
|
||||
entry->state = parse_state;
|
||||
entry->node = parser->lookahead_node;
|
||||
|
|
@ -34,42 +55,37 @@ void TSParserShift(TSParser *parser, TSState parse_state) {
|
|||
/*
 * Performs a reduce step.
 *
 * Removes the top `child_count` entries from the parser stack by lowering
 * `stack_size`, gathers their nodes (in stack order) into a freshly malloc'd
 * array, and stores a new tree node built from `symbol` and those children as
 * the parser's lookahead node.
 *
 * NOTE(review): the malloc result is not checked before it is written to.
 */
void TSParserReduce(TSParser *parser, TSSymbol symbol, int child_count) {
    parser->stack_size -= child_count;

    // The popped entries still sit just above the new top of the stack.
    TSTree **children = static_cast<TSTree **>(malloc(child_count * sizeof(TSTree *)));
    for (int i = 0; i < child_count; i++) {
        children[i] = parser->stack[parser->stack_size + i].node;
    }

    parser->lookahead_node = TSTreeMake(symbol, child_count, children);
    DEBUG_PARSE("reduce: %ld, state: %u \n", symbol, TSParserParseState(parser));
}
|
||||
|
||||
/*
 * Records a parse error on `parser`.
 *
 * The message names the unexpected lookahead symbol (as returned by
 * TSParserLookaheadSym) followed by each of the `count` strings in
 * `expected_inputs`, wrapped in single quotes. It is copied into a malloc'd,
 * NUL-terminated buffer that is stored in `parser->error_message`; any
 * message stored there earlier is not freed by this function. If the
 * allocation fails, `error_message` is set to NULL.
 */
void TSParserError(TSParser *parser, size_t count, const char **expected_inputs) {
    std::string result = "Unexpected token " + std::to_string(TSParserLookaheadSym(parser)) + ". ";
    result += "Expected tokens:";
    for (size_t i = 0; i < count; ++i) {
        result += " '";
        result += expected_inputs[i];
        result += "'";
    }

    // size() does not count the terminating NUL, so one extra byte is needed;
    // allocating only size() bytes made the copy write past the buffer.
    const size_t length = result.size() + 1;
    char *message = static_cast<char *>(malloc(length));
    if (message != NULL)
        memcpy(message, result.c_str(), length);
    parser->error_message = message;
}
|
||||
|
||||
/*
 * Records a lexing error on `parser`.
 *
 * The message quotes the current lookahead character (as returned by
 * TSParserLookaheadChar) followed by each of the `count` strings in
 * `expected_inputs`, separated by spaces. It is copied into a malloc'd,
 * NUL-terminated buffer that is stored in `parser->error_message`; any
 * message stored there earlier is not freed by this function. If the
 * allocation fails, `error_message` is set to NULL.
 *
 * NOTE(review): if the lookahead character is '\0', the stored C string will
 * read as ending right after the opening quote — confirm whether this
 * function can be reached at end of input.
 */
void TSParserLexError(TSParser *parser, size_t count, const char **expected_inputs) {
    std::string result = std::string("Unexpected character '") + TSParserLookaheadChar(parser) + "'. ";
    result += "Expected characters:";
    for (size_t i = 0; i < count; ++i) {
        result += " ";
        result += expected_inputs[i];
    }

    // size() does not count the terminating NUL, so one extra byte is needed;
    // allocating only size() bytes made the copy write past the buffer.
    const size_t length = result.size() + 1;
    char *message = static_cast<char *>(malloc(length));
    if (message != NULL)
        memcpy(message, result.c_str(), length);
    parser->error_message = message;
}
|
||||
|
||||
/*
 * Consumes one input character: traces the current lookahead character via
 * DEBUG_LEX, moves `position` forward by one, and records `lex_state` as the
 * parser's new lexer state.
 */
void TSParserAdvance(TSParser *parser, TSState lex_state) {
    // Trace before advancing so the log shows the character being consumed.
    DEBUG_LEX("character: '%c' \n", TSParserLookaheadChar(parser));
    parser->position++;
    parser->lex_state = lex_state;
}
|
||||
|
|
@ -84,6 +100,7 @@ long TSParserLookaheadSym(const TSParser *parser) {
|
|||
}
|
||||
|
||||
/*
 * Sets the parser's lookahead to a new tree node for `symbol`, built with a
 * child count of zero and no child array, after tracing the symbol via
 * DEBUG_LEX.
 */
void TSParserSetLookaheadSym(TSParser *parser, TSSymbol symbol) {
    DEBUG_LEX("token: %ld \n", symbol);
    parser->lookahead_node = TSTreeMake(symbol, 0, NULL);
}
|
||||
|
||||
|
|
@ -55,7 +55,7 @@
|
|||
12FD40D8185FEEDF0041A84E /* rules_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 121492EA181E200B008E9BDA /* rules_spec.cpp */; };
|
||||
12FD40D9185FEEDF0041A84E /* pattern_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12D136A0183570F5005F3369 /* pattern_spec.cpp */; };
|
||||
12FD40DB185FEF0D0041A84E /* arithmetic_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12FD40DA185FEF0D0041A84E /* arithmetic_spec.cpp */; };
|
||||
12FD40DD185FF12C0041A84E /* parser.c in Sources */ = {isa = PBXBuildFile; fileRef = 12FD40DC185FF12C0041A84E /* parser.c */; };
|
||||
12FD40DD185FF12C0041A84E /* parser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12FD40DC185FF12C0041A84E /* parser.cpp */; };
|
||||
12FD40DF1860064C0041A84E /* tree.c in Sources */ = {isa = PBXBuildFile; fileRef = 12FD40DE1860064C0041A84E /* tree.c */; };
|
||||
12FD40E2186245FE0041A84E /* transitions.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12FD40E0186245FE0041A84E /* transitions.cpp */; };
|
||||
12FD40E718639B910041A84E /* visitor.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12FD40E618639B910041A84E /* visitor.cpp */; };
|
||||
|
|
@ -151,7 +151,7 @@
|
|||
12FD40D1185EEB5E0041A84E /* runtime_specs */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = runtime_specs; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
12FD40D4185FED9A0041A84E /* tree.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = tree.h; sourceTree = "<group>"; };
|
||||
12FD40DA185FEF0D0041A84E /* arithmetic_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = arithmetic_spec.cpp; sourceTree = "<group>"; };
|
||||
12FD40DC185FF12C0041A84E /* parser.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = parser.c; sourceTree = "<group>"; };
|
||||
12FD40DC185FF12C0041A84E /* parser.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = parser.cpp; sourceTree = "<group>"; };
|
||||
12FD40DE1860064C0041A84E /* tree.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = tree.c; sourceTree = "<group>"; };
|
||||
12FD40E0186245FE0041A84E /* transitions.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = transitions.cpp; sourceTree = "<group>"; };
|
||||
12FD40E1186245FE0041A84E /* transitions.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = transitions.h; sourceTree = "<group>"; };
|
||||
|
|
@ -358,7 +358,7 @@
|
|||
12FD40AD185EE5440041A84E /* runtime */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
12FD40DC185FF12C0041A84E /* parser.c */,
|
||||
12FD40DC185FF12C0041A84E /* parser.cpp */,
|
||||
12FD40DE1860064C0041A84E /* tree.c */,
|
||||
12EDCF8C187C6282005A7A07 /* document.c */,
|
||||
);
|
||||
|
|
@ -528,7 +528,7 @@
|
|||
12FD40B3185EEB5E0041A84E /* seq.cpp in Sources */,
|
||||
12FD40B4185EEB5E0041A84E /* table_builder.cpp in Sources */,
|
||||
12FD40B6185EEB5E0041A84E /* arithmetic.cpp in Sources */,
|
||||
12FD40DD185FF12C0041A84E /* parser.c in Sources */,
|
||||
12FD40DD185FF12C0041A84E /* parser.cpp in Sources */,
|
||||
12FD40B8185EEB5E0041A84E /* item.cpp in Sources */,
|
||||
12EDCF8A187B498C005A7A07 /* tree_spec.cpp in Sources */,
|
||||
12EDCF8D187C6282005A7A07 /* document.c in Sources */,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue