Add code for setting parse states’ corresponding lex states
This commit is contained in:
parent
a5e39d2512
commit
29a9b4643d
6 changed files with 33 additions and 36 deletions
|
|
@ -36,6 +36,7 @@ void TSParserError(TSParser *parser);
|
|||
TSState TSParserParseState(const TSParser *parser);
|
||||
TSState TSParserLexState(const TSParser *parser);
|
||||
TSSymbol TSParserLookahead(const TSParser *parser);
|
||||
void TSParserSetLexState(const TSParser *parser, TSState state);
|
||||
|
||||
#pragma mark - DSL
|
||||
|
||||
|
|
@ -62,6 +63,9 @@ TSSymbol TSParserLookahead(const TSParser *parser);
|
|||
#define SHIFT(number) \
|
||||
{ TSParserShift(&parser, number); break; }
|
||||
|
||||
#define SET_LEX_STATE(state_index) \
|
||||
{ TSParserSetLexState(&parser, state_index); }
|
||||
|
||||
#define ADVANCE(state_index) \
|
||||
{ break; }
|
||||
|
||||
|
|
|
|||
14
spec/fixtures/parsers/arithmetic.c
vendored
14
spec/fixtures/parsers/arithmetic.c
vendored
|
|
@ -1,5 +1,4 @@
|
|||
#include "runtime.h"
|
||||
#include <stdlib.h>
|
||||
#include <ctype.h>
|
||||
|
||||
typedef enum {
|
||||
|
|
@ -15,7 +14,6 @@ typedef enum {
|
|||
ts_symbol___END__
|
||||
} ts_symbol;
|
||||
|
||||
|
||||
static void ts_lex(TSParser *parser) {
|
||||
START_LEXER();
|
||||
switch (LEX_STATE()) {
|
||||
|
|
@ -135,6 +133,7 @@ TSTree ts_parse_arithmetic(const char *input) {
|
|||
START_PARSER();
|
||||
switch (PARSE_STATE()) {
|
||||
case 0:
|
||||
SET_LEX_STATE(0);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_left_paren:
|
||||
SHIFT(9);
|
||||
|
|
@ -152,6 +151,7 @@ TSTree ts_parse_arithmetic(const char *input) {
|
|||
PARSE_ERROR();
|
||||
}
|
||||
case 1:
|
||||
SET_LEX_STATE(12);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol___END__:
|
||||
ACCEPT_INPUT();
|
||||
|
|
@ -159,6 +159,7 @@ TSTree ts_parse_arithmetic(const char *input) {
|
|||
PARSE_ERROR();
|
||||
}
|
||||
case 2:
|
||||
SET_LEX_STATE(13);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_plus:
|
||||
SHIFT(3);
|
||||
|
|
@ -166,6 +167,7 @@ TSTree ts_parse_arithmetic(const char *input) {
|
|||
PARSE_ERROR();
|
||||
}
|
||||
case 3:
|
||||
SET_LEX_STATE(15);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_variable:
|
||||
SHIFT(8);
|
||||
|
|
@ -181,11 +183,13 @@ TSTree ts_parse_arithmetic(const char *input) {
|
|||
PARSE_ERROR();
|
||||
}
|
||||
case 4:
|
||||
SET_LEX_STATE(17);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
default:
|
||||
REDUCE(ts_symbol_expression, 3);
|
||||
}
|
||||
case 5:
|
||||
SET_LEX_STATE(18);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_times:
|
||||
SHIFT(6);
|
||||
|
|
@ -193,6 +197,7 @@ TSTree ts_parse_arithmetic(const char *input) {
|
|||
PARSE_ERROR();
|
||||
}
|
||||
case 6:
|
||||
SET_LEX_STATE(20);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_left_paren:
|
||||
SHIFT(9);
|
||||
|
|
@ -206,16 +211,19 @@ TSTree ts_parse_arithmetic(const char *input) {
|
|||
PARSE_ERROR();
|
||||
}
|
||||
case 7:
|
||||
SET_LEX_STATE(21);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
default:
|
||||
REDUCE(ts_symbol_term, 3);
|
||||
}
|
||||
case 8:
|
||||
SET_LEX_STATE(22);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
default:
|
||||
REDUCE(ts_symbol_factor, 1);
|
||||
}
|
||||
case 9:
|
||||
SET_LEX_STATE(23);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_left_paren:
|
||||
SHIFT(9);
|
||||
|
|
@ -233,6 +241,7 @@ TSTree ts_parse_arithmetic(const char *input) {
|
|||
PARSE_ERROR();
|
||||
}
|
||||
case 10:
|
||||
SET_LEX_STATE(24);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_right_paren:
|
||||
SHIFT(11);
|
||||
|
|
@ -240,6 +249,7 @@ TSTree ts_parse_arithmetic(const char *input) {
|
|||
PARSE_ERROR();
|
||||
}
|
||||
case 11:
|
||||
SET_LEX_STATE(26);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
default:
|
||||
REDUCE(ts_symbol_factor, 3);
|
||||
|
|
|
|||
|
|
@ -116,12 +116,14 @@ namespace tree_sitter {
|
|||
}
|
||||
}
|
||||
|
||||
string switch_on_lookahead_sym(const ParseState &parse_state) {
|
||||
string code_for_parse_state(const ParseState &parse_state) {
|
||||
string body = "";
|
||||
for (auto pair : parse_state.actions)
|
||||
body += _case(symbol_id(pair.first), code_for_parse_actions(pair.second));
|
||||
body += _default(code_for_parse_actions(parse_state.default_actions));
|
||||
return _switch("LOOKAHEAD_SYM()", body);
|
||||
return
|
||||
string("SET_LEX_STATE(") + to_string(parse_state.lex_state_index) + ");\n" +
|
||||
_switch("LOOKAHEAD_SYM()", body);
|
||||
}
|
||||
|
||||
string switch_on_lookahead_char(const LexState &parse_state) {
|
||||
|
|
@ -135,7 +137,7 @@ namespace tree_sitter {
|
|||
string switch_on_parse_state() {
|
||||
string body = "";
|
||||
for (int i = 0; i < parse_table.states.size(); i++)
|
||||
body += _case(std::to_string(i), switch_on_lookahead_sym(parse_table.states[i]));
|
||||
body += _case(std::to_string(i), code_for_parse_state(parse_table.states[i]));
|
||||
body += _default("PARSE_ERROR();");
|
||||
return _switch("PARSE_STATE()", body);
|
||||
}
|
||||
|
|
@ -154,13 +156,12 @@ namespace tree_sitter {
|
|||
result += indent(symbol_id(rule_name)) + ",\n";
|
||||
result += indent(symbol_id(ParseTable::END_OF_INPUT));
|
||||
return result + "\n"
|
||||
"} ts_symbol;\n";
|
||||
"} ts_symbol;";
|
||||
}
|
||||
|
||||
string includes() {
|
||||
return string(
|
||||
"#include \"runtime.h\"\n"
|
||||
"#include <stdlib.h>\n"
|
||||
"#include <ctype.h>");
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -61,6 +61,7 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
#pragma mark - container
|
||||
|
||||
ItemSet::const_iterator ItemSet::begin() const {
|
||||
return contents.begin();
|
||||
}
|
||||
|
|
@ -74,6 +75,7 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
#pragma mark - printing
|
||||
|
||||
ostream& operator<<(ostream &stream, const ItemSet &item_set) {
|
||||
stream << string("(item_set ");
|
||||
for (Item item : item_set) {
|
||||
|
|
|
|||
|
|
@ -43,3 +43,7 @@ TSState TSParserParseState(const TSParser *parser) {
|
|||
TSState TSParserLexState(const TSParser *parser) {
|
||||
return 5;
|
||||
}
|
||||
|
||||
void TSParserSetLexState(const TSParser *parser, TSState lex_state) {
|
||||
|
||||
}
|
||||
|
|
|
|||
34
todo.md
34
todo.md
|
|
@ -1,36 +1,12 @@
|
|||
TODO
|
||||
====
|
||||
|
||||
# complete the list of rule types
|
||||
|
||||
- add repeat rules
|
||||
- parse regex rules into trees of choices, sequences, repeats
|
||||
|
||||
# generate lexers for sets of terminal rules (can be a mix of throwaway and meaningful)
|
||||
|
||||
Introduce ParseTable type which contains a vector of ParseStates. A ParseState contains a
|
||||
TransitionMap of ParseActions. For a lexer, a ParseAction can be one of:
|
||||
- Accept(symbol)
|
||||
- Advance(state index)
|
||||
|
||||
Then generate a C function for a ParseTable
|
||||
|
||||
# generate parsers from sets of non-terminal rules
|
||||
|
||||
For a Parser, the ParseActions can be any of:
|
||||
- Accept(symbol)
|
||||
- Shift(state_index)
|
||||
- Reduce(symbol, number of child symbols)
|
||||
|
||||
# normalize grammars
|
||||
|
||||
- add concept of throwaway-terminals (tokens that won't appear in constructed AST)
|
||||
- classify rules as non-terminals or terminals
|
||||
- extract strings and regexes from non-terminal rules into their own throwaway-terminals,
|
||||
in order to separate lexing from parsing
|
||||
- separate rules into non-terminals and terminals
|
||||
- extract strings and regexes from non-terminal rules into their own terminals,
|
||||
in order to separate lexing from parsing
|
||||
|
||||
After this, a grammar will have these fields:
|
||||
- non-terminal rules
|
||||
- terminal rules
|
||||
- throwaway terminal rules
|
||||
# refine
|
||||
|
||||
- add concept of throwaway terminals (tokens that won't appear in constructed AST)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue