2014-03-09 22:05:17 -07:00
|
|
|
#ifndef TREE_SITTER_PARSER_H_
|
|
|
|
|
#define TREE_SITTER_PARSER_H_
|
2014-02-15 17:00:33 -08:00
|
|
|
|
2014-01-11 18:14:24 -08:00
|
|
|
#ifdef __cplusplus
|
|
|
|
|
extern "C" {
|
|
|
|
|
#endif
|
2014-03-08 15:04:23 -08:00
|
|
|
|
2014-10-03 16:06:08 -07:00
|
|
|
#include <stdbool.h>
|
2016-10-05 14:02:49 -07:00
|
|
|
#include <stdint.h>
|
|
|
|
|
#include <stdlib.h>
|
2014-07-10 13:14:52 -07:00
|
|
|
|
2016-10-05 14:02:49 -07:00
|
|
|
// The value -1 converted to TSSymbol. Presumably the symbol reserved for
// error nodes -- confirm against the runtime.
#define ts_builtin_sym_error ((TSSymbol)-1)
|
|
|
|
|
// Symbol value 0. Presumably the end-of-input symbol -- confirm against the
// runtime.
#define ts_builtin_sym_end 0
|
2017-07-17 17:12:36 -07:00
|
|
|
// NOTE(review): presumably the size in bytes of the buffer handed to an
// external scanner's serialize callback -- confirm against the runtime.
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
|
2015-12-04 10:45:30 -08:00
|
|
|
|
2019-01-10 15:22:39 -08:00
|
|
|
/*
 * Basic scalar and handle types. They are only declared here when
 * TREE_SITTER_API_H_ is not defined (presumably because the public API
 * header declares the same types -- confirm).
 */
#ifndef TREE_SITTER_API_H_
typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
#endif
|
2018-05-11 12:57:41 -07:00
|
|
|
|
2019-02-07 17:18:33 -08:00
|
|
|
typedef struct {
|
|
|
|
|
TSFieldId field_id;
|
|
|
|
|
uint8_t child_index;
|
|
|
|
|
bool inherited;
|
2019-02-08 16:06:29 -08:00
|
|
|
} TSFieldMapEntry;
|
|
|
|
|
|
|
|
|
|
/*
 * An (index, length) pair.
 * NOTE(review): presumably addresses a run of TSFieldMapEntry values inside
 * TSLanguage.field_map_entries -- confirm against the runtime.
 */
typedef struct {
  uint16_t index;
  uint16_t length;
} TSFieldMapSlice;
|
2019-02-07 17:18:33 -08:00
|
|
|
|
2018-05-11 12:57:41 -07:00
|
|
|
// 16-bit unsigned state identifier. Used for the shift target in
// TSParseAction and as the state argument of the lex functions in TSLanguage.
typedef uint16_t TSStateId;
|
|
|
|
|
|
2015-11-22 13:32:20 -08:00
|
|
|
/*
 * Per-symbol flags, each stored in a one-bit bit-field.
 * TSLanguage.symbol_metadata points at values of this type.
 */
typedef struct {
  bool visible : 1;
  bool named : 1;
} TSSymbolMetadata;
|
2015-09-05 22:29:17 -07:00
|
|
|
|
2018-07-17 13:58:26 -07:00
|
|
|
typedef struct TSLexer TSLexer;
|
|
|
|
|
|
|
|
|
|
struct TSLexer {
|
2014-09-13 00:15:24 -07:00
|
|
|
int32_t lookahead;
|
2016-05-20 20:26:03 -07:00
|
|
|
TSSymbol result_symbol;
|
2018-07-17 13:58:26 -07:00
|
|
|
void (*advance)(TSLexer *, bool);
|
|
|
|
|
void (*mark_end)(TSLexer *);
|
|
|
|
|
uint32_t (*get_column)(TSLexer *);
|
2019-10-30 17:11:15 -07:00
|
|
|
bool (*is_at_included_range_start)(const TSLexer *);
|
|
|
|
|
bool (*eof)(const TSLexer *);
|
2018-07-17 13:58:26 -07:00
|
|
|
};
|
2014-07-10 13:14:52 -07:00
|
|
|
|
|
|
|
|
/*
 * Kind of a parse action. Stored in TSParseAction.type and set by the
 * SHIFT*, REDUCE, ACCEPT_INPUT and RECOVER initializer macros.
 * The enumerators take the default values 0..3 in declaration order.
 */
typedef enum {
  TSParseActionTypeShift,
  TSParseActionTypeReduce,
  TSParseActionTypeAccept,
  TSParseActionTypeRecover,
} TSParseActionType;
|
|
|
|
|
|
|
|
|
|
typedef struct {
|
2014-07-20 20:27:33 -07:00
|
|
|
union {
|
|
|
|
|
struct {
|
2017-07-21 10:17:54 -07:00
|
|
|
TSStateId state;
|
2017-07-13 17:17:22 -07:00
|
|
|
bool extra : 1;
|
2018-01-29 10:40:59 -08:00
|
|
|
bool repetition : 1;
|
2017-07-13 17:17:22 -07:00
|
|
|
};
|
|
|
|
|
struct {
|
2014-07-20 20:27:33 -07:00
|
|
|
TSSymbol symbol;
|
2017-07-14 11:05:54 -07:00
|
|
|
int16_t dynamic_precedence;
|
2017-07-13 17:17:22 -07:00
|
|
|
uint8_t child_count;
|
2019-02-12 11:06:18 -08:00
|
|
|
uint8_t production_id;
|
2014-07-20 20:27:33 -07:00
|
|
|
};
|
2017-07-21 10:17:54 -07:00
|
|
|
} params;
|
2016-06-21 22:53:48 -07:00
|
|
|
TSParseActionType type : 4;
|
2014-07-10 13:14:52 -07:00
|
|
|
} TSParseAction;
|
|
|
|
|
|
2016-11-30 09:34:47 -08:00
|
|
|
/*
 * A pair of lex-state numbers. TSLanguage.lex_modes points at values of
 * this type.
 * NOTE(review): presumably one entry per parse state -- confirm.
 */
typedef struct {
  uint16_t lex_state;
  uint16_t external_lex_state;
} TSLexMode;
|
|
|
|
|
|
2015-12-29 11:20:52 -08:00
|
|
|
typedef union {
|
|
|
|
|
TSParseAction action;
|
2016-06-21 07:28:04 -07:00
|
|
|
struct {
|
2017-07-13 17:17:22 -07:00
|
|
|
uint8_t count;
|
2016-06-21 07:28:04 -07:00
|
|
|
bool reusable : 1;
|
|
|
|
|
};
|
2015-12-29 11:20:52 -08:00
|
|
|
} TSParseActionEntry;
|
|
|
|
|
|
2018-05-17 17:59:50 -07:00
|
|
|
struct TSLanguage {
|
2017-01-31 10:21:47 -08:00
|
|
|
uint32_t version;
|
2016-11-14 12:15:24 -08:00
|
|
|
uint32_t symbol_count;
|
2017-07-31 11:45:24 -07:00
|
|
|
uint32_t alias_count;
|
2016-11-14 12:15:24 -08:00
|
|
|
uint32_t token_count;
|
2016-12-02 22:03:48 -08:00
|
|
|
uint32_t external_token_count;
|
2014-07-20 20:27:33 -07:00
|
|
|
const char **symbol_names;
|
2015-11-22 13:32:20 -08:00
|
|
|
const TSSymbolMetadata *symbol_metadata;
|
2017-07-13 17:17:22 -07:00
|
|
|
const uint16_t *parse_table;
|
2015-12-29 11:20:52 -08:00
|
|
|
const TSParseActionEntry *parse_actions;
|
2016-11-30 09:34:47 -08:00
|
|
|
const TSLexMode *lex_modes;
|
2017-07-31 11:45:24 -07:00
|
|
|
const TSSymbol *alias_sequences;
|
2019-02-08 16:14:18 -08:00
|
|
|
uint16_t max_alias_sequence_length;
|
2016-09-03 22:46:14 -07:00
|
|
|
bool (*lex_fn)(TSLexer *, TSStateId);
|
2018-03-07 11:56:59 -08:00
|
|
|
bool (*keyword_lex_fn)(TSLexer *, TSStateId);
|
|
|
|
|
TSSymbol keyword_capture_token;
|
2016-11-30 09:34:47 -08:00
|
|
|
struct {
|
2016-12-21 11:24:41 -08:00
|
|
|
const bool *states;
|
|
|
|
|
const TSSymbol *symbol_map;
|
2019-03-21 16:06:06 -07:00
|
|
|
void *(*create)(void);
|
2016-12-21 11:24:41 -08:00
|
|
|
void (*destroy)(void *);
|
|
|
|
|
bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
|
2017-07-17 17:12:36 -07:00
|
|
|
unsigned (*serialize)(void *, char *);
|
|
|
|
|
void (*deserialize)(void *, const char *, unsigned);
|
2016-11-30 09:34:47 -08:00
|
|
|
} external_scanner;
|
2019-02-07 12:29:20 -08:00
|
|
|
uint32_t field_count;
|
2019-02-08 16:06:29 -08:00
|
|
|
const TSFieldMapSlice *field_map_slices;
|
|
|
|
|
const TSFieldMapEntry *field_map_entries;
|
2019-02-07 12:29:20 -08:00
|
|
|
const char **field_names;
|
2019-05-16 16:59:50 -07:00
|
|
|
uint32_t large_state_count;
|
|
|
|
|
const uint16_t *small_parse_table;
|
|
|
|
|
const uint32_t *small_parse_table_map;
|
2019-12-05 17:21:46 -08:00
|
|
|
const TSSymbol *public_symbol_map;
|
2018-05-17 17:59:50 -07:00
|
|
|
};
|
2014-07-10 13:14:52 -07:00
|
|
|
|
2014-10-17 16:20:01 -07:00
|
|
|
/*
 *  Lexer Macros
 */
|
2014-06-09 21:14:38 -07:00
|
|
|
|
2017-03-13 17:03:47 -07:00
|
|
|
/*
 * Opens the body of a lexing routine. Declares the locals `result`, `skip`,
 * `eof` and `lookahead`, then jumps to the `start` label.
 *
 * `next_state` is the target of ADVANCE() and SKIP(): it calls
 * lexer->advance(lexer, skip) and falls through to `start`, which clears
 * `skip` and reloads `lookahead` from lexer->lookahead.
 *
 * Requires a pointer named `lexer` in scope at the expansion site. The
 * expansion already ends in `;`.
 */
#define START_LEXER()           \
  bool result = false;          \
  bool skip = false;            \
  bool eof = false;             \
  int32_t lookahead;            \
  goto start;                   \
  next_state:                   \
  lexer->advance(lexer, skip);  \
  start:                        \
  skip = false;                 \
  lookahead = lexer->lookahead;
|
2014-03-28 13:51:32 -07:00
|
|
|
|
2019-06-19 21:08:59 -07:00
|
|
|
/*
 * Stores `state_value` in the variable `state` and jumps to the
 * `next_state` label declared by START_LEXER().
 * Expands to a braced block, so it is safe as the body of an unbraced `if`.
 */
#define ADVANCE(state_value) \
  {                          \
    state = state_value;     \
    goto next_state;         \
  }
|
|
|
|
|
|
2019-06-19 21:08:59 -07:00
|
|
|
/*
 * Same as ADVANCE(), but sets `skip` to true first, so the
 * lexer->advance(lexer, skip) call at `next_state` receives true.
 */
#define SKIP(state_value) \
  {                       \
    skip = true;          \
    state = state_value;  \
    goto next_state;      \
  }
|
|
|
|
|
|
2017-03-13 17:03:47 -07:00
|
|
|
/*
 * Records a recognized token: sets `result` to true, stores `symbol_value`
 * in lexer->result_symbol, and calls lexer->mark_end(lexer).
 *
 * NOTE: expands to three separate statements that are NOT wrapped in
 * braces, and the expansion already ends in `;`. Do not use it as the body
 * of an unbraced `if`/`else`/loop.
 */
#define ACCEPT_TOKEN(symbol_value)     \
  result = true;                       \
  lexer->result_symbol = symbol_value; \
  lexer->mark_end(lexer);
|
2016-05-20 20:26:03 -07:00
|
|
|
|
2017-03-13 17:03:47 -07:00
|
|
|
// Returns the local `result` (declared by START_LEXER()) from the enclosing
// function. The expansion already ends in `;`.
#define END_STATE() return result;
|
2016-05-20 20:26:03 -07:00
|
|
|
|
2014-10-17 16:20:01 -07:00
|
|
|
/*
 *  Parse Table Macros
 */
|
|
|
|
|
|
2019-05-16 16:59:50 -07:00
|
|
|
// Converts a state id into an offset relative to LARGE_STATE_COUNT, which
// must be defined at the point of use. Both the argument and the whole
// expansion are parenthesized so the macro is safe inside larger
// expressions and with compound arguments.
#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)
|
|
|
|
|
|
2016-11-14 08:36:06 -08:00
|
|
|
// Expands to `id` unchanged. Presumably a readability marker for state ids
// in generated parse tables -- confirm.
#define STATE(id) id
|
2018-05-11 12:57:41 -07:00
|
|
|
|
2016-11-14 08:36:06 -08:00
|
|
|
// Expands to `id` unchanged. Presumably a readability marker for indices
// into the parse-actions array in generated parse tables -- confirm.
#define ACTIONS(id) id
|
|
|
|
|
|
2017-07-21 10:17:54 -07:00
|
|
|
/*
 * Brace initializer for a shift action: type TSParseActionTypeShift with
 * params.state set to `state_value`.
 * The outer braces are the enclosing aggregate (the `action` member when
 * used to initialize a TSParseActionEntry); the inner braces are the
 * TSParseAction itself.
 */
#define SHIFT(state_value)              \
  {                                     \
    {                                   \
      .type = TSParseActionTypeShift,   \
      .params = {.state = state_value}, \
    }                                   \
  }
|
|
|
|
|
|
2018-01-29 10:40:59 -08:00
|
|
|
/*
 * Brace initializer for a shift action like SHIFT(), with
 * params.repetition additionally set to true.
 */
#define SHIFT_REPEAT(state_value)     \
  {                                   \
    {                                 \
      .type = TSParseActionTypeShift, \
      .params = {                     \
        .state = state_value,         \
        .repetition = true            \
      },                              \
    }                                 \
  }
|
|
|
|
|
|
/*
 * Simplify error recovery; eliminate recovery states
 *
 * The previous approach to error recovery relied on special error-recovery
 * states in the parse table. For each token T, there was an error recovery
 * state in which the parser looked for *any* token that could follow T.
 * Unfortunately, sometimes the set of tokens that could follow T contained
 * conflicts. For example, in JS, the token '}' can be followed by the
 * open-ended 'template_chars' token, but also by ordinary tokens like
 * 'identifier'. So with the old algorithm, when recovering from an
 * unexpected '}' token, the lexer had no way to distinguish identifiers
 * from template_chars.
 *
 * This commit drops the error recovery states. Instead, when we encounter
 * an unexpected token T, we recover from the error by finding a previous
 * state S in the stack in which T would be valid, popping all of the nodes
 * after S, and wrapping them in an error.
 *
 * This way, the lexer is always invoked in a normal parse state, in which
 * it is looking for a non-conflicting set of tokens. Eliminating the error
 * recovery states also shrinks the lex state machine significantly.
 *
 * Signed-off-by: Rick Winfrey <rewinfrey@github.com>
 */
2017-09-11 15:22:52 -07:00
|
|
|
/*
 * Brace initializer for an action of type TSParseActionTypeRecover.
 * No params are set, so they are zero-initialized.
 */
#define RECOVER()                        \
  {                                      \
    { .type = TSParseActionTypeRecover } \
  }
|
2016-06-18 20:35:33 -07:00
|
|
|
|
2017-07-13 17:17:22 -07:00
|
|
|
/*
 * Brace initializer for a shift action with params.extra set to true.
 * params.state is not named, so it is zero-initialized.
 */
#define SHIFT_EXTRA()                 \
  {                                   \
    {                                 \
      .type = TSParseActionTypeShift, \
      .params = {.extra = true}       \
    }                                 \
  }
|
|
|
|
|
|
2017-07-13 17:17:22 -07:00
|
|
|
/*
 * Brace initializer for a reduce action: type TSParseActionTypeReduce with
 * params.symbol and params.child_count set from the first two arguments.
 *
 * Any further arguments are pasted verbatim after `.child_count`, so they
 * must be designated initializers for other params members (for example
 * `.dynamic_precedence = 1, .production_id = 2`).
 */
#define REDUCE(symbol_val, child_count_val, ...) \
  {                                              \
    {                                            \
      .type = TSParseActionTypeReduce,           \
      .params = {                                \
        .symbol = symbol_val,                    \
        .child_count = child_count_val,          \
        __VA_ARGS__                              \
      }                                          \
    }                                            \
  }
|
|
|
|
|
|
2015-12-29 21:17:31 -08:00
|
|
|
/*
 * Brace initializer for an action of type TSParseActionTypeAccept.
 * No params are set, so they are zero-initialized.
 */
#define ACCEPT_INPUT()                  \
  {                                     \
    { .type = TSParseActionTypeAccept } \
  }
|
|
|
|
|
|
2014-01-05 15:43:00 -08:00
|
|
|
#ifdef __cplusplus
|
|
|
|
|
}
|
|
|
|
|
#endif
|
2014-02-15 17:00:33 -08:00
|
|
|
|
2014-03-09 22:05:17 -07:00
|
|
|
#endif // TREE_SITTER_PARSER_H_
|