2019-01-04 17:33:34 -08:00
|
|
|
#include "./language.h"
|
2024-02-26 13:08:30 -08:00
|
|
|
#include "./wasm_store.h"
|
2023-12-27 14:54:38 -08:00
|
|
|
#include "tree_sitter/api.h"
|
2018-06-21 12:54:32 -07:00
|
|
|
#include <string.h>
|
2016-05-09 14:31:44 -07:00
|
|
|
|
2023-11-27 15:50:08 -08:00
|
|
|
// Hand back `self` for use as another reference to the same language.
//
// A NULL language is returned untouched. When `ts_language_is_wasm` reports
// true, `ts_wasm_language_retain` is called on the language before it is
// returned; all other languages are returned as-is.
const TSLanguage *ts_language_copy(const TSLanguage *self) {
  if (!self) return self;
  if (ts_language_is_wasm(self)) ts_wasm_language_retain(self);
  return self;
}
|
|
|
|
|
|
2023-12-27 14:54:38 -08:00
|
|
|
// Give up a reference to a language.
//
// Only languages for which `ts_language_is_wasm` reports true need any work:
// they are passed to `ts_wasm_language_release`. NULL and every other
// language are ignored.
void ts_language_delete(const TSLanguage *self) {
  if (!self || !ts_language_is_wasm(self)) return;
  ts_wasm_language_release(self);
}
|
|
|
|
|
|
2019-12-05 17:21:46 -08:00
|
|
|
uint32_t ts_language_symbol_count(const TSLanguage *self) {
|
|
|
|
|
return self->symbol_count + self->alias_count;
|
|
|
|
|
}
|
|
|
|
|
|
2023-05-17 10:39:37 +03:00
|
|
|
uint32_t ts_language_state_count(const TSLanguage *self) {
|
|
|
|
|
return self->state_count;
|
|
|
|
|
}
|
|
|
|
|
|
2019-12-05 17:21:46 -08:00
|
|
|
uint32_t ts_language_version(const TSLanguage *self) {
|
|
|
|
|
return self->version;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
uint32_t ts_language_field_count(const TSLanguage *self) {
|
2021-02-25 16:12:29 -08:00
|
|
|
return self->field_count;
|
2019-12-05 17:21:46 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void ts_language_table_entry(
|
|
|
|
|
const TSLanguage *self,
|
|
|
|
|
TSStateId state,
|
|
|
|
|
TSSymbol symbol,
|
|
|
|
|
TableEntry *result
|
|
|
|
|
) {
|
2018-04-06 09:35:17 -07:00
|
|
|
if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) {
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
result->action_count = 0;
|
|
|
|
|
result->is_reusable = false;
|
|
|
|
|
result->actions = NULL;
|
2016-06-27 14:07:47 -07:00
|
|
|
} else {
|
2016-11-14 08:36:06 -08:00
|
|
|
assert(symbol < self->token_count);
|
2019-05-16 16:59:50 -07:00
|
|
|
uint32_t action_index = ts_language_lookup(self, state, symbol);
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
const TSParseActionEntry *entry = &self->parse_actions[action_index];
|
2020-04-28 13:38:08 +02:00
|
|
|
result->action_count = entry->entry.count;
|
|
|
|
|
result->is_reusable = entry->entry.reusable;
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
result->actions = (const TSParseAction *)(entry + 1);
|
2016-05-09 14:31:44 -07:00
|
|
|
}
|
2016-03-07 20:06:46 -08:00
|
|
|
}
|
|
|
|
|
|
2019-12-05 17:21:46 -08:00
|
|
|
TSSymbolMetadata ts_language_symbol_metadata(
|
|
|
|
|
const TSLanguage *self,
|
|
|
|
|
TSSymbol symbol
|
|
|
|
|
) {
|
2018-04-06 09:35:17 -07:00
|
|
|
if (symbol == ts_builtin_sym_error) {
|
2021-10-12 18:00:43 -07:00
|
|
|
return (TSSymbolMetadata) {.visible = true, .named = true};
|
2018-04-06 09:35:17 -07:00
|
|
|
} else if (symbol == ts_builtin_sym_error_repeat) {
|
2021-10-12 18:00:43 -07:00
|
|
|
return (TSSymbolMetadata) {.visible = false, .named = false};
|
2017-07-18 12:01:52 -07:00
|
|
|
} else {
|
2019-12-05 17:21:46 -08:00
|
|
|
return self->symbol_metadata[symbol];
|
2017-07-18 12:01:52 -07:00
|
|
|
}
|
2016-03-02 09:55:25 -08:00
|
|
|
}
|
|
|
|
|
|
2019-12-05 17:21:46 -08:00
|
|
|
TSSymbol ts_language_public_symbol(
|
|
|
|
|
const TSLanguage *self,
|
|
|
|
|
TSSymbol symbol
|
|
|
|
|
) {
|
|
|
|
|
if (symbol == ts_builtin_sym_error) return symbol;
|
2021-02-25 16:12:29 -08:00
|
|
|
return self->public_symbol_map[symbol];
|
2019-12-05 17:21:46 -08:00
|
|
|
}
|
|
|
|
|
|
2023-05-17 10:39:37 +03:00
|
|
|
TSStateId ts_language_next_state(
|
|
|
|
|
const TSLanguage *self,
|
|
|
|
|
TSStateId state,
|
|
|
|
|
TSSymbol symbol
|
|
|
|
|
) {
|
|
|
|
|
if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) {
|
|
|
|
|
return 0;
|
|
|
|
|
} else if (symbol < self->token_count) {
|
|
|
|
|
uint32_t count;
|
|
|
|
|
const TSParseAction *actions = ts_language_actions(self, state, symbol, &count);
|
|
|
|
|
if (count > 0) {
|
|
|
|
|
TSParseAction action = actions[count - 1];
|
|
|
|
|
if (action.type == TSParseActionTypeShift) {
|
|
|
|
|
return action.shift.extra ? state : action.shift.state;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return 0;
|
|
|
|
|
} else {
|
|
|
|
|
return ts_language_lookup(self, state, symbol);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2019-12-05 17:21:46 -08:00
|
|
|
const char *ts_language_symbol_name(
|
|
|
|
|
const TSLanguage *self,
|
|
|
|
|
TSSymbol symbol
|
|
|
|
|
) {
|
2017-07-18 12:01:52 -07:00
|
|
|
if (symbol == ts_builtin_sym_error) {
|
2016-03-02 09:55:25 -08:00
|
|
|
return "ERROR";
|
2018-04-06 09:35:17 -07:00
|
|
|
} else if (symbol == ts_builtin_sym_error_repeat) {
|
|
|
|
|
return "_ERROR";
|
2020-02-27 22:24:00 +07:00
|
|
|
} else if (symbol < ts_language_symbol_count(self)) {
|
2019-12-05 17:21:46 -08:00
|
|
|
return self->symbol_names[symbol];
|
2020-02-27 22:24:00 +07:00
|
|
|
} else {
|
|
|
|
|
return NULL;
|
2017-07-18 12:01:52 -07:00
|
|
|
}
|
2015-10-29 12:42:52 -04:00
|
|
|
}
|
2017-04-12 09:47:51 -04:00
|
|
|
|
2019-11-15 14:21:13 -08:00
|
|
|
TSSymbol ts_language_symbol_for_name(
|
|
|
|
|
const TSLanguage *self,
|
|
|
|
|
const char *string,
|
|
|
|
|
uint32_t length,
|
|
|
|
|
bool is_named
|
|
|
|
|
) {
|
|
|
|
|
if (!strncmp(string, "ERROR", length)) return ts_builtin_sym_error;
|
2023-07-19 03:49:14 -04:00
|
|
|
uint16_t count = (uint16_t)ts_language_symbol_count(self);
|
2018-11-13 11:34:12 +01:00
|
|
|
for (TSSymbol i = 0; i < count; i++) {
|
2019-11-15 14:21:13 -08:00
|
|
|
TSSymbolMetadata metadata = ts_language_symbol_metadata(self, i);
|
2020-09-21 12:34:48 -07:00
|
|
|
if ((!metadata.visible && !metadata.supertype) || metadata.named != is_named) continue;
|
2019-11-15 14:21:13 -08:00
|
|
|
const char *symbol_name = self->symbol_names[i];
|
2019-12-05 17:21:46 -08:00
|
|
|
if (!strncmp(symbol_name, string, length) && !symbol_name[length]) {
|
2021-02-25 16:12:29 -08:00
|
|
|
return self->public_symbol_map[i];
|
2019-12-05 17:21:46 -08:00
|
|
|
}
|
2018-06-21 12:54:32 -07:00
|
|
|
}
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
2019-12-05 17:21:46 -08:00
|
|
|
TSSymbolType ts_language_symbol_type(
|
|
|
|
|
const TSLanguage *self,
|
|
|
|
|
TSSymbol symbol
|
|
|
|
|
) {
|
|
|
|
|
TSSymbolMetadata metadata = ts_language_symbol_metadata(self, symbol);
|
2021-03-17 14:12:58 -04:00
|
|
|
if (metadata.named && metadata.visible) {
|
2017-04-12 09:47:51 -04:00
|
|
|
return TSSymbolTypeRegular;
|
|
|
|
|
} else if (metadata.visible) {
|
|
|
|
|
return TSSymbolTypeAnonymous;
|
|
|
|
|
} else {
|
|
|
|
|
return TSSymbolTypeAuxiliary;
|
|
|
|
|
}
|
|
|
|
|
}
|
2019-02-07 12:29:20 -08:00
|
|
|
|
2019-12-05 17:21:46 -08:00
|
|
|
const char *ts_language_field_name_for_id(
|
|
|
|
|
const TSLanguage *self,
|
|
|
|
|
TSFieldId id
|
|
|
|
|
) {
|
2019-02-07 12:29:20 -08:00
|
|
|
uint32_t count = ts_language_field_count(self);
|
2020-02-27 21:09:20 +07:00
|
|
|
if (count && id <= count) {
|
2019-02-07 12:29:20 -08:00
|
|
|
return self->field_names[id];
|
|
|
|
|
} else {
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
TSFieldId ts_language_field_id_for_name(
|
|
|
|
|
const TSLanguage *self,
|
|
|
|
|
const char *name,
|
|
|
|
|
uint32_t name_length
|
|
|
|
|
) {
|
2023-07-19 03:49:14 -04:00
|
|
|
uint16_t count = (uint16_t)ts_language_field_count(self);
|
2019-02-07 12:29:20 -08:00
|
|
|
for (TSSymbol i = 1; i < count + 1; i++) {
|
|
|
|
|
switch (strncmp(name, self->field_names[i], name_length)) {
|
|
|
|
|
case 0:
|
2019-09-11 14:44:49 -07:00
|
|
|
if (self->field_names[i][name_length] == 0) return i;
|
|
|
|
|
break;
|
2019-02-07 12:29:20 -08:00
|
|
|
case -1:
|
|
|
|
|
return 0;
|
|
|
|
|
default:
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
2023-05-17 10:39:37 +03:00
|
|
|
|
|
|
|
|
// Allocate a lookahead iterator for `state` of the given language.
//
// Returns NULL when `state` is not below the language's `state_count`.
// Otherwise the iterator is allocated with `ts_malloc`, initialised from
// `ts_language_lookaheads`, and returned as an opaque handle.
TSLookaheadIterator *ts_lookahead_iterator_new(const TSLanguage *self, TSStateId state) {
  if (state >= self->state_count) {
    return NULL;
  }

  LookaheadIterator *created = ts_malloc(sizeof *created);
  *created = ts_language_lookaheads(self, state);
  return (TSLookaheadIterator *)created;
}
|
|
|
|
|
|
|
|
|
|
// Dispose of a lookahead iterator by handing it to `ts_free`.
void ts_lookahead_iterator_delete(TSLookaheadIterator *self) {
  TSLookaheadIterator *doomed = self;
  ts_free(doomed);
}
|
|
|
|
|
|
|
|
|
|
// Re-point the iterator at `state`, keeping its current language.
//
// Returns false (leaving the iterator untouched) when `state` is not below
// the language's `state_count`; otherwise re-initialises the iterator from
// `ts_language_lookaheads` and returns true.
bool ts_lookahead_iterator_reset_state(TSLookaheadIterator * self, TSStateId state) {
  LookaheadIterator *iterator = (LookaheadIterator *)self;
  const TSLanguage *language = iterator->language;

  if (state >= language->state_count) {
    return false;
  }

  *iterator = ts_language_lookaheads(language, state);
  return true;
}
|
|
|
|
|
|
2023-08-03 21:00:56 +01:00
|
|
|
const TSLanguage *ts_lookahead_iterator_language(const TSLookaheadIterator *self) {
|
2023-06-15 17:25:08 +03:00
|
|
|
const LookaheadIterator *iterator = (const LookaheadIterator *)self;
|
2023-05-17 10:39:37 +03:00
|
|
|
return iterator->language;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Re-point the iterator at `state` of `language`.
//
// Returns false (leaving the iterator untouched) when `state` is not below
// `language->state_count`; otherwise re-initialises the iterator from
// `ts_language_lookaheads` and returns true.
bool ts_lookahead_iterator_reset(TSLookaheadIterator *self, const TSLanguage *language, TSStateId state) {
  bool state_in_range = state < language->state_count;
  if (state_in_range) {
    LookaheadIterator *target = (LookaheadIterator *)self;
    *target = ts_language_lookaheads(language, state);
  }
  return state_in_range;
}
|
|
|
|
|
|
2023-08-09 14:17:54 +03:00
|
|
|
// Advance the iterator by delegating to `ts_lookahead_iterator__next` and
// pass its result through.
bool ts_lookahead_iterator_next(TSLookaheadIterator *self) {
  return ts_lookahead_iterator__next((LookaheadIterator *)self);
}
|
|
|
|
|
|
|
|
|
|
TSSymbol ts_lookahead_iterator_current_symbol(const TSLookaheadIterator *self) {
|
2023-06-15 17:25:08 +03:00
|
|
|
const LookaheadIterator *iterator = (const LookaheadIterator *)self;
|
2023-05-17 10:39:37 +03:00
|
|
|
return iterator->symbol;
|
|
|
|
|
}
|
2023-06-15 17:25:08 +03:00
|
|
|
|
|
|
|
|
const char *ts_lookahead_iterator_current_symbol_name(const TSLookaheadIterator *self) {
|
|
|
|
|
const LookaheadIterator *iterator = (const LookaheadIterator *)self;
|
|
|
|
|
return ts_language_symbol_name(iterator->language, iterator->symbol);
|
|
|
|
|
}
|