2019-01-04 17:33:34 -08:00
|
|
|
#include "./language.h"
|
|
|
|
|
#include "./subtree.h"
|
|
|
|
|
#include "./error_costs.h"
|
2018-06-21 12:54:32 -07:00
|
|
|
#include <string.h>
|
2016-05-09 14:31:44 -07:00
|
|
|
|
2016-06-21 07:28:04 -07:00
|
|
|
void ts_language_table_entry(const TSLanguage *self, TSStateId state,
|
|
|
|
|
TSSymbol symbol, TableEntry *result) {
|
2018-04-06 09:35:17 -07:00
|
|
|
if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) {
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
result->action_count = 0;
|
|
|
|
|
result->is_reusable = false;
|
|
|
|
|
result->actions = NULL;
|
2016-06-27 14:07:47 -07:00
|
|
|
} else {
|
2016-11-14 08:36:06 -08:00
|
|
|
assert(symbol < self->token_count);
|
2019-05-16 16:59:50 -07:00
|
|
|
uint32_t action_index = ts_language_lookup(self, state, symbol);
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
const TSParseActionEntry *entry = &self->parse_actions[action_index];
|
|
|
|
|
result->action_count = entry->count;
|
|
|
|
|
result->is_reusable = entry->reusable;
|
|
|
|
|
result->actions = (const TSParseAction *)(entry + 1);
|
2016-05-09 14:31:44 -07:00
|
|
|
}
|
2016-03-07 20:06:46 -08:00
|
|
|
}
|
|
|
|
|
|
2016-11-14 12:15:24 -08:00
|
|
|
uint32_t ts_language_symbol_count(const TSLanguage *language) {
|
2017-07-31 11:45:24 -07:00
|
|
|
return language->symbol_count + language->alias_count;
|
2015-10-29 12:45:28 -04:00
|
|
|
}
|
|
|
|
|
|
2017-01-31 10:21:47 -08:00
|
|
|
uint32_t ts_language_version(const TSLanguage *language) {
|
|
|
|
|
return language->version;
|
|
|
|
|
}
|
|
|
|
|
|
2018-04-06 09:35:17 -07:00
|
|
|
// Return the metadata for `symbol`.
//
// The built-in error symbol is reported as visible and named, and the built-in
// error-repeat symbol as neither. Every other symbol's metadata is read from
// the language's `symbol_metadata` table, indexed by the symbol itself.
TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *language, TSSymbol symbol) {
  if (symbol == ts_builtin_sym_error) {
    TSSymbolMetadata error_metadata = {.visible = true, .named = true};
    return error_metadata;
  }

  if (symbol == ts_builtin_sym_error_repeat) {
    TSSymbolMetadata error_repeat_metadata = {.visible = false, .named = false};
    return error_repeat_metadata;
  }

  return language->symbol_metadata[symbol];
}
|
|
|
|
|
|
2016-02-19 15:41:30 -08:00
|
|
|
// Return the name of `symbol`.
//
// The built-in error symbol is named "ERROR" and the built-in error-repeat
// symbol "_ERROR". Any other symbol's name is read from the language's
// `symbol_names` table, indexed by the symbol itself (no range check).
const char *ts_language_symbol_name(const TSLanguage *language, TSSymbol symbol) {
  if (symbol == ts_builtin_sym_error) {
    return "ERROR";
  }
  if (symbol == ts_builtin_sym_error_repeat) {
    return "_ERROR";
  }
  return language->symbol_names[symbol];
}
|
2017-04-12 09:47:51 -04:00
|
|
|
|
2018-06-21 12:54:32 -07:00
|
|
|
TSSymbol ts_language_symbol_for_name(const TSLanguage *self, const char *name) {
|
2018-08-31 09:46:55 -07:00
|
|
|
if (!strcmp(name, "ERROR")) return ts_builtin_sym_error;
|
|
|
|
|
|
2018-11-13 11:34:12 +01:00
|
|
|
uint32_t count = ts_language_symbol_count(self);
|
|
|
|
|
for (TSSymbol i = 0; i < count; i++) {
|
2018-06-21 12:54:32 -07:00
|
|
|
if (!strcmp(self->symbol_names[i], name)) {
|
|
|
|
|
return i;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
2017-04-12 09:47:51 -04:00
|
|
|
// Classify `symbol` from its metadata:
//   named               -> TSSymbolTypeRegular
//   visible, not named  -> TSSymbolTypeAnonymous
//   neither             -> TSSymbolTypeAuxiliary
TSSymbolType ts_language_symbol_type(const TSLanguage *language, TSSymbol symbol) {
  TSSymbolMetadata info = ts_language_symbol_metadata(language, symbol);

  // `named` takes precedence over `visible`.
  if (info.named) {
    return TSSymbolTypeRegular;
  }
  if (info.visible) {
    return TSSymbolTypeAnonymous;
  }
  return TSSymbolTypeAuxiliary;
}
|
2019-02-07 12:29:20 -08:00
|
|
|
|
|
|
|
|
uint32_t ts_language_field_count(const TSLanguage *self) {
|
2019-02-12 17:20:12 -08:00
|
|
|
if (self->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_FIELDS) {
|
2019-02-07 12:29:20 -08:00
|
|
|
return self->field_count;
|
|
|
|
|
} else {
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Return the name of the field with the given id, or NULL if there is none.
//
// NULL is returned when the language has no fields (see
// `ts_language_field_count`) or when `id` is greater than the field count.
// The range check prevents reading past the end of `field_names`, whose
// highest index used elsewhere in this file is the field count itself.
// An `id` of 0 is within range and yields whatever is stored in slot 0 of
// the table.
const char *ts_language_field_name_for_id(const TSLanguage *self, TSFieldId id) {
  uint32_t count = ts_language_field_count(self);
  if (count == 0 || id > count) {
    return NULL;
  }
  return self->field_names[id];
}
|
|
|
|
|
|
|
|
|
|
TSFieldId ts_language_field_id_for_name(
|
|
|
|
|
const TSLanguage *self,
|
|
|
|
|
const char *name,
|
|
|
|
|
uint32_t name_length
|
|
|
|
|
) {
|
|
|
|
|
uint32_t count = ts_language_field_count(self);
|
|
|
|
|
for (TSSymbol i = 1; i < count + 1; i++) {
|
|
|
|
|
switch (strncmp(name, self->field_names[i], name_length)) {
|
|
|
|
|
case 0:
|
2019-09-11 14:44:49 -07:00
|
|
|
if (self->field_names[i][name_length] == 0) return i;
|
|
|
|
|
break;
|
2019-02-07 12:29:20 -08:00
|
|
|
case -1:
|
|
|
|
|
return 0;
|
|
|
|
|
default:
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return 0;
|
|
|
|
|
}
|