2016-05-09 14:31:44 -07:00
|
|
|
#include "runtime/language.h"
|
2018-05-10 15:11:14 -07:00
|
|
|
#include "runtime/subtree.h"
|
2016-10-05 14:02:49 -07:00
|
|
|
#include "runtime/error_costs.h"
|
2018-06-21 12:54:32 -07:00
|
|
|
#include <string.h>
|
2016-05-09 14:31:44 -07:00
|
|
|
|
2016-06-21 07:28:04 -07:00
|
|
|
void ts_language_table_entry(const TSLanguage *self, TSStateId state,
|
|
|
|
|
TSSymbol symbol, TableEntry *result) {
|
2018-04-06 09:35:17 -07:00
|
|
|
if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) {
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
result->action_count = 0;
|
|
|
|
|
result->is_reusable = false;
|
|
|
|
|
result->actions = NULL;
|
2016-06-27 14:07:47 -07:00
|
|
|
} else {
|
2016-11-14 08:36:06 -08:00
|
|
|
assert(symbol < self->token_count);
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
uint32_t action_index = self->parse_table[state * self->symbol_count + symbol];
|
|
|
|
|
const TSParseActionEntry *entry = &self->parse_actions[action_index];
|
|
|
|
|
result->action_count = entry->count;
|
|
|
|
|
result->is_reusable = entry->reusable;
|
|
|
|
|
result->actions = (const TSParseAction *)(entry + 1);
|
2016-05-09 14:31:44 -07:00
|
|
|
}
|
2016-03-07 20:06:46 -08:00
|
|
|
}
|
|
|
|
|
|
2016-11-14 12:15:24 -08:00
|
|
|
uint32_t ts_language_symbol_count(const TSLanguage *language) {
|
2017-07-31 11:45:24 -07:00
|
|
|
return language->symbol_count + language->alias_count;
|
2015-10-29 12:45:28 -04:00
|
|
|
}
|
|
|
|
|
|
2017-01-31 10:21:47 -08:00
|
|
|
uint32_t ts_language_version(const TSLanguage *language) {
|
|
|
|
|
return language->version;
|
|
|
|
|
}
|
|
|
|
|
|
2018-04-06 09:35:17 -07:00
|
|
|
// Return the metadata for `symbol`.
//
// The built-in error symbols are answered with fixed values instead of a
// table lookup: `ts_builtin_sym_error` is visible and named, while
// `ts_builtin_sym_error_repeat` is neither. Any other symbol is used
// directly as an index into the language's `symbol_metadata` array; no
// bounds check is performed here.
TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *language, TSSymbol symbol) {
  if (symbol == ts_builtin_sym_error) {
    TSSymbolMetadata error_metadata = {.visible = true, .named = true};
    return error_metadata;
  }

  if (symbol == ts_builtin_sym_error_repeat) {
    TSSymbolMetadata error_repeat_metadata = {.visible = false, .named = false};
    return error_repeat_metadata;
  }

  return language->symbol_metadata[symbol];
}
|
|
|
|
|
|
2016-02-19 15:41:30 -08:00
|
|
|
// Return the name of `symbol`.
//
// The built-in error symbols have fixed names: "ERROR" for
// `ts_builtin_sym_error` and "_ERROR" for `ts_builtin_sym_error_repeat`.
// Any other symbol is used directly as an index into the language's
// `symbol_names` array; no bounds check is performed here.
const char *ts_language_symbol_name(const TSLanguage *language, TSSymbol symbol) {
  if (symbol == ts_builtin_sym_error) {
    return "ERROR";
  }

  if (symbol == ts_builtin_sym_error_repeat) {
    return "_ERROR";
  }

  return language->symbol_names[symbol];
}
|
2017-04-12 09:47:51 -04:00
|
|
|
|
2018-06-21 12:54:32 -07:00
|
|
|
TSSymbol ts_language_symbol_for_name(const TSLanguage *self, const char *name) {
|
2018-08-31 09:46:55 -07:00
|
|
|
if (!strcmp(name, "ERROR")) return ts_builtin_sym_error;
|
|
|
|
|
|
2018-06-21 12:54:32 -07:00
|
|
|
for (TSSymbol i = 0; i < self->symbol_count; i++) {
|
|
|
|
|
if (!strcmp(self->symbol_names[i], name)) {
|
|
|
|
|
return i;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
2017-04-12 09:47:51 -04:00
|
|
|
// Classify `symbol` from its metadata:
//   - named                 -> TSSymbolTypeRegular
//   - not named but visible -> TSSymbolTypeAnonymous
//   - neither               -> TSSymbolTypeAuxiliary
// The `named` flag is checked first, so it takes precedence over `visible`.
TSSymbolType ts_language_symbol_type(const TSLanguage *language, TSSymbol symbol) {
  TSSymbolMetadata info = ts_language_symbol_metadata(language, symbol);

  if (info.named) {
    return TSSymbolTypeRegular;
  }

  return info.visible ? TSSymbolTypeAnonymous : TSSymbolTypeAuxiliary;
}
|