2016-05-09 14:31:44 -07:00
|
|
|
#include "runtime/language.h"
|
|
|
|
|
#include "runtime/tree.h"
|
2016-10-05 14:02:49 -07:00
|
|
|
#include "runtime/error_costs.h"
|
2016-05-09 14:31:44 -07:00
|
|
|
|
2016-06-21 07:28:04 -07:00
|
|
|
void ts_language_table_entry(const TSLanguage *self, TSStateId state,
|
|
|
|
|
TSSymbol symbol, TableEntry *result) {
|
2016-06-27 14:07:47 -07:00
|
|
|
if (symbol == ts_builtin_sym_error) {
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
result->action_count = 0;
|
|
|
|
|
result->is_reusable = false;
|
|
|
|
|
result->actions = NULL;
|
|
|
|
|
return;
|
2016-06-27 14:07:47 -07:00
|
|
|
} else {
|
2016-11-14 08:36:06 -08:00
|
|
|
assert(symbol < self->token_count);
|
Simplify error recovery; eliminate recovery states
The previous approach to error recovery relied on special error-recovery
states in the parse table. For each token T, there was an error recovery
state in which the parser looked for *any* token that could follow T.
Unfortunately, sometimes the set of tokens that could follow T contained
conflicts. For example, in JS, the token '}' can be followed by the
open-ended 'template_chars' token, but also by ordinary tokens like
'identifier'. So with the old algorithm, when recovering from an
unexpected '}' token, the lexer had no way to distinguish identifiers
from template_chars.
This commit drops the error recovery states. Instead, when we encounter
an unexpected token T, we recover from the error by finding a previous
state S in the stack in which T would be valid, popping all of the nodes
after S, and wrapping them in an error.
This way, the lexer is always invoked in a normal parse state, in which
it is looking for a non-conflicting set of tokens. Eliminating the error
recovery states also shrinks the lex state machine significantly.
Signed-off-by: Rick Winfrey <rewinfrey@github.com>
2017-09-11 15:22:52 -07:00
|
|
|
uint32_t action_index = self->parse_table[state * self->symbol_count + symbol];
|
|
|
|
|
const TSParseActionEntry *entry = &self->parse_actions[action_index];
|
|
|
|
|
result->action_count = entry->count;
|
|
|
|
|
result->is_reusable = entry->reusable;
|
|
|
|
|
result->depends_on_lookahead = entry->depends_on_lookahead;
|
|
|
|
|
result->actions = (const TSParseAction *)(entry + 1);
|
2016-05-09 14:31:44 -07:00
|
|
|
}
|
2016-03-07 20:06:46 -08:00
|
|
|
}
|
|
|
|
|
|
2016-11-14 12:15:24 -08:00
|
|
|
uint32_t ts_language_symbol_count(const TSLanguage *language) {
|
2017-07-31 11:45:24 -07:00
|
|
|
return language->symbol_count + language->alias_count;
|
2015-10-29 12:45:28 -04:00
|
|
|
}
|
|
|
|
|
|
2017-01-31 10:21:47 -08:00
|
|
|
uint32_t ts_language_version(const TSLanguage *language) {
|
|
|
|
|
return language->version;
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-07 20:06:46 -08:00
|
|
|
TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *language,
|
|
|
|
|
TSSymbol symbol) {
|
2017-07-18 12:01:52 -07:00
|
|
|
if (symbol == ts_builtin_sym_error) {
|
2016-03-02 09:55:25 -08:00
|
|
|
return (TSSymbolMetadata){
|
2016-03-07 20:06:46 -08:00
|
|
|
.visible = true, .named = true, .extra = false, .structural = true,
|
2016-03-02 09:55:25 -08:00
|
|
|
};
|
2017-07-18 12:01:52 -07:00
|
|
|
} else {
|
2016-03-02 09:55:25 -08:00
|
|
|
return language->symbol_metadata[symbol];
|
2017-07-18 12:01:52 -07:00
|
|
|
}
|
2016-03-02 09:55:25 -08:00
|
|
|
}
|
|
|
|
|
|
2016-02-19 15:41:30 -08:00
|
|
|
// Returns the name of `symbol`: the literal "ERROR" for the built-in error
// symbol, otherwise the matching entry of `language->symbol_names`.
const char *ts_language_symbol_name(const TSLanguage *language, TSSymbol symbol) {
  return (symbol == ts_builtin_sym_error) ? "ERROR"
                                          : language->symbol_names[symbol];
}
|
2017-04-12 09:47:51 -04:00
|
|
|
|
|
|
|
|
// Classifies `symbol` from its metadata:
//   - named                -> TSSymbolTypeRegular
//   - visible but unnamed  -> TSSymbolTypeAnonymous
//   - neither              -> TSSymbolTypeAuxiliary
TSSymbolType ts_language_symbol_type(const TSLanguage *language, TSSymbol symbol) {
  const TSSymbolMetadata info = ts_language_symbol_metadata(language, symbol);

  if (info.named) {
    return TSSymbolTypeRegular;
  }
  if (info.visible) {
    return TSSymbolTypeAnonymous;
  }
  return TSSymbolTypeAuxiliary;
}
|