Add a TSLexer.eof() API, use it in generated parsers
This commit is contained in:
parent
a62b7a70f3
commit
d3b7caa565
6 changed files with 24 additions and 11 deletions
|
|
@@ -2,7 +2,7 @@ use super::coincident_tokens::CoincidentTokenIndex;
|
|||
use super::token_conflicts::TokenConflictMap;
|
||||
use crate::generate::dedup::split_state_id_groups;
|
||||
use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar};
|
||||
use crate::generate::nfa::{CharacterSet, NfaCursor};
|
||||
use crate::generate::nfa::NfaCursor;
|
||||
use crate::generate::rules::{Symbol, TokenSet};
|
||||
use crate::generate::tables::{AdvanceAction, LexState, LexTable, ParseStateId, ParseTable};
|
||||
use log::info;
|
||||
|
|
@@ -189,13 +189,10 @@ impl<'a> LexTableBuilder<'a> {
|
|||
// character that leads to the empty set of NFA states.
|
||||
if eof_valid {
|
||||
let (next_state_id, _) = self.add_state(Vec::new(), false);
|
||||
self.table.states[state_id].advance_actions.push((
|
||||
CharacterSet::empty().add_char('\0'),
|
||||
AdvanceAction {
|
||||
state: next_state_id,
|
||||
in_main_token: true,
|
||||
},
|
||||
));
|
||||
self.table.states[state_id].eof_action = Some(AdvanceAction {
|
||||
state: next_state_id,
|
||||
in_main_token: true,
|
||||
});
|
||||
}
|
||||
|
||||
for transition in transitions {
|
||||
|
|
@@ -273,6 +270,7 @@ fn minimize_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) {
|
|||
let signature = (
|
||||
i == 0,
|
||||
state.accept_action,
|
||||
state.eof_action.is_some(),
|
||||
state
|
||||
.advance_actions
|
||||
.iter()
|
||||
|
|
@@ -320,6 +318,9 @@ fn minimize_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) {
|
|||
for (_, advance_action) in new_state.advance_actions.iter_mut() {
|
||||
advance_action.state = group_ids_by_state_id[advance_action.state];
|
||||
}
|
||||
if let Some(eof_action) = &mut new_state.eof_action {
|
||||
eof_action.state = group_ids_by_state_id[eof_action.state];
|
||||
}
|
||||
new_states.push(new_state);
|
||||
}
|
||||
|
||||
|
|
@@ -364,6 +365,9 @@ fn sort_states(table: &mut LexTable, parse_table: &mut ParseTable) {
|
|||
for (_, advance_action) in state.advance_actions.iter_mut() {
|
||||
advance_action.state = new_ids_by_old_id[advance_action.state];
|
||||
}
|
||||
if let Some(eof_action) = &mut state.eof_action {
|
||||
eof_action.state = new_ids_by_old_id[eof_action.state];
|
||||
}
|
||||
state
|
||||
})
|
||||
.collect();
|
||||
|
|
|
|||
|
|
@@ -540,6 +540,10 @@ impl Generator {
|
|||
add_line!(self, "ACCEPT_TOKEN({});", self.symbol_ids[&accept_action]);
|
||||
}
|
||||
|
||||
if let Some(eof_action) = state.eof_action {
|
||||
add_line!(self, "if (eof) ADVANCE({});", eof_action.state);
|
||||
}
|
||||
|
||||
let mut ruled_out_characters = HashSet::new();
|
||||
for (characters, action) in state.advance_actions {
|
||||
let previous_length = self.buffer.len();
|
||||
|
|
|
|||
|
|
@@ -77,6 +77,7 @@ pub(crate) struct AdvanceAction {
|
|||
#[derive(Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub(crate) struct LexState {
|
||||
pub accept_action: Option<Symbol>,
|
||||
pub eof_action: Option<AdvanceAction>,
|
||||
pub advance_actions: Vec<(CharacterSet, AdvanceAction)>,
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@@ -45,7 +45,8 @@ struct TSLexer {
|
|||
void (*advance)(TSLexer *, bool);
|
||||
void (*mark_end)(TSLexer *);
|
||||
uint32_t (*get_column)(TSLexer *);
|
||||
bool (*is_at_included_range_start)(TSLexer *);
|
||||
bool (*is_at_included_range_start)(const TSLexer *);
|
||||
bool (*eof)(const TSLexer *);
|
||||
};
|
||||
|
||||
typedef enum {
|
||||
|
|
@@ -126,13 +127,15 @@ struct TSLanguage {
|
|||
#define START_LEXER() \
|
||||
bool result = false; \
|
||||
bool skip = false; \
|
||||
bool eof = false; \
|
||||
int32_t lookahead; \
|
||||
goto start; \
|
||||
next_state: \
|
||||
lexer->advance(lexer, skip); \
|
||||
start: \
|
||||
skip = false; \
|
||||
lookahead = lexer->lookahead;
|
||||
lookahead = lexer->lookahead; \
|
||||
eof = lexer->eof(lexer);
|
||||
|
||||
#define ADVANCE(state_value) \
|
||||
{ \
|
||||
|
|
|
|||
|
|
@@ -224,6 +224,7 @@ void ts_lexer_init(Lexer *self) {
|
|||
.mark_end = ts_lexer__mark_end,
|
||||
.get_column = ts_lexer__get_column,
|
||||
.is_at_included_range_start = ts_lexer__is_at_included_range_start,
|
||||
.eof = ts_lexer__eof,
|
||||
.lookahead = 0,
|
||||
.result_symbol = 0,
|
||||
},
|
||||
|
|
|
|||
|
|
@@ -16,7 +16,7 @@ typedef struct {
|
|||
Length token_start_position;
|
||||
Length token_end_position;
|
||||
|
||||
TSRange * included_ranges;
|
||||
TSRange *included_ranges;
|
||||
size_t included_range_count;
|
||||
size_t current_included_range_index;
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue