Add a TSLexer.eof() API, use it in generated parsers

This commit is contained in:
Max Brunsfeld 2019-10-30 17:11:15 -07:00
parent a62b7a70f3
commit d3b7caa565
6 changed files with 24 additions and 11 deletions

View file

@ -2,7 +2,7 @@ use super::coincident_tokens::CoincidentTokenIndex;
use super::token_conflicts::TokenConflictMap;
use crate::generate::dedup::split_state_id_groups;
use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar};
use crate::generate::nfa::{CharacterSet, NfaCursor};
use crate::generate::nfa::NfaCursor;
use crate::generate::rules::{Symbol, TokenSet};
use crate::generate::tables::{AdvanceAction, LexState, LexTable, ParseStateId, ParseTable};
use log::info;
@ -189,13 +189,10 @@ impl<'a> LexTableBuilder<'a> {
// character that leads to the empty set of NFA states.
if eof_valid {
let (next_state_id, _) = self.add_state(Vec::new(), false);
self.table.states[state_id].advance_actions.push((
CharacterSet::empty().add_char('\0'),
AdvanceAction {
state: next_state_id,
in_main_token: true,
},
));
self.table.states[state_id].eof_action = Some(AdvanceAction {
state: next_state_id,
in_main_token: true,
});
}
for transition in transitions {
@ -273,6 +270,7 @@ fn minimize_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) {
let signature = (
i == 0,
state.accept_action,
state.eof_action.is_some(),
state
.advance_actions
.iter()
@ -320,6 +318,9 @@ fn minimize_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) {
for (_, advance_action) in new_state.advance_actions.iter_mut() {
advance_action.state = group_ids_by_state_id[advance_action.state];
}
if let Some(eof_action) = &mut new_state.eof_action {
eof_action.state = group_ids_by_state_id[eof_action.state];
}
new_states.push(new_state);
}
@ -364,6 +365,9 @@ fn sort_states(table: &mut LexTable, parse_table: &mut ParseTable) {
for (_, advance_action) in state.advance_actions.iter_mut() {
advance_action.state = new_ids_by_old_id[advance_action.state];
}
if let Some(eof_action) = &mut state.eof_action {
eof_action.state = new_ids_by_old_id[eof_action.state];
}
state
})
.collect();

View file

@ -540,6 +540,10 @@ impl Generator {
add_line!(self, "ACCEPT_TOKEN({});", self.symbol_ids[&accept_action]);
}
if let Some(eof_action) = state.eof_action {
add_line!(self, "if (eof) ADVANCE({});", eof_action.state);
}
let mut ruled_out_characters = HashSet::new();
for (characters, action) in state.advance_actions {
let previous_length = self.buffer.len();

View file

@ -77,6 +77,7 @@ pub(crate) struct AdvanceAction {
#[derive(Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) struct LexState {
pub accept_action: Option<Symbol>,
pub eof_action: Option<AdvanceAction>,
pub advance_actions: Vec<(CharacterSet, AdvanceAction)>,
}

View file

@ -45,7 +45,8 @@ struct TSLexer {
void (*advance)(TSLexer *, bool);
void (*mark_end)(TSLexer *);
uint32_t (*get_column)(TSLexer *);
bool (*is_at_included_range_start)(TSLexer *);
bool (*is_at_included_range_start)(const TSLexer *);
bool (*eof)(const TSLexer *);
};
typedef enum {
@ -126,13 +127,15 @@ struct TSLanguage {
#define START_LEXER() \
bool result = false; \
bool skip = false; \
bool eof = false; \
int32_t lookahead; \
goto start; \
next_state: \
lexer->advance(lexer, skip); \
start: \
skip = false; \
lookahead = lexer->lookahead;
lookahead = lexer->lookahead; \
eof = lexer->eof(lexer);
#define ADVANCE(state_value) \
{ \

View file

@ -224,6 +224,7 @@ void ts_lexer_init(Lexer *self) {
.mark_end = ts_lexer__mark_end,
.get_column = ts_lexer__get_column,
.is_at_included_range_start = ts_lexer__is_at_included_range_start,
.eof = ts_lexer__eof,
.lookahead = 0,
.result_symbol = 0,
},

View file

@ -16,7 +16,7 @@ typedef struct {
Length token_start_position;
Length token_end_position;
TSRange * included_ranges;
TSRange *included_ranges;
size_t included_range_count;
size_t current_included_range_index;