From fcaabea0cf38fa1c1bfbe65b7c049399329ea67a Mon Sep 17 00:00:00 2001
From: Max Brunsfeld <maxbrunsfeld@gmail.com>
Date: Mon, 21 Oct 2019 13:31:49 -0700
Subject: [PATCH 1/2] Allow non-terminal extras

---
 .../build_tables/build_parse_table.rs         | 138 +++++++++++++++---
 .../build_tables/minimize_parse_table.rs      |  39 +++--
 .../prepare_grammar/extract_tokens.rs         |  32 +---
 cli/src/generate/render.rs                    |  27 +++-
 cli/src/generate/tables.rs                    |  30 +++-
 lib/src/parser.c                              |  34 ++++-
 .../extra_non_terminals/corpus.txt            |  22 +++
 .../extra_non_terminals/grammar.json          |  35 +++++
 8 files changed, 274 insertions(+), 83 deletions(-)
 create mode 100644 test/fixtures/test_grammars/extra_non_terminals/corpus.txt
 create mode 100644 test/fixtures/test_grammars/extra_non_terminals/grammar.json

diff --git a/cli/src/generate/build_tables/build_parse_table.rs b/cli/src/generate/build_tables/build_parse_table.rs
index 41d3932c..8a59c977 100644
--- a/cli/src/generate/build_tables/build_parse_table.rs
+++ b/cli/src/generate/build_tables/build_parse_table.rs
@@ -7,7 +7,7 @@ use crate::generate::grammars::{
 use crate::generate::node_types::VariableInfo;
 use crate::generate::rules::{Associativity, Symbol, SymbolType, TokenSet};
 use crate::generate::tables::{
-    FieldLocation, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry,
+    FieldLocation, GotoAction, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry,
     ProductionInfo, ProductionInfoId,
 };
 use core::ops::Range;
@@ -16,17 +16,19 @@ use std::collections::{BTreeMap, HashMap, HashSet, VecDeque};
 use std::fmt::Write;
 use std::u32;
 
+// For conflict reporting, each parse state is associated with an example
+// sequence of symbols that could lead to that parse state.
+type SymbolSequence = Vec<Symbol>;
+
+type AuxiliarySymbolSequence = Vec<AuxiliarySymbolInfo>;
+pub(crate) type ParseStateInfo<'a> = (SymbolSequence, ParseItemSet<'a>);
+
 #[derive(Clone)]
 struct AuxiliarySymbolInfo {
     auxiliary_symbol: Symbol,
     parent_symbols: Vec<Symbol>,
 }
 
-type SymbolSequence = Vec<Symbol>;
-type AuxiliarySymbolSequence = Vec<AuxiliarySymbolInfo>;
-
-pub(crate) type ParseStateInfo<'a> = (SymbolSequence, ParseItemSet<'a>);
-
 struct ParseStateQueueEntry {
     state_id: ParseStateId,
     preceding_auxiliary_symbols: AuxiliarySymbolSequence,
@@ -41,6 +43,7 @@ struct ParseTableBuilder<'a> {
     state_ids_by_item_set: HashMap<ParseItemSet<'a>, ParseStateId>,
     parse_state_info_by_id: Vec<ParseStateInfo<'a>>,
     parse_state_queue: VecDeque<ParseStateQueueEntry>,
+    non_terminal_extra_states: Vec<(Symbol, usize)>,
     parse_table: ParseTable,
 }
 
@@ -52,7 +55,7 @@ impl<'a> ParseTableBuilder<'a> {
             .push(ProductionInfo::default());
 
         // Add the error state at index 0.
-        self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default());
+        self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default(), false);
 
         // Add the starting state at index 1.
         self.add_parse_state(
@@ -66,8 +69,40 @@ impl<'a> ParseTableBuilder<'a> {
                 .iter()
                 .cloned(),
             ),
+            false,
         );
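As context for the hunk that continues below: each non-terminal extra rule is expanded into one candidate item set per distinct starting terminal, keyed in a BTreeMap so that iteration order (and therefore the numbering of the extra states) is deterministic. A minimal stand-alone sketch of that grouping step, using illustrative types rather than the generator's real ParseItem/ParseItemSet:

    use std::collections::BTreeMap;

    // Illustrative stand-ins for the generator's Symbol and production types.
    type Terminal = u32;
    type Production = Vec<Terminal>;

    // Group productions by their first symbol, mirroring how the loop below
    // seeds non_terminal_extra_item_sets_by_first_terminal: one entry per
    // distinct starting terminal of the extra rules.
    fn group_by_first_terminal(
        productions: &[Production],
    ) -> BTreeMap<Terminal, Vec<&Production>> {
        let mut groups: BTreeMap<Terminal, Vec<&Production>> = BTreeMap::new();
        for production in productions {
            if let Some(&first) = production.first() {
                groups.entry(first).or_insert_with(Vec::new).push(production);
            }
        }
        groups
    }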
+
+        // Compute the possible item sets for non-terminal extras.
+        let mut non_terminal_extra_item_sets_by_first_terminal = BTreeMap::new();
+        for extra_non_terminal in self
+            .syntax_grammar
+            .extra_tokens
+            .iter()
+            .filter(|s| s.is_non_terminal())
+        {
+            let variable = &self.syntax_grammar.variables[extra_non_terminal.index];
+            for production in &variable.productions {
+                non_terminal_extra_item_sets_by_first_terminal
+                    .entry(production.first_symbol().unwrap())
+                    .or_insert(ParseItemSet::default())
+                    .insert(
+                        ParseItem {
+                            variable_index: extra_non_terminal.index as u32,
+                            production,
+                            step_index: 1,
+                        },
+                        &[Symbol::end()].iter().cloned().collect(),
+                    );
+            }
+        }
+
+        // Add a state for each starting terminal of a non-terminal extra rule.
+        for (terminal, item_set) in non_terminal_extra_item_sets_by_first_terminal {
+            self.non_terminal_extra_states
+                .push((terminal, self.parse_table.states.len()));
+            self.add_parse_state(&Vec::new(), &Vec::new(), item_set, true);
+        }
+
         while let Some(entry) = self.parse_state_queue.pop_front() {
             let item_set = self
                 .item_set_builder
@@ -91,9 +126,15 @@ impl<'a> ParseTableBuilder<'a> {
         preceding_symbols: &SymbolSequence,
         preceding_auxiliary_symbols: &AuxiliarySymbolSequence,
         item_set: ParseItemSet<'a>,
+        is_non_terminal_extra: bool,
     ) -> ParseStateId {
         match self.state_ids_by_item_set.entry(item_set) {
+            // If an equivalent item set has already been processed, then return
+            // the existing parse state index.
             Entry::Occupied(o) => *o.get(),
+
+            // Otherwise, insert a new parse state and add it to the queue of
+            // parse states to populate.
             Entry::Vacant(v) => {
                 let core = v.key().core();
                 let core_count = self.core_ids_by_core.len();
@@ -116,6 +157,7 @@ impl<'a> ParseTableBuilder<'a> {
                     terminal_entries: HashMap::new(),
                     nonterminal_entries: HashMap::new(),
                     core_id,
+                    is_non_terminal_extra,
                 });
                 self.parse_state_queue.push_back(ParseStateQueueEntry {
                     state_id,
@@ -138,7 +180,12 @@ impl<'a> ParseTableBuilder<'a> {
         let mut non_terminal_successors = BTreeMap::new();
         let mut lookaheads_with_conflicts = TokenSet::new();
 
+        // Each item in the item set contributes either a Shift action or a Reduce
+        // action in this state.
         for (item, lookaheads) in &item_set.entries {
+            // If the item is unfinished, then this state has a transition for the item's
+            // next symbol. Advance the item to its next step and insert the resulting
+            // item into the successor item set.
             if let Some(next_symbol) = item.symbol() {
                 let successor = item.successor();
                 if next_symbol.is_non_terminal() {
@@ -160,7 +207,10 @@ impl<'a> ParseTableBuilder<'a> {
                         .or_insert_with(|| ParseItemSet::default())
                         .insert(successor, lookaheads);
                 }
-            } else {
+            }
+            // If the item is finished, then add a Reduce action to this state based
+            // on this item.
+            else {
                 let action = if item.is_augmented() {
                     ParseAction::Accept
                 } else {
@@ -179,6 +229,10 @@ impl<'a> ParseTableBuilder<'a> {
                     .terminal_entries
                     .entry(lookahead);
                 let entry = entry.or_insert_with(|| ParseTableEntry::new());
+
+                // While inserting Reduce actions, eagerly resolve conflicts related
+                // to precedence: avoid inserting lower-precedence reductions, and
+                // clear the action list when inserting higher-precedence reductions.
                 if entry.actions.is_empty() {
                     entry.actions.push(action);
                 } else if action.precedence() > entry.actions[0].precedence() {
@@ -193,12 +247,16 @@ impl<'a> ParseTableBuilder<'a> {
             }
         }
 
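The eager precedence handling above can be read as a small decision procedure: an empty entry accepts the Reduce action, a higher-precedence action replaces the existing list, an equal-precedence action accumulates (leaving a conflict to resolve later), and a lower-precedence action is dropped. A sketch with a simplified action type (the real code operates on ParseTableEntry/ParseAction and records the remaining conflicts in lookaheads_with_conflicts):

    // Simplified stand-in for ParseAction::Reduce; only precedence matters here.
    #[derive(Clone, Copy, Debug)]
    struct Reduce {
        precedence: i32,
    }

    // Returns true if the entry still holds conflicting actions afterwards.
    fn insert_reduce(actions: &mut Vec<Reduce>, action: Reduce) -> bool {
        if actions.is_empty() {
            actions.push(action);
        } else if action.precedence > actions[0].precedence {
            actions.clear();
            actions.push(action);
        } else if action.precedence == actions[0].precedence {
            actions.push(action);
        }
        actions.len() > 1
    }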
+        // Having computed the successor item sets for each symbol, add a new
+        // parse state for each of these item sets, and add a corresponding Shift
+        // action to this state.
         for (symbol, next_item_set) in terminal_successors {
             preceding_symbols.push(symbol);
             let next_state_id = self.add_parse_state(
                 &preceding_symbols,
                 &preceding_auxiliary_symbols,
                 next_item_set,
+                self.parse_table.states[state_id].is_non_terminal_extra,
             );
             preceding_symbols.pop();
 
@@ -226,13 +284,19 @@ impl<'a> ParseTableBuilder<'a> {
                 &preceding_symbols,
                 &preceding_auxiliary_symbols,
                 next_item_set,
+                self.parse_table.states[state_id].is_non_terminal_extra,
             );
             preceding_symbols.pop();
             self.parse_table.states[state_id]
                 .nonterminal_entries
-                .insert(symbol, next_state_id);
+                .insert(symbol, GotoAction::Goto(next_state_id));
         }
 
+        // For any symbol with multiple actions, perform conflict resolution.
+        // This will either
+        // * choose one action over the others using precedence or associativity
+        // * keep multiple actions if this conflict has been whitelisted in the grammar
+        // * fail, terminating the parser generation process
         for symbol in lookaheads_with_conflicts.iter() {
             self.handle_conflict(
                 &item_set,
@@ -243,15 +307,50 @@ impl<'a> ParseTableBuilder<'a> {
             )?;
         }
 
+        // Finally, add actions for the grammar's `extra` symbols.
         let state = &mut self.parse_table.states[state_id];
-        for extra_token in &self.syntax_grammar.extra_tokens {
-            state
-                .terminal_entries
-                .entry(*extra_token)
-                .or_insert(ParseTableEntry {
-                    reusable: true,
-                    actions: vec![ParseAction::ShiftExtra],
-                });
+        let is_non_terminal_extra = state.is_non_terminal_extra;
+        let is_end_of_non_terminal_extra =
+            is_non_terminal_extra && state.terminal_entries.len() == 1;
+
+        // Add actions for the start tokens of each non-terminal extra rule.
+        // These actions are added to every state except for the states that are
+        // already within non-terminal extras. Non-terminal extras are not allowed
+        // to nest within each other.
+        if !is_non_terminal_extra {
+            for (terminal, state_id) in &self.non_terminal_extra_states {
+                state
+                    .terminal_entries
+                    .entry(*terminal)
+                    .or_insert(ParseTableEntry {
+                        reusable: true,
+                        actions: vec![ParseAction::Shift {
+                            state: *state_id,
+                            is_repetition: false,
+                        }],
+                    });
+            }
+        }
+
+        // Add ShiftExtra actions for the terminal extra tokens. These actions
+        // are added to every state except for those at the ends of non-terminal
+        // extras.
+        if !is_end_of_non_terminal_extra {
+            for extra_token in &self.syntax_grammar.extra_tokens {
+                if extra_token.is_non_terminal() {
+                    state
+                        .nonterminal_entries
+                        .insert(*extra_token, GotoAction::ShiftExtra);
+                } else {
+                    state
+                        .terminal_entries
+                        .entry(*extra_token)
+                        .or_insert(ParseTableEntry {
+                            reusable: true,
+                            actions: vec![ParseAction::ShiftExtra],
+                        });
+                }
+            }
         }
 
         Ok(())
@@ -362,8 +461,8 @@ impl<'a> ParseTableBuilder<'a> {
             }
         }
 
-        // If all reduce actions are left associative, remove the SHIFT action.
-        // If all reduce actions are right associative, remove the REDUCE actions.
+        // If all Reduce actions are left associative, remove the SHIFT action.
+        // If all Reduce actions are right associative, remove the REDUCE actions.
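The match that follows implements the rule just stated. As a stand-alone summary, under the assumption that the conflicting entry holds one Shift action plus one or more Reduce actions:

    #[derive(Clone, Copy, PartialEq)]
    enum Assoc {
        Left,
        Right,
        NonAssoc,
    }

    enum Resolution {
        KeepReduces, // drop the Shift action
        KeepShift,   // drop the Reduce actions
        Unresolved,  // mixed associativity: the conflict stands
    }

    // Reduction wins when every competing Reduce action is left-associative,
    // shifting wins when every one is right-associative, and anything else
    // is left for explicit conflict handling.
    fn resolve_by_associativity(reduce_assocs: &[Assoc]) -> Resolution {
        if reduce_assocs.iter().all(|a| *a == Assoc::Left) {
            Resolution::KeepReduces
        } else if reduce_assocs.iter().all(|a| *a == Assoc::Right) {
            Resolution::KeepShift
        } else {
            Resolution::Unresolved
        }
    }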
match (has_left, has_non, has_right) { (true, false, false) => { entry.actions.pop(); @@ -774,6 +873,7 @@ pub(crate) fn build_parse_table<'a>( lexical_grammar, item_set_builder, variable_info, + non_terminal_extra_states: Vec::new(), state_ids_by_item_set: HashMap::new(), core_ids_by_core: HashMap::new(), parse_state_info_by_id: Vec::new(), diff --git a/cli/src/generate/build_tables/minimize_parse_table.rs b/cli/src/generate/build_tables/minimize_parse_table.rs index 5ecde0fd..5d8f7f0f 100644 --- a/cli/src/generate/build_tables/minimize_parse_table.rs +++ b/cli/src/generate/build_tables/minimize_parse_table.rs @@ -2,7 +2,9 @@ use super::token_conflicts::TokenConflictMap; use crate::generate::dedup::split_state_id_groups; use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar, VariableType}; use crate::generate::rules::{AliasMap, Symbol, TokenSet}; -use crate::generate::tables::{ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry}; +use crate::generate::tables::{ + GotoAction, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry, +}; use log::info; use std::collections::{HashMap, HashSet}; use std::mem; @@ -101,7 +103,10 @@ impl<'a> Minimizer<'a> { state.update_referenced_states(|other_state_id, state| { if let Some(symbol) = unit_reduction_symbols_by_state.get(&other_state_id) { done = false; - state.nonterminal_entries[symbol] + match state.nonterminal_entries.get(symbol) { + Some(GotoAction::Goto(state_id)) => *state_id, + _ => other_state_id, + } } else { other_state_id } @@ -262,18 +267,24 @@ impl<'a> Minimizer<'a> { for (symbol, s1) in &state1.nonterminal_entries { if let Some(s2) = state2.nonterminal_entries.get(symbol) { - let group1 = group_ids_by_state_id[*s1]; - let group2 = group_ids_by_state_id[*s2]; - if group1 != group2 { - info!( - "split states {} {} - successors for {} are split: {} {}", - state1.id, - state2.id, - self.symbol_name(symbol), - s1, - s2, - ); - return true; + match (s1, s2) { + (GotoAction::ShiftExtra, GotoAction::ShiftExtra) => continue, + (GotoAction::Goto(s1), GotoAction::Goto(s2)) => { + let group1 = group_ids_by_state_id[*s1]; + let group2 = group_ids_by_state_id[*s2]; + if group1 != group2 { + info!( + "split states {} {} - successors for {} are split: {} {}", + state1.id, + state2.id, + self.symbol_name(symbol), + s1, + s2, + ); + return true; + } + } + _ => return true, } } } diff --git a/cli/src/generate/prepare_grammar/extract_tokens.rs b/cli/src/generate/prepare_grammar/extract_tokens.rs index def35b97..de37b1f9 100644 --- a/cli/src/generate/prepare_grammar/extract_tokens.rs +++ b/cli/src/generate/prepare_grammar/extract_tokens.rs @@ -93,15 +93,7 @@ pub(super) fn extract_tokens( let mut extra_tokens = Vec::new(); for rule in grammar.extra_tokens { if let Rule::Symbol(symbol) = rule { - let new_symbol = symbol_replacer.replace_symbol(symbol); - if new_symbol.is_non_terminal() { - return Error::err(format!( - "Non-token symbol '{}' cannot be used as an extra token", - &variables[new_symbol.index].name - )); - } else { - extra_tokens.push(new_symbol); - } + extra_tokens.push(symbol_replacer.replace_symbol(symbol)); } else { if let Some(index) = lexical_variables.iter().position(|v| v.rule == rule) { extra_tokens.push(Symbol::terminal(index)); @@ -472,28 +464,6 @@ mod test { ); } - #[test] - fn test_error_on_non_terminal_symbol_extras() { - let mut grammar = build_grammar(vec![ - Variable::named("rule_0", Rule::non_terminal(1)), - Variable::named("rule_1", Rule::non_terminal(2)), - Variable::named("rule_2", 
Rule::string("x")), - ]); - grammar.extra_tokens = vec![Rule::non_terminal(1)]; - - match extract_tokens(grammar) { - Err(e) => { - assert_eq!( - e.message(), - "Non-token symbol 'rule_1' cannot be used as an extra token" - ); - } - _ => { - panic!("Expected an error but got no error"); - } - } - } - #[test] fn test_error_on_external_with_same_name_as_non_terminal() { let mut grammar = build_grammar(vec![ diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index 34d8f391..e2afa893 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -2,7 +2,8 @@ use super::grammars::{ExternalToken, LexicalGrammar, SyntaxGrammar, VariableType use super::nfa::CharacterSet; use super::rules::{Alias, AliasMap, Symbol, SymbolType}; use super::tables::{ - AdvanceAction, FieldLocation, LexState, LexTable, ParseAction, ParseTable, ParseTableEntry, + AdvanceAction, FieldLocation, GotoAction, LexState, LexTable, ParseAction, ParseTable, + ParseTableEntry, }; use core::ops::Range; use std::cmp; @@ -678,7 +679,12 @@ impl Generator { add_line!(self, "static TSLexMode ts_lex_modes[STATE_COUNT] = {{"); indent!(self); for (i, state) in self.parse_table.states.iter().enumerate() { - if state.external_lex_state_id > 0 { + if state.is_non_terminal_extra + && state.terminal_entries.len() == 1 + && *state.terminal_entries.iter().next().unwrap().0 == Symbol::end() + { + add_line!(self, "[{}] = {{-1}},", i,); + } else if state.external_lex_state_id > 0 { add_line!( self, "[{}] = {{.lex_state = {}, .external_lex_state = {}}},", @@ -807,12 +813,15 @@ impl Generator { terminal_entries.sort_unstable_by_key(|e| self.symbol_order.get(e.0)); nonterminal_entries.sort_unstable_by_key(|k| k.0); - for (symbol, state_id) in &nonterminal_entries { + for (symbol, action) in &nonterminal_entries { add_line!( self, "[{}] = STATE({}),", self.symbol_ids[symbol], - *state_id + match action { + GotoAction::Goto(state) => *state, + GotoAction::ShiftExtra => i, + } ); } @@ -865,9 +874,15 @@ impl Generator { .or_default() .push(**symbol); } - for (symbol, state_id) in &state.nonterminal_entries { + for (symbol, action) in &state.nonterminal_entries { + let state_id = match action { + GotoAction::Goto(i) => *i, + GotoAction::ShiftExtra => { + self.large_state_count + small_state_indices.len() - 1 + } + }; symbols_by_value - .entry((*state_id, SymbolType::NonTerminal)) + .entry((state_id, SymbolType::NonTerminal)) .or_default() .push(*symbol); } diff --git a/cli/src/generate/tables.rs b/cli/src/generate/tables.rs index fb593953..6f4c34a6 100644 --- a/cli/src/generate/tables.rs +++ b/cli/src/generate/tables.rs @@ -24,6 +24,12 @@ pub(crate) enum ParseAction { }, } +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub(crate) enum GotoAction { + Goto(ParseStateId), + ShiftExtra, +} + #[derive(Clone, Debug, PartialEq, Eq)] pub(crate) struct ParseTableEntry { pub actions: Vec, @@ -34,10 +40,11 @@ pub(crate) struct ParseTableEntry { pub(crate) struct ParseState { pub id: ParseStateId, pub terminal_entries: HashMap, - pub nonterminal_entries: HashMap, + pub nonterminal_entries: HashMap, pub lex_state_id: usize, pub external_lex_state_id: usize, pub core_id: usize, + pub is_non_terminal_extra: bool, } #[derive(Clone, Copy, Debug, Default, PartialEq, Eq)] @@ -103,7 +110,13 @@ impl ParseState { _ => None, }) }) - .chain(self.nonterminal_entries.iter().map(|(_, state)| *state)) + .chain(self.nonterminal_entries.iter().filter_map(|(_, action)| { + if let GotoAction::Goto(state) = action { + Some(*state) + } else { + None + } + 
})) } pub fn update_referenced_states(&mut self, mut f: F) @@ -121,15 +134,18 @@ impl ParseState { } } } - for (symbol, other_state) in &self.nonterminal_entries { - let result = f(*other_state, self); - if result != *other_state { - updates.push((*symbol, 0, result)); + for (symbol, action) in &self.nonterminal_entries { + if let GotoAction::Goto(other_state) = action { + let result = f(*other_state, self); + if result != *other_state { + updates.push((*symbol, 0, result)); + } } } for (symbol, action_index, new_state) in updates { if symbol.is_non_terminal() { - self.nonterminal_entries.insert(symbol, new_state); + self.nonterminal_entries + .insert(symbol, GotoAction::Goto(new_state)); } else { let entry = self.terminal_entries.get_mut(&symbol).unwrap(); if let ParseAction::Shift { is_repetition, .. } = entry.actions[action_index] { diff --git a/lib/src/parser.c b/lib/src/parser.c index 35772ed0..23bae017 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ -351,6 +351,7 @@ static Subtree ts_parser__lex( Length start_position = ts_stack_position(self->stack, version); Subtree external_token = ts_stack_last_external_token(self->stack, version); TSLexMode lex_mode = self->language->lex_modes[parse_state]; + if (lex_mode.lex_state == (uint16_t)-1) return NULL_SUBTREE; const bool *valid_external_tokens = ts_language_enabled_external_tokens( self->language, lex_mode.external_lex_state @@ -748,7 +749,8 @@ static StackVersion ts_parser__reduce( uint32_t count, int dynamic_precedence, uint16_t production_id, - bool fragile + bool is_fragile, + bool is_extra ) { uint32_t initial_version_count = ts_stack_version_count(self->stack); uint32_t removed_version_count = 0; @@ -813,7 +815,8 @@ static StackVersion ts_parser__reduce( TSStateId state = ts_stack_state(self->stack, slice_version); TSStateId next_state = ts_language_next_state(self->language, state, symbol); - if (fragile || pop.size > 1 || initial_version_count > 1) { + if (is_extra) parent.ptr->extra = true; + if (is_fragile || pop.size > 1 || initial_version_count > 1) { parent.ptr->fragile_left = true; parent.ptr->fragile_right = true; parent.ptr->parse_state = TS_TREE_STATE_NONE; @@ -962,7 +965,7 @@ static bool ts_parser__do_all_potential_reductions( reduction_version = ts_parser__reduce( self, version, action.symbol, action.count, action.dynamic_precedence, action.production_id, - true + true, false ); } @@ -1366,8 +1369,17 @@ static bool ts_parser__advance( // Otherwise, re-run the lexer. if (!lookahead.ptr) { lookahead = ts_parser__lex(self, version, state); - ts_parser__set_cached_token(self, position, last_external_token, lookahead); - ts_language_table_entry(self->language, state, ts_subtree_symbol(lookahead), &table_entry); + if (lookahead.ptr) { + ts_parser__set_cached_token(self, position, last_external_token, lookahead); + ts_language_table_entry(self->language, state, ts_subtree_symbol(lookahead), &table_entry); + } + + // When parsing a non-terminal extra, a null lookahead indicates the + // end of the rule. The reduction is stored in the EOF table entry. + // After the reduction, the lexer needs to be run again. 
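The `else` branch that follows looks up the table entry for the built-in end symbol instead of a lexed token. Taken together, the parser.c hunks implement a small protocol: a state that ends a non-terminal extra has lex mode (uint16_t)-1, lexing there yields a null subtree, the fixed reduction is read from the EOF table entry (with the new parent subtree marked extra), and the lexer is then re-run in the post-reduction state. A hypothetical, heavily simplified Rust model of that flow:

    // `lex` returning None stands for ts_parser__lex returning NULL_SUBTREE.
    #[derive(Clone, Copy)]
    struct Reduction {
        symbol: u16,
        is_extra: bool, // the reduced parent subtree is marked as an extra node
    }

    fn next_action(
        state: usize,
        lex: &dyn Fn(usize) -> Option<u16>,
        eof_reduction: &dyn Fn(usize) -> Reduction,
        apply: &dyn Fn(usize, Reduction) -> usize,
    ) -> (usize, Option<u16>) {
        match lex(state) {
            // Normal path: a real token was lexed; act on it via the parse table.
            Some(token) => (state, Some(token)),
            // End of a non-terminal extra: perform the fixed reduction stored
            // in the EOF table entry, then run the lexer again from the state
            // reached after the reduction.
            None => {
                let next_state = apply(state, eof_reduction(state));
                (next_state, lex(next_state))
            }
        }
    }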
+ else { + ts_language_table_entry(self->language, state, ts_builtin_sym_end, &table_entry); + } } for (;;) { @@ -1422,11 +1434,12 @@ static bool ts_parser__advance( case TSParseActionTypeReduce: { bool is_fragile = table_entry.action_count > 1; + bool is_extra = lookahead.ptr == NULL; LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.params.symbol), action.params.child_count); StackVersion reduction_version = ts_parser__reduce( self, version, action.params.symbol, action.params.child_count, action.params.dynamic_precedence, action.params.production_id, - is_fragile + is_fragile, is_extra ); if (reduction_version != STACK_VERSION_NONE) { last_reduction_version = reduction_version; @@ -1459,6 +1472,15 @@ static bool ts_parser__advance( ts_stack_renumber_version(self->stack, last_reduction_version, version); LOG_STACK(); state = ts_stack_state(self->stack, version); + + // At the end of a non-terminal extra rule, the lexer will return a + // null subtree, because the parser needs to perform a fixed reduction + // regardless of the lookahead node. After performing that reduction, + // (and completing the non-terminal extra rule) run the lexer again based + // on the current parse state. + if (!lookahead.ptr) { + lookahead = ts_parser__lex(self, version, state); + } ts_language_table_entry( self->language, state, diff --git a/test/fixtures/test_grammars/extra_non_terminals/corpus.txt b/test/fixtures/test_grammars/extra_non_terminals/corpus.txt new file mode 100644 index 00000000..52b7d864 --- /dev/null +++ b/test/fixtures/test_grammars/extra_non_terminals/corpus.txt @@ -0,0 +1,22 @@ +============== +No extras +============== + +a b c d + +--- + +(module) + +============== +Extras +============== + +a (one) b (two) (three) c d + +--- + +(module + (comment) + (comment) + (comment)) diff --git a/test/fixtures/test_grammars/extra_non_terminals/grammar.json b/test/fixtures/test_grammars/extra_non_terminals/grammar.json new file mode 100644 index 00000000..7302668d --- /dev/null +++ b/test/fixtures/test_grammars/extra_non_terminals/grammar.json @@ -0,0 +1,35 @@ +{ + "name": "extra_non_terminals", + + "extras": [ + {"type": "PATTERN", "value": "\\s"}, + {"type": "SYMBOL", "name": "comment"} + ], + + "rules": { + "module": { + "type": "SEQ", + "members": [ + {"type": "STRING", "value": "a"}, + {"type": "STRING", "value": "b"}, + {"type": "STRING", "value": "c"}, + {"type": "STRING", "value": "d"} + ] + }, + + "comment": { + "type": "SEQ", + "members": [ + {"type": "STRING", "value": "("}, + { + "type": "REPEAT", + "content": { + "type": "PATTERN", + "value": "[a-z]+" + } + }, + {"type": "STRING", "value": ")"} + ] + } + } +} From 0cceca7b4e9bad7080bffafb3f2be8080a56acc4 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 21 Oct 2019 17:26:01 -0700 Subject: [PATCH 2/2] Rename extra_tokens -> extra_symbols --- .../generate/build_tables/build_parse_table.rs | 6 +++--- cli/src/generate/grammars.rs | 4 ++-- cli/src/generate/node_types.rs | 10 +++++----- cli/src/generate/parse_grammar.rs | 4 ++-- .../generate/prepare_grammar/expand_repeats.rs | 2 +- .../prepare_grammar/extract_simple_aliases.rs | 2 +- .../generate/prepare_grammar/extract_tokens.rs | 18 +++++++++--------- .../prepare_grammar/flatten_grammar.rs | 2 +- .../generate/prepare_grammar/intern_symbols.rs | 10 +++++----- cli/src/generate/prepare_grammar/mod.rs | 2 +- .../prepare_grammar/process_inlines.rs | 6 +++--- 11 files changed, 33 insertions(+), 33 deletions(-) diff --git a/cli/src/generate/build_tables/build_parse_table.rs 
b/cli/src/generate/build_tables/build_parse_table.rs index 8a59c977..c63701ee 100644 --- a/cli/src/generate/build_tables/build_parse_table.rs +++ b/cli/src/generate/build_tables/build_parse_table.rs @@ -76,7 +76,7 @@ impl<'a> ParseTableBuilder<'a> { let mut non_terminal_extra_item_sets_by_first_terminal = BTreeMap::new(); for extra_non_terminal in self .syntax_grammar - .extra_tokens + .extra_symbols .iter() .filter(|s| s.is_non_terminal()) { @@ -336,7 +336,7 @@ impl<'a> ParseTableBuilder<'a> { // are added to every state except for those at the ends of non-terminal // extras. if !is_end_of_non_terminal_extra { - for extra_token in &self.syntax_grammar.extra_tokens { + for extra_token in &self.syntax_grammar.extra_symbols { if extra_token.is_non_terminal() { state .nonterminal_entries @@ -843,7 +843,7 @@ fn populate_following_tokens( } } } - for extra in &grammar.extra_tokens { + for extra in &grammar.extra_symbols { if extra.is_terminal() { for entry in result.iter_mut() { entry.insert(*extra); diff --git a/cli/src/generate/grammars.rs b/cli/src/generate/grammars.rs index 0b42c4c3..6cf325dd 100644 --- a/cli/src/generate/grammars.rs +++ b/cli/src/generate/grammars.rs @@ -23,7 +23,7 @@ pub(crate) struct Variable { pub(crate) struct InputGrammar { pub name: String, pub variables: Vec, - pub extra_tokens: Vec, + pub extra_symbols: Vec, pub expected_conflicts: Vec>, pub external_tokens: Vec, pub variables_to_inline: Vec, @@ -87,7 +87,7 @@ pub(crate) struct ExternalToken { #[derive(Debug, Default)] pub(crate) struct SyntaxGrammar { pub variables: Vec, - pub extra_tokens: Vec, + pub extra_symbols: Vec, pub expected_conflicts: Vec>, pub external_tokens: Vec, pub supertype_symbols: Vec, diff --git a/cli/src/generate/node_types.rs b/cli/src/generate/node_types.rs index 2b88e54f..59594d89 100644 --- a/cli/src/generate/node_types.rs +++ b/cli/src/generate/node_types.rs @@ -689,7 +689,7 @@ mod tests { fn test_node_types_simple() { let node_types = get_node_types(InputGrammar { name: String::new(), - extra_tokens: Vec::new(), + extra_symbols: Vec::new(), external_tokens: Vec::new(), expected_conflicts: Vec::new(), variables_to_inline: Vec::new(), @@ -775,7 +775,7 @@ mod tests { fn test_node_types_with_supertypes() { let node_types = get_node_types(InputGrammar { name: String::new(), - extra_tokens: Vec::new(), + extra_symbols: Vec::new(), external_tokens: Vec::new(), expected_conflicts: Vec::new(), variables_to_inline: Vec::new(), @@ -862,7 +862,7 @@ mod tests { fn test_node_types_for_children_without_fields() { let node_types = get_node_types(InputGrammar { name: String::new(), - extra_tokens: Vec::new(), + extra_symbols: Vec::new(), external_tokens: Vec::new(), expected_conflicts: Vec::new(), variables_to_inline: Vec::new(), @@ -960,7 +960,7 @@ mod tests { fn test_node_types_for_aliased_nodes() { let node_types = get_node_types(InputGrammar { name: String::new(), - extra_tokens: Vec::new(), + extra_symbols: Vec::new(), external_tokens: Vec::new(), expected_conflicts: Vec::new(), variables_to_inline: Vec::new(), @@ -1036,7 +1036,7 @@ mod tests { fn test_node_types_with_multiple_valued_fields() { let node_types = get_node_types(InputGrammar { name: String::new(), - extra_tokens: Vec::new(), + extra_symbols: Vec::new(), external_tokens: Vec::new(), expected_conflicts: Vec::new(), variables_to_inline: Vec::new(), diff --git a/cli/src/generate/parse_grammar.rs b/cli/src/generate/parse_grammar.rs index feb560a9..c01dbd99 100644 --- a/cli/src/generate/parse_grammar.rs +++ b/cli/src/generate/parse_grammar.rs 
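Before the parse_grammar.rs hunk below, a note on the rename's motivation: it is mechanical, but it tracks the semantic change from the first patch, since entries in `extras` may now resolve to non-terminal symbols rather than only tokens. A minimal sketch of the distinction, with illustrative types (not the generator's real Rule enum):

    // Illustrative only: a pared-down Rule.
    #[derive(Clone, PartialEq)]
    enum Rule {
        Pattern(&'static str), // a token, matched by the lexer
        Symbol(&'static str),  // a reference to a named rule
    }

    struct InputGrammar {
        // Formerly `extra_tokens`. The entries may now be non-terminals
        // (like the `comment` rule in the test grammar), so `extra_symbols`
        // describes the contents accurately.
        extra_symbols: Vec<Rule>,
    }

    fn example_grammar() -> InputGrammar {
        InputGrammar {
            extra_symbols: vec![Rule::Pattern("\\s"), Rule::Symbol("comment")],
        }
    }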
@@ -87,7 +87,7 @@ pub(crate) fn parse_grammar(input: &str) -> Result { }) } - let extra_tokens = grammar_json + let extra_symbols = grammar_json .extras .unwrap_or(Vec::new()) .into_iter() @@ -107,7 +107,7 @@ pub(crate) fn parse_grammar(input: &str) -> Result { name: grammar_json.name, word_token: grammar_json.word, variables, - extra_tokens, + extra_symbols, expected_conflicts, external_tokens, supertype_symbols, diff --git a/cli/src/generate/prepare_grammar/expand_repeats.rs b/cli/src/generate/prepare_grammar/expand_repeats.rs index ccc83d97..0660f06e 100644 --- a/cli/src/generate/prepare_grammar/expand_repeats.rs +++ b/cli/src/generate/prepare_grammar/expand_repeats.rs @@ -283,7 +283,7 @@ mod tests { fn build_grammar(variables: Vec) -> ExtractedSyntaxGrammar { ExtractedSyntaxGrammar { variables, - extra_tokens: Vec::new(), + extra_symbols: Vec::new(), external_tokens: Vec::new(), expected_conflicts: Vec::new(), variables_to_inline: Vec::new(), diff --git a/cli/src/generate/prepare_grammar/extract_simple_aliases.rs b/cli/src/generate/prepare_grammar/extract_simple_aliases.rs index 9a0b7fbb..6da009d5 100644 --- a/cli/src/generate/prepare_grammar/extract_simple_aliases.rs +++ b/cli/src/generate/prepare_grammar/extract_simple_aliases.rs @@ -146,7 +146,7 @@ mod tests { }], }, ], - extra_tokens: Vec::new(), + extra_symbols: Vec::new(), expected_conflicts: Vec::new(), variables_to_inline: Vec::new(), supertype_symbols: Vec::new(), diff --git a/cli/src/generate/prepare_grammar/extract_tokens.rs b/cli/src/generate/prepare_grammar/extract_tokens.rs index de37b1f9..ae6e7244 100644 --- a/cli/src/generate/prepare_grammar/extract_tokens.rs +++ b/cli/src/generate/prepare_grammar/extract_tokens.rs @@ -90,13 +90,13 @@ pub(super) fn extract_tokens( .collect(); let mut separators = Vec::new(); - let mut extra_tokens = Vec::new(); - for rule in grammar.extra_tokens { + let mut extra_symbols = Vec::new(); + for rule in grammar.extra_symbols { if let Rule::Symbol(symbol) = rule { - extra_tokens.push(symbol_replacer.replace_symbol(symbol)); + extra_symbols.push(symbol_replacer.replace_symbol(symbol)); } else { if let Some(index) = lexical_variables.iter().position(|v| v.rule == rule) { - extra_tokens.push(Symbol::terminal(index)); + extra_symbols.push(Symbol::terminal(index)); } else { separators.push(rule); } @@ -150,7 +150,7 @@ pub(super) fn extract_tokens( ExtractedSyntaxGrammar { variables, expected_conflicts, - extra_tokens, + extra_symbols, variables_to_inline, supertype_symbols, external_tokens, @@ -407,15 +407,15 @@ mod test { } #[test] - fn test_extracting_extra_tokens() { + fn test_extracting_extra_symbols() { let mut grammar = build_grammar(vec![ Variable::named("rule_0", Rule::string("x")), Variable::named("comment", Rule::pattern("//.*")), ]); - grammar.extra_tokens = vec![Rule::string(" "), Rule::non_terminal(1)]; + grammar.extra_symbols = vec![Rule::string(" "), Rule::non_terminal(1)]; let (syntax_grammar, lexical_grammar) = extract_tokens(grammar).unwrap(); - assert_eq!(syntax_grammar.extra_tokens, vec![Symbol::terminal(1),]); + assert_eq!(syntax_grammar.extra_symbols, vec![Symbol::terminal(1),]); assert_eq!(lexical_grammar.separators, vec![Rule::string(" "),]); } @@ -492,7 +492,7 @@ mod test { fn build_grammar(variables: Vec) -> InternedGrammar { InternedGrammar { variables, - extra_tokens: Vec::new(), + extra_symbols: Vec::new(), external_tokens: Vec::new(), expected_conflicts: Vec::new(), variables_to_inline: Vec::new(), diff --git a/cli/src/generate/prepare_grammar/flatten_grammar.rs 
b/cli/src/generate/prepare_grammar/flatten_grammar.rs index e325776c..f2b43a04 100644 --- a/cli/src/generate/prepare_grammar/flatten_grammar.rs +++ b/cli/src/generate/prepare_grammar/flatten_grammar.rs @@ -199,7 +199,7 @@ unless they are used only as the grammar's start rule. } } Ok(SyntaxGrammar { - extra_tokens: grammar.extra_tokens, + extra_symbols: grammar.extra_symbols, expected_conflicts: grammar.expected_conflicts, variables_to_inline: grammar.variables_to_inline, external_tokens: grammar.external_tokens, diff --git a/cli/src/generate/prepare_grammar/intern_symbols.rs b/cli/src/generate/prepare_grammar/intern_symbols.rs index 4c0fc5c7..7cd411ef 100644 --- a/cli/src/generate/prepare_grammar/intern_symbols.rs +++ b/cli/src/generate/prepare_grammar/intern_symbols.rs @@ -30,9 +30,9 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result external_tokens.push(Variable { name, kind, rule }); } - let mut extra_tokens = Vec::with_capacity(grammar.extra_tokens.len()); - for extra_token in grammar.extra_tokens.iter() { - extra_tokens.push(interner.intern_rule(extra_token)?); + let mut extra_symbols = Vec::with_capacity(grammar.extra_symbols.len()); + for extra_token in grammar.extra_symbols.iter() { + extra_symbols.push(interner.intern_rule(extra_token)?); } let mut supertype_symbols = Vec::with_capacity(grammar.supertype_symbols.len()); @@ -76,7 +76,7 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result Ok(InternedGrammar { variables, external_tokens, - extra_tokens, + extra_symbols, expected_conflicts, variables_to_inline, supertype_symbols, @@ -236,7 +236,7 @@ mod tests { InputGrammar { variables, name: "the_language".to_string(), - extra_tokens: Vec::new(), + extra_symbols: Vec::new(), external_tokens: Vec::new(), expected_conflicts: Vec::new(), variables_to_inline: Vec::new(), diff --git a/cli/src/generate/prepare_grammar/mod.rs b/cli/src/generate/prepare_grammar/mod.rs index a574aefb..029483d3 100644 --- a/cli/src/generate/prepare_grammar/mod.rs +++ b/cli/src/generate/prepare_grammar/mod.rs @@ -21,7 +21,7 @@ use crate::generate::rules::{AliasMap, Rule, Symbol}; pub(crate) struct IntermediateGrammar { variables: Vec, - extra_tokens: Vec, + extra_symbols: Vec, expected_conflicts: Vec>, external_tokens: Vec, variables_to_inline: Vec, diff --git a/cli/src/generate/prepare_grammar/process_inlines.rs b/cli/src/generate/prepare_grammar/process_inlines.rs index 68568419..9ef89d75 100644 --- a/cli/src/generate/prepare_grammar/process_inlines.rs +++ b/cli/src/generate/prepare_grammar/process_inlines.rs @@ -196,7 +196,7 @@ mod tests { fn test_basic_inlining() { let grammar = SyntaxGrammar { expected_conflicts: Vec::new(), - extra_tokens: Vec::new(), + extra_symbols: Vec::new(), external_tokens: Vec::new(), supertype_symbols: Vec::new(), word_token: None, @@ -327,7 +327,7 @@ mod tests { Symbol::non_terminal(3), ], expected_conflicts: Vec::new(), - extra_tokens: Vec::new(), + extra_symbols: Vec::new(), external_tokens: Vec::new(), supertype_symbols: Vec::new(), word_token: None, @@ -429,7 +429,7 @@ mod tests { }, ], expected_conflicts: Vec::new(), - extra_tokens: Vec::new(), + extra_symbols: Vec::new(), external_tokens: Vec::new(), supertype_symbols: Vec::new(), word_token: None,
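One detail worth spelling out from the render.rs hunks earlier in this series: a GotoAction::ShiftExtra entry is emitted as a goto whose target is the state's own index, so consuming a non-terminal extra leaves the parse state unchanged. A sketch of that lowering, mirroring the enum added in cli/src/generate/tables.rs:

    #[derive(Clone, Copy)]
    enum GotoAction {
        Goto(usize),
        ShiftExtra,
    }

    // A ShiftExtra entry is rendered as a goto pointing back at the state
    // itself, so after a non-terminal extra is reduced, the parser returns
    // to the state it was already in.
    fn lowered_goto_target(action: GotoAction, current_state: usize) -> usize {
        match action {
            GotoAction::Goto(next_state) => next_state,
            GotoAction::ShiftExtra => current_state,
        }
    }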