From 5e04daf483d1ce8344895c37bcec55cdbbef9a75 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 13 Aug 2019 15:57:42 -0700 Subject: [PATCH] Avoid non-deterministic set iteration order when handling conflicts --- .../generate/build_tables/build_parse_table.rs | 4 ++-- cli/src/generate/build_tables/item.rs | 15 +++++++++++++++ .../conflict_in_repeat_rule/expected_error.txt | 6 +++--- .../expected_error.txt | 6 +++--- 4 files changed, 23 insertions(+), 8 deletions(-) diff --git a/cli/src/generate/build_tables/build_parse_table.rs b/cli/src/generate/build_tables/build_parse_table.rs index 5f7e301b..4c242a74 100644 --- a/cli/src/generate/build_tables/build_parse_table.rs +++ b/cli/src/generate/build_tables/build_parse_table.rs @@ -132,7 +132,7 @@ impl<'a> ParseTableBuilder<'a> { ) -> Result<()> { let mut terminal_successors = HashMap::new(); let mut non_terminal_successors = HashMap::new(); - let mut lookaheads_with_conflicts = HashSet::new(); + let mut lookaheads_with_conflicts = TokenSet::new(); for (item, lookaheads) in &item_set.entries { if let Some(next_symbol) = item.symbol() { @@ -229,7 +229,7 @@ impl<'a> ParseTableBuilder<'a> { .insert(symbol, next_state_id); } - for symbol in lookaheads_with_conflicts { + for symbol in lookaheads_with_conflicts.iter() { self.handle_conflict( &item_set, state_id, diff --git a/cli/src/generate/build_tables/item.rs b/cli/src/generate/build_tables/item.rs index 3bea6293..5ee0144b 100644 --- a/cli/src/generate/build_tables/item.rs +++ b/cli/src/generate/build_tables/item.rs @@ -149,6 +149,21 @@ impl TokenSet { vec.set(other.index, true); } + pub fn remove(&mut self, other: &Symbol) { + let vec = match other.kind { + SymbolType::NonTerminal => panic!("Cannot store non-terminals in a TokenSet"), + SymbolType::Terminal => &mut self.terminal_bits, + SymbolType::External => &mut self.external_bits, + SymbolType::End => { + self.eof = false; + return; + } + }; + if other.index < vec.len() { + vec.set(other.index, false); + } + } + pub fn insert_all_terminals(&mut self, other: &TokenSet) -> bool { let mut result = false; if other.terminal_bits.len() > self.terminal_bits.len() { diff --git a/test/fixtures/test_grammars/conflict_in_repeat_rule/expected_error.txt b/test/fixtures/test_grammars/conflict_in_repeat_rule/expected_error.txt index 934f91c7..9be3e71e 100644 --- a/test/fixtures/test_grammars/conflict_in_repeat_rule/expected_error.txt +++ b/test/fixtures/test_grammars/conflict_in_repeat_rule/expected_error.txt @@ -1,11 +1,11 @@ Unresolved conflict for symbol sequence: - '[' identifier • identifier … + '[' identifier • ']' … Possible interpretations: - 1: '[' (array_repeat1 identifier) • identifier … - 2: '[' (array_type_repeat1 identifier) • identifier … + 1: '[' (array_repeat1 identifier) • ']' … + 2: '[' (array_type_repeat1 identifier) • ']' … Possible resolutions: diff --git a/test/fixtures/test_grammars/conflict_in_repeat_rule_after_external_token/expected_error.txt b/test/fixtures/test_grammars/conflict_in_repeat_rule_after_external_token/expected_error.txt index 2aabe1e4..779b4e29 100644 --- a/test/fixtures/test_grammars/conflict_in_repeat_rule_after_external_token/expected_error.txt +++ b/test/fixtures/test_grammars/conflict_in_repeat_rule_after_external_token/expected_error.txt @@ -1,11 +1,11 @@ Unresolved conflict for symbol sequence: - _program_start '[' identifier • identifier … + _program_start '[' identifier • ']' … Possible interpretations: - 1: _program_start '[' (array_repeat1 identifier) • identifier … - 2: _program_start '[' (array_type_repeat1 identifier) • identifier … + 1: _program_start '[' (array_repeat1 identifier) • ']' … + 2: _program_start '[' (array_type_repeat1 identifier) • ']' … Possible resolutions: