feat: add 'reserved word' construct

Co-authored-by: Amaan Qureshi <amaanq12@gmail.com>

parent 2a63077cac
commit 201b41cf11

31 changed files with 2367 additions and 1628 deletions
Cargo.lock (generated): 72 changed lines
@@ -80,9 +80,9 @@ dependencies = [
 [[package]]
 name = "anyhow"
-version = "1.0.94"
+version = "1.0.95"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c1fd03a028ef38ba2276dce7e33fcd6369c158a1bca17946c4b1b701891c1ff7"
+checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04"
 
 [[package]]
 name = "arbitrary"

@@ -168,9 +168,9 @@ checksum = "325918d6fe32f23b19878fe4b34794ae41fc19ddbe53b10571a4874d44ffd39b"
 [[package]]
 name = "cc"
-version = "1.2.4"
+version = "1.2.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9157bbaa6b165880c27a4293a474c91cdcf265cc68cc829bf10be0964a391caf"
+checksum = "c31a0499c1dc64f458ad13872de75c0eb7e3fdb0e67964610c914b034fc5956e"
 dependencies = [
  "jobserver",
  "libc",

@@ -245,9 +245,9 @@ dependencies = [
 [[package]]
 name = "clap_complete"
-version = "4.5.39"
+version = "4.5.40"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fd4db298d517d5fa00b2b84bbe044efd3fde43874a41db0d46f91994646a2da4"
+checksum = "ac2e663e3e3bed2d32d065a8404024dad306e699a04263ec59919529f803aee9"
 dependencies = [
  "clap",
 ]

@@ -304,15 +304,15 @@ dependencies = [
 [[package]]
 name = "console"
-version = "0.15.8"
+version = "0.15.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb"
+checksum = "ea3c6ecd8059b57859df5c69830340ed3c41d30e3da0c1cbed90a96ac853041b"
 dependencies = [
  "encode_unicode",
- "lazy_static",
  "libc",
+ "once_cell",
  "unicode-width",
- "windows-sys 0.52.0",
+ "windows-sys 0.59.0",
 ]
 
 [[package]]

@@ -518,9 +518,9 @@ checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d"
 [[package]]
 name = "encode_unicode"
-version = "0.3.6"
+version = "1.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
+checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0"
 
 [[package]]
 name = "encoding_rs"

@@ -594,9 +594,9 @@ dependencies = [
 [[package]]
 name = "foldhash"
-version = "0.1.3"
+version = "0.1.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f81ec6369c545a7d40e4589b5597581fa1c441fe1cce96dd1de43159910a36a2"
+checksum = "a0d2fde1f7b3d48b8395d5f2de76c18a528bd6a9cdde438df747bfcba3e05d6f"
 
 [[package]]
 name = "form_urlencoded"

@@ -697,11 +697,11 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
 [[package]]
 name = "home"
-version = "0.5.9"
+version = "0.5.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5"
+checksum = "589533453244b0995c858700322199b2becb13b627df2851f64a2775d024abcf"
 dependencies = [
- "windows-sys 0.52.0",
+ "windows-sys 0.59.0",
 ]
 
 [[package]]

@@ -966,9 +966,9 @@ checksum = "884e2677b40cc8c339eaefcb701c32ef1fd2493d71118dc0ca4b6a736c93bd67"
 [[package]]
 name = "libc"
-version = "0.2.168"
+version = "0.2.169"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5aaeb2981e0606ca11d79718f8bb01164f1d6ed75080182d3abf017e6d244b6d"
+checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a"
 
 [[package]]
 name = "libgit2-sys"

@@ -1087,9 +1087,9 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
 [[package]]
 name = "miniz_oxide"
-version = "0.8.0"
+version = "0.8.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1"
+checksum = "4ffbe83022cedc1d264172192511ae958937694cd57ce297164951b8b3568394"
 dependencies = [
  "adler2",
 ]

@@ -1158,9 +1158,9 @@ dependencies = [
 [[package]]
 name = "object"
-version = "0.36.5"
+version = "0.36.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "aedf0a2d09c573ed1d8d85b30c119153926a2b36dce0ab28322c09a117a4683e"
+checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87"
 dependencies = [
  "crc32fast",
  "hashbrown 0.15.2",

@@ -1494,9 +1494,9 @@ dependencies = [
 [[package]]
 name = "serde_json"
-version = "1.0.133"
+version = "1.0.134"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c7fceb2473b9166b2294ef05efcb65a3db80803f0b03ef86a5fc88a2b85ee377"
+checksum = "d00f4175c42ee48b15416f6193a959ba3a0d67fc699a0db9ad12df9f83991c7d"
 dependencies = [
  "indexmap",
  "itoa",

@@ -1591,9 +1591,9 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292"
 [[package]]
 name = "syn"
-version = "2.0.90"
+version = "2.0.91"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "919d3b74a5dd0ccd15aeb8f93e7006bd9e14c295087c9896a110f490752bcf31"
+checksum = "d53cbcb5a243bd33b7858b1d7f4aca2153490815872d86d955d6ea29f743c035"
 dependencies = [
  "proc-macro2",
  "quote",

@@ -1650,11 +1650,11 @@ dependencies = [
 [[package]]
 name = "thiserror"
-version = "2.0.7"
+version = "2.0.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "93605438cbd668185516ab499d589afb7ee1859ea3d5fc8f6b0755e1c7443767"
+checksum = "f072643fd0190df67a8bab670c20ef5d8737177d6ac6b2e9a236cb096206b2cc"
 dependencies = [
- "thiserror-impl 2.0.7",
+ "thiserror-impl 2.0.9",
 ]
 
 [[package]]

@@ -1670,9 +1670,9 @@ dependencies = [
 [[package]]
 name = "thiserror-impl"
-version = "2.0.7"
+version = "2.0.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e1d8749b4531af2117677a5fcd12b1348a3fe2b81e36e61ffeac5c4aa3273e36"
+checksum = "7b50fa271071aae2e6ee85f842e2e28ba8cd2c5fb67f11fcb1fd70b276f9e7d4"
 dependencies = [
  "proc-macro2",
  "quote",

@@ -1880,7 +1880,7 @@ dependencies = [
  "lazy_static",
  "regex",
  "streaming-iterator",
- "thiserror 2.0.7",
+ "thiserror 2.0.9",
  "tree-sitter",
 ]

@@ -1919,7 +1919,7 @@ dependencies = [
  "memchr",
  "regex",
  "streaming-iterator",
- "thiserror 2.0.7",
+ "thiserror 2.0.9",
  "tree-sitter",
 ]

@@ -1941,9 +1941,9 @@ checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83"
 [[package]]
 name = "unicode-width"
-version = "0.1.14"
+version = "0.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af"
+checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd"
 
 [[package]]
 name = "unicode-xid"

@@ -43,15 +43,17 @@ pub fn build_lex_table(
     let tokens = state
         .terminal_entries
         .keys()
+        .copied()
+        .chain(state.reserved_words.iter())
         .filter_map(|token| {
             if token.is_terminal() {
-                if keywords.contains(token) {
+                if keywords.contains(&token) {
                     syntax_grammar.word_token
                 } else {
-                    Some(*token)
+                    Some(token)
                 }
             } else if token.is_eof() {
-                Some(*token)
+                Some(token)
             } else {
                 None
             }

@@ -10,13 +10,11 @@ use indexmap::{map::Entry, IndexMap};
 use rustc_hash::FxHasher;
 
 use super::{
-    item::{ParseItem, ParseItemSet, ParseItemSetCore},
+    item::{ParseItem, ParseItemSet, ParseItemSetCore, ParseItemSetEntry},
     item_set_builder::ParseItemSetBuilder,
 };
 use crate::{
-    grammars::{
-        InlinedProductionMap, LexicalGrammar, PrecedenceEntry, SyntaxGrammar, VariableType,
-    },
+    grammars::{LexicalGrammar, PrecedenceEntry, ReservedWordSetId, SyntaxGrammar, VariableType},
     node_types::VariableInfo,
     rules::{Associativity, Precedence, Symbol, SymbolType, TokenSet},
     tables::{

@@ -67,6 +65,33 @@ struct ParseTableBuilder<'a> {
 }
 
 impl<'a> ParseTableBuilder<'a> {
+    fn new(
+        syntax_grammar: &'a SyntaxGrammar,
+        lexical_grammar: &'a LexicalGrammar,
+        item_set_builder: ParseItemSetBuilder<'a>,
+        variable_info: &'a [VariableInfo],
+    ) -> Self {
+        Self {
+            syntax_grammar,
+            lexical_grammar,
+            item_set_builder,
+            variable_info,
+            non_terminal_extra_states: Vec::new(),
+            state_ids_by_item_set: IndexMap::default(),
+            core_ids_by_core: HashMap::new(),
+            parse_state_info_by_id: Vec::new(),
+            parse_state_queue: VecDeque::new(),
+            actual_conflicts: syntax_grammar.expected_conflicts.iter().cloned().collect(),
+            parse_table: ParseTable {
+                states: Vec::new(),
+                symbols: Vec::new(),
+                external_lex_states: Vec::new(),
+                production_infos: Vec::new(),
+                max_aliased_production_length: 1,
+            },
+        }
+    }
+
     fn build(mut self) -> Result<(ParseTable, Vec<ParseStateInfo<'a>>)> {
         // Ensure that the empty alias sequence has index 0.
         self.parse_table

@@ -80,10 +105,13 @@ impl<'a> ParseTableBuilder<'a> {
         self.add_parse_state(
             &Vec::new(),
             &Vec::new(),
-            ParseItemSet::with(std::iter::once((
-                ParseItem::start(),
-                std::iter::once(Symbol::end()).collect(),
-            ))),
+            ParseItemSet {
+                entries: vec![ParseItemSetEntry {
+                    item: ParseItem::start(),
+                    lookaheads: std::iter::once(Symbol::end()).collect(),
+                    following_reserved_word_set: ReservedWordSetId::default(),
+                }],
+            },
         );
 
         // Compute the possible item sets for non-terminal extras.

@@ -99,15 +127,14 @@ impl<'a> ParseTableBuilder<'a> {
                 non_terminal_extra_item_sets_by_first_terminal
                     .entry(production.first_symbol().unwrap())
                     .or_insert_with(ParseItemSet::default)
-                    .insert(
-                        ParseItem {
-                            variable_index: extra_non_terminal.index as u32,
-                            production,
-                            step_index: 1,
-                            has_preceding_inherited_fields: false,
-                        },
-                        &std::iter::once(Symbol::end_of_nonterminal_extra()).collect(),
-                    );
+                    .insert(ParseItem {
+                        variable_index: extra_non_terminal.index as u32,
+                        production,
+                        step_index: 1,
+                        has_preceding_inherited_fields: false,
+                    })
+                    .lookaheads
+                    .insert(Symbol::end_of_nonterminal_extra());
             }
         }
 

@@ -176,6 +203,7 @@ impl<'a> ParseTableBuilder<'a> {
             external_lex_state_id: 0,
             terminal_entries: IndexMap::default(),
             nonterminal_entries: IndexMap::default(),
+            reserved_words: TokenSet::default(),
             core_id,
         });
         self.parse_state_queue.push_back(ParseStateQueueEntry {

@@ -202,13 +230,18 @@ impl<'a> ParseTableBuilder<'a> {
 
         // Each item in the item set contributes to either or a Shift action or a Reduce
        // action in this state.
-        for (item, lookaheads) in &item_set.entries {
+        for ParseItemSetEntry {
+            item,
+            lookaheads,
+            following_reserved_word_set: reserved_lookaheads,
+        } in &item_set.entries
+        {
             // If the item is unfinished, then this state has a transition for the item's
             // next symbol. Advance the item to its next step and insert the resulting
             // item into the successor item set.
             if let Some(next_symbol) = item.symbol() {
                 let mut successor = item.successor();
-                if next_symbol.is_non_terminal() {
+                let successor_set = if next_symbol.is_non_terminal() {
                     let variable = &self.syntax_grammar.variables[next_symbol.index];
 
                     // Keep track of where auxiliary non-terminals (repeat symbols) are

@@ -237,13 +270,16 @@ impl<'a> ParseTableBuilder<'a> {
                     non_terminal_successors
                         .entry(next_symbol)
                         .or_insert_with(ParseItemSet::default)
-                        .insert(successor, lookaheads);
                 } else {
                     terminal_successors
                         .entry(next_symbol)
                         .or_insert_with(ParseItemSet::default)
-                        .insert(successor, lookaheads);
-                }
+                };
+                let successor_entry = successor_set.insert(successor);
+                successor_entry.lookaheads.insert_all(lookaheads);
+                successor_entry.following_reserved_word_set = successor_entry
+                    .following_reserved_word_set
+                    .max(*reserved_lookaheads);
             }
             // If the item is finished, then add a Reduce action to this state based
             // on this item.

@@ -370,7 +406,7 @@ impl<'a> ParseTableBuilder<'a> {
             )?;
         }
 
-        // Finally, add actions for the grammar's `extra` symbols.
+        // Add actions for the grammar's `extra` symbols.
         let state = &mut self.parse_table.states[state_id];
         let is_end_of_non_terminal_extra = state.is_end_of_non_terminal_extra();
 

@@ -382,7 +418,7 @@ impl<'a> ParseTableBuilder<'a> {
         let parent_symbols = item_set
             .entries
             .iter()
-            .filter_map(|(item, _)| {
+            .filter_map(|ParseItemSetEntry { item, .. }| {
                 if !item.is_augmented() && item.step_index > 0 {
                     Some(item.variable_index)
                 } else {

@@ -436,6 +472,30 @@ impl<'a> ParseTableBuilder<'a> {
             }
         }
 
+        if let Some(keyword_capture_token) = self.syntax_grammar.word_token {
+            let reserved_word_set_id = item_set
+                .entries
+                .iter()
+                .filter_map(|entry| {
+                    if let Some(next_step) = entry.item.step() {
+                        if next_step.symbol == keyword_capture_token {
+                            Some(next_step.reserved_word_set_id)
+                        } else {
+                            None
+                        }
+                    } else if entry.lookaheads.contains(&keyword_capture_token) {
+                        Some(entry.following_reserved_word_set)
+                    } else {
+                        None
+                    }
+                })
+                .max();
+            if let Some(reserved_word_set_id) = reserved_word_set_id {
+                state.reserved_words =
+                    self.syntax_grammar.reserved_word_sets[reserved_word_set_id.0].clone();
+            }
+        }
+
         Ok(())
     }
 

@@ -462,7 +522,10 @@ impl<'a> ParseTableBuilder<'a> {
         let mut considered_associativity = false;
         let mut shift_precedence = Vec::<(&Precedence, Symbol)>::new();
         let mut conflicting_items = HashSet::new();
-        for (item, lookaheads) in &item_set.entries {
+        for ParseItemSetEntry {
+            item, lookaheads, ..
+        } in &item_set.entries
+        {
             if let Some(step) = item.step() {
                 if item.step_index > 0
                     && self

@@ -836,7 +899,7 @@ impl<'a> ParseTableBuilder<'a> {
         let parent_symbols = item_set
             .entries
             .iter()
-            .filter_map(|(item, _)| {
+            .filter_map(|ParseItemSetEntry { item, .. }| {
                 let variable_index = item.variable_index as usize;
                 if item.symbol() == Some(symbol)
                     && !self.syntax_grammar.variables[variable_index].is_auxiliary()

@@ -931,77 +994,17 @@ impl<'a> ParseTableBuilder<'a> {
     }
 }
 
-fn populate_following_tokens(
-    result: &mut [TokenSet],
-    grammar: &SyntaxGrammar,
-    inlines: &InlinedProductionMap,
-    builder: &ParseItemSetBuilder,
-) {
-    let productions = grammar
-        .variables
-        .iter()
-        .flat_map(|v| &v.productions)
-        .chain(&inlines.productions);
-    let all_tokens = (0..result.len())
-        .map(Symbol::terminal)
-        .collect::<TokenSet>();
-    for production in productions {
-        for i in 1..production.steps.len() {
-            let left_tokens = builder.last_set(&production.steps[i - 1].symbol);
-            let right_tokens = builder.first_set(&production.steps[i].symbol);
-            for left_token in left_tokens.iter() {
-                if left_token.is_terminal() {
-                    result[left_token.index].insert_all_terminals(right_tokens);
-                }
-            }
-        }
-    }
-    for extra in &grammar.extra_symbols {
-        if extra.is_terminal() {
-            for entry in result.iter_mut() {
-                entry.insert(*extra);
-            }
-            result[extra.index].clone_from(&all_tokens);
-        }
-    }
-}
-
 pub fn build_parse_table<'a>(
     syntax_grammar: &'a SyntaxGrammar,
     lexical_grammar: &'a LexicalGrammar,
-    inlines: &'a InlinedProductionMap,
+    item_set_builder: ParseItemSetBuilder<'a>,
     variable_info: &'a [VariableInfo],
-) -> Result<(ParseTable, Vec<TokenSet>, Vec<ParseStateInfo<'a>>)> {
-    let actual_conflicts = syntax_grammar.expected_conflicts.iter().cloned().collect();
-    let item_set_builder = ParseItemSetBuilder::new(syntax_grammar, lexical_grammar, inlines);
-    let mut following_tokens = vec![TokenSet::new(); lexical_grammar.variables.len()];
-    populate_following_tokens(
-        &mut following_tokens,
-        syntax_grammar,
-        inlines,
-        &item_set_builder,
-    );
-
-    let (table, item_sets) = ParseTableBuilder {
+) -> Result<(ParseTable, Vec<ParseStateInfo<'a>>)> {
+    ParseTableBuilder::new(
         syntax_grammar,
         lexical_grammar,
         item_set_builder,
         variable_info,
-        non_terminal_extra_states: Vec::new(),
-        actual_conflicts,
-        state_ids_by_item_set: IndexMap::default(),
-        core_ids_by_core: HashMap::new(),
-        parse_state_info_by_id: Vec::new(),
-        parse_state_queue: VecDeque::new(),
-        parse_table: ParseTable {
-            states: Vec::new(),
-            symbols: Vec::new(),
-            external_lex_states: Vec::new(),
-            production_infos: Vec::new(),
-            max_aliased_production_length: 1,
-        },
-    }
-    .build()?;
-
-    Ok((table, following_tokens, item_sets))
+    )
+    .build()
 }

@@ -7,7 +7,10 @@ use std::{
 use lazy_static::lazy_static;
 
 use crate::{
-    grammars::{LexicalGrammar, Production, ProductionStep, SyntaxGrammar},
+    grammars::{
+        LexicalGrammar, Production, ProductionStep, ReservedWordSetId, SyntaxGrammar,
+        NO_RESERVED_WORDS,
+    },
     rules::{Associativity, Precedence, Symbol, SymbolType, TokenSet},
 };
 

@@ -23,6 +26,7 @@ lazy_static! {
             associativity: None,
             alias: None,
             field_name: None,
+            reserved_word_set_id: NO_RESERVED_WORDS,
         }],
     };
 }

@@ -58,7 +62,14 @@ pub struct ParseItem<'a> {
 /// to a state in the final parse table.
 #[derive(Clone, Debug, PartialEq, Eq, Default)]
 pub struct ParseItemSet<'a> {
-    pub entries: Vec<(ParseItem<'a>, TokenSet)>,
+    pub entries: Vec<ParseItemSetEntry<'a>>,
+}
+
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct ParseItemSetEntry<'a> {
+    pub item: ParseItem<'a>,
+    pub lookaheads: TokenSet,
+    pub following_reserved_word_set: ReservedWordSetId,
 }
 
 /// A [`ParseItemSetCore`] is like a [`ParseItemSet`], but without the lookahead

@@ -152,30 +163,26 @@ impl<'a> ParseItem<'a> {
 }
 
 impl<'a> ParseItemSet<'a> {
-    pub fn with(elements: impl IntoIterator<Item = (ParseItem<'a>, TokenSet)>) -> Self {
-        let mut result = Self::default();
-        for (item, lookaheads) in elements {
-            result.insert(item, &lookaheads);
-        }
-        result
-    }
-
-    pub fn insert(&mut self, item: ParseItem<'a>, lookaheads: &TokenSet) -> &mut TokenSet {
-        match self.entries.binary_search_by(|(i, _)| i.cmp(&item)) {
+    pub fn insert(&mut self, item: ParseItem<'a>) -> &mut ParseItemSetEntry<'a> {
+        match self.entries.binary_search_by(|e| e.item.cmp(&item)) {
             Err(i) => {
-                self.entries.insert(i, (item, lookaheads.clone()));
-                &mut self.entries[i].1
-            }
-            Ok(i) => {
-                self.entries[i].1.insert_all(lookaheads);
-                &mut self.entries[i].1
+                self.entries.insert(
+                    i,
+                    ParseItemSetEntry {
+                        item,
+                        lookaheads: TokenSet::new(),
+                        following_reserved_word_set: ReservedWordSetId::default(),
+                    },
+                );
+                &mut self.entries[i]
             }
+            Ok(i) => &mut self.entries[i],
         }
     }
 
     pub fn core(&self) -> ParseItemSetCore<'a> {
         ParseItemSetCore {
-            entries: self.entries.iter().map(|e| e.0).collect(),
+            entries: self.entries.iter().map(|e| e.item).collect(),
         }
     }
 }

@@ -195,14 +202,21 @@ impl fmt::Display for ParseItemDisplay<'_> {
         for (i, step) in self.0.production.steps.iter().enumerate() {
             if i == self.0.step_index as usize {
                 write!(f, " •")?;
-                if let Some(associativity) = step.associativity {
-                    if step.precedence.is_none() {
-                        write!(f, " ({associativity:?})")?;
-                    } else {
-                        write!(f, " ({} {associativity:?})", step.precedence)?;
-                    }
-                } else if !step.precedence.is_none() {
-                    write!(f, " ({})", step.precedence)?;
+                if !step.precedence.is_none()
+                    || step.associativity.is_some()
+                    || step.reserved_word_set_id != ReservedWordSetId::default()
+                {
+                    write!(f, " (")?;
+                    if !step.precedence.is_none() {
+                        write!(f, " {}", step.precedence)?;
+                    }
+                    if let Some(associativity) = step.associativity {
+                        write!(f, " {associativity:?}")?;
+                    }
+                    if step.reserved_word_set_id != ReservedWordSetId::default() {
+                        write!(f, "reserved: {}", step.reserved_word_set_id)?;
+                    }
+                    write!(f, " )")?;
                 }
             }

@@ -270,13 +284,21 @@ impl fmt::Display for TokenSetDisplay<'_> {
 
 impl fmt::Display for ParseItemSetDisplay<'_> {
     fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
-        for (item, lookaheads) in &self.0.entries {
-            writeln!(
+        for entry in &self.0.entries {
+            write!(
                 f,
                 "{}\t{}",
-                ParseItemDisplay(item, self.1, self.2),
-                TokenSetDisplay(lookaheads, self.1, self.2)
+                ParseItemDisplay(&entry.item, self.1, self.2),
+                TokenSetDisplay(&entry.lookaheads, self.1, self.2),
             )?;
+            if entry.following_reserved_word_set != ReservedWordSetId::default() {
+                write!(
+                    f,
+                    "\treserved word set: {}",
+                    entry.following_reserved_word_set
+                )?;
+            }
+            writeln!(f)?;
         }
         Ok(())
     }

@@ -296,7 +318,7 @@ impl Hash for ParseItem<'_> {
         // this item, unless any of the following are true:
         // * the children have fields
         // * the children have aliases
-        // * the children are hidden and
+        // * the children are hidden and represent rules that have fields.
         //   See the docs for `has_preceding_inherited_fields`.
         for step in &self.production.steps[0..self.step_index as usize] {
             step.alias.hash(hasher);

@@ -399,9 +421,10 @@ impl Eq for ParseItem<'_> {}
 impl Hash for ParseItemSet<'_> {
     fn hash<H: Hasher>(&self, hasher: &mut H) {
         hasher.write_usize(self.entries.len());
-        for (item, lookaheads) in &self.entries {
-            item.hash(hasher);
-            lookaheads.hash(hasher);
+        for entry in &self.entries {
+            entry.item.hash(hasher);
+            entry.lookaheads.hash(hasher);
+            entry.following_reserved_word_set.hash(hasher);
         }
     }
 }

@@ -3,9 +3,9 @@ use std::{
     fmt,
 };
 
-use super::item::{ParseItem, ParseItemDisplay, ParseItemSet, TokenSetDisplay};
+use super::item::{ParseItem, ParseItemDisplay, ParseItemSet, ParseItemSetEntry, TokenSetDisplay};
 use crate::{
-    grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar},
+    grammars::{InlinedProductionMap, LexicalGrammar, ReservedWordSetId, SyntaxGrammar},
     rules::{Symbol, SymbolType, TokenSet},
 };
 

@@ -15,9 +15,10 @@ struct TransitiveClosureAddition<'a> {
     info: FollowSetInfo,
 }
 
-#[derive(Clone, Debug, PartialEq, Eq)]
+#[derive(Clone, Debug, Default, PartialEq, Eq)]
 struct FollowSetInfo {
     lookaheads: TokenSet,
+    reserved_lookaheads: ReservedWordSetId,
     propagates_lookaheads: bool,
 }
 

@@ -25,6 +26,7 @@ pub struct ParseItemSetBuilder<'a> {
     syntax_grammar: &'a SyntaxGrammar,
     lexical_grammar: &'a LexicalGrammar,
     first_sets: HashMap<Symbol, TokenSet>,
+    reserved_first_sets: HashMap<Symbol, ReservedWordSetId>,
     last_sets: HashMap<Symbol, TokenSet>,
     inlines: &'a InlinedProductionMap,
     transitive_closure_additions: Vec<Vec<TransitiveClosureAddition<'a>>>,

@@ -46,6 +48,7 @@ impl<'a> ParseItemSetBuilder<'a> {
             syntax_grammar,
             lexical_grammar,
             first_sets: HashMap::new(),
+            reserved_first_sets: HashMap::new(),
             last_sets: HashMap::new(),
             inlines,
             transitive_closure_additions: vec![Vec::new(); syntax_grammar.variables.len()],

@@ -54,8 +57,7 @@ impl<'a> ParseItemSetBuilder<'a> {
         // For each grammar symbol, populate the FIRST and LAST sets: the set of
         // terminals that appear at the beginning and end that symbol's productions,
         // respectively.
-        //
-        // For a terminal symbol, the FIRST and LAST set just consists of the
+        // For a terminal symbol, the FIRST and LAST sets just consist of the
         // terminal itself.
         for i in 0..lexical_grammar.variables.len() {
             let symbol = Symbol::terminal(i);

@@ -63,6 +65,9 @@ impl<'a> ParseItemSetBuilder<'a> {
             set.insert(symbol);
             result.first_sets.insert(symbol, set.clone());
             result.last_sets.insert(symbol, set);
+            result
+                .reserved_first_sets
+                .insert(symbol, ReservedWordSetId::default());
         }
 
         for i in 0..syntax_grammar.external_tokens.len() {

@@ -71,12 +76,15 @@ impl<'a> ParseItemSetBuilder<'a> {
             set.insert(symbol);
             result.first_sets.insert(symbol, set.clone());
             result.last_sets.insert(symbol, set);
+            result
+                .reserved_first_sets
+                .insert(symbol, ReservedWordSetId::default());
         }
 
-        // The FIRST set of a non-terminal `i` is the union of the following sets:
-        // * the set of all terminals that appear at the beginnings of i's productions
-        // * the FIRST sets of all the non-terminals that appear at the beginnings of i's
-        //   productions
+        // The FIRST set of a non-terminal `i` is the union of the FIRST sets
+        // of all the symbols that appear at the beginnings of i's productions. Some
+        // of these symbols may themselves be non-terminals, so this is a recursive
+        // definition.
         //
         // Rather than computing these sets using recursion, we use an explicit stack
         // called `symbols_to_process`.
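
The worklist approach described in the comment above can be sketched in a few lines of
JavaScript. This is an illustrative toy, not tree-sitter code; the grammar, names, and
representation are invented for the example:

// Toy FIRST-set computation with an explicit worklist instead of recursion.
// `grammar` maps each non-terminal to its productions (arrays of symbols);
// any symbol that is not a key of `grammar` is treated as a terminal.
const toyGrammar = {
  expr: [['term', 'plus', 'expr'], ['term']],
  term: [['number'], ['lparen', 'expr', 'rparen']],
};

function firstSet(grammar, start) {
  const first = new Set();
  const processed = new Set([start]);
  const stack = [start];
  while (stack.length > 0) {
    const symbol = stack.pop();
    for (const production of grammar[symbol]) {
      const head = production[0];
      if (!(head in grammar)) {
        first.add(head); // terminal: goes straight into FIRST
      } else if (!processed.has(head)) {
        processed.add(head); // unseen non-terminal: expand it later
        stack.push(head);
      }
    }
  }
  return first;
}

console.log(firstSet(toyGrammar, 'expr')); // Set { 'number', 'lparen' }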

@@ -84,37 +92,36 @@ impl<'a> ParseItemSetBuilder<'a> {
         let mut processed_non_terminals = HashSet::new();
         for i in 0..syntax_grammar.variables.len() {
             let symbol = Symbol::non_terminal(i);
-            let first_set = result
-                .first_sets
-                .entry(symbol)
-                .or_insert_with(TokenSet::new);
+            let first_set = result.first_sets.entry(symbol).or_default();
+            let reserved_first_set = result.reserved_first_sets.entry(symbol).or_default();
+
             processed_non_terminals.clear();
             symbols_to_process.clear();
             symbols_to_process.push(symbol);
-            while let Some(current_symbol) = symbols_to_process.pop() {
-                if current_symbol.is_terminal() || current_symbol.is_external() {
-                    first_set.insert(current_symbol);
-                } else if processed_non_terminals.insert(current_symbol) {
-                    for production in &syntax_grammar.variables[current_symbol.index].productions {
-                        if let Some(step) = production.steps.first() {
+            while let Some(sym) = symbols_to_process.pop() {
+                for production in &syntax_grammar.variables[sym.index].productions {
+                    if let Some(step) = production.steps.first() {
+                        if step.symbol.is_terminal() || step.symbol.is_external() {
+                            first_set.insert(step.symbol);
+                        } else if processed_non_terminals.insert(step.symbol) {
                             symbols_to_process.push(step.symbol);
                         }
+                        *reserved_first_set = (*reserved_first_set).max(step.reserved_word_set_id);
                     }
                 }
             }
 
             // The LAST set is defined in a similar way to the FIRST set.
-            let last_set = result.last_sets.entry(symbol).or_insert_with(TokenSet::new);
+            let last_set = result.last_sets.entry(symbol).or_default();
             processed_non_terminals.clear();
             symbols_to_process.clear();
             symbols_to_process.push(symbol);
-            while let Some(current_symbol) = symbols_to_process.pop() {
-                if current_symbol.is_terminal() || current_symbol.is_external() {
-                    last_set.insert(current_symbol);
-                } else if processed_non_terminals.insert(current_symbol) {
-                    for production in &syntax_grammar.variables[current_symbol.index].productions {
-                        if let Some(step) = production.steps.last() {
+            while let Some(sym) = symbols_to_process.pop() {
+                for production in &syntax_grammar.variables[sym.index].productions {
+                    if let Some(step) = production.steps.last() {
+                        if step.symbol.is_terminal() || step.symbol.is_external() {
+                            last_set.insert(step.symbol);
+                        } else if processed_non_terminals.insert(step.symbol) {
                             symbols_to_process.push(step.symbol);
                         }

@@ -124,67 +131,75 @@ impl<'a> ParseItemSetBuilder<'a> {
 
         // To compute an item set's transitive closure, we find each item in the set
         // whose next symbol is a non-terminal, and we add new items to the set for
-        // each of that symbols' productions. These productions might themselves begin
+        // each of that symbol's productions. These productions might themselves begin
         // with non-terminals, so the process continues recursively. In this process,
         // the total set of entries that get added depends only on two things:
-        // * the set of non-terminal symbols that occur at each item's current position
-        // * the set of terminals that occurs after each of these non-terminal symbols
+        //
+        // * the non-terminal symbol that occurs next in each item
+        //
+        // * the set of terminals that can follow that non-terminal symbol in the item
         //
         // So we can avoid a lot of duplicated recursive work by precomputing, for each
         // non-terminal symbol `i`, a final list of *additions* that must be made to an
-        // item set when `i` occurs as the next symbol in one if its core items. The
-        // structure of an *addition* is as follows:
-        // * `item` - the new item that must be added as part of the expansion of `i`
-        // * `lookaheads` - lookahead tokens that can always come after that item in the expansion
-        //   of `i`
-        // * `propagates_lookaheads` - a boolean indicating whether or not `item` can occur at the
-        //   *end* of the expansion of `i`, so that i's own current lookahead tokens can occur
-        //   after `item`.
+        // item set when symbol `i` occurs as the next symbol in one if its core items.
+        // The structure of a precomputed *addition* is as follows:
         //
-        // Again, rather than computing these additions recursively, we use an explicit
-        // stack called `entries_to_process`.
+        // * `item` - the new item that must be added as part of the expansion of the symbol `i`.
+        //
+        // * `lookaheads` - the set of possible lookahead tokens that can always come after `item`
+        //   in an expansion of symbol `i`.
+        //
+        // * `reserved_lookaheads` - the set of reserved lookahead tokens that can
+        //   always come after `item` in the expansion of symbol `i`.
+        //
+        // * `propagates_lookaheads` - a boolean indicating whether or not `item` can occur at the
+        //   *end* of the expansion of symbol `i`, so that i's own current lookahead tokens can
+        //   occur after `item`.
+        //
+        // Rather than computing these additions recursively, we use an explicit stack.
+        let empty_lookaheads = TokenSet::new();
+        let mut stack = Vec::new();
+        let mut follow_set_info_by_non_terminal = HashMap::<usize, FollowSetInfo>::new();
         for i in 0..syntax_grammar.variables.len() {
-            let empty_lookaheads = TokenSet::new();
-            let mut entries_to_process = vec![(i, &empty_lookaheads, true)];
-
             // First, build up a map whose keys are all of the non-terminals that can
             // appear at the beginning of non-terminal `i`, and whose values store
-            // information about the tokens that can follow each non-terminal.
-            let mut follow_set_info_by_non_terminal = HashMap::new();
-            while let Some(entry) = entries_to_process.pop() {
-                let (variable_index, lookaheads, propagates_lookaheads) = entry;
-                let existing_info = follow_set_info_by_non_terminal
-                    .entry(variable_index)
-                    .or_insert_with(|| FollowSetInfo {
-                        lookaheads: TokenSet::new(),
-                        propagates_lookaheads: false,
-                    });
-
-                let did_add_follow_set_info;
-                if propagates_lookaheads {
-                    did_add_follow_set_info = !existing_info.propagates_lookaheads;
-                    existing_info.propagates_lookaheads = true;
-                } else {
-                    did_add_follow_set_info = existing_info.lookaheads.insert_all(lookaheads);
+            // information about the tokens that can follow those non-terminals.
+            stack.clear();
+            stack.push((i, &empty_lookaheads, ReservedWordSetId::default(), true));
+            follow_set_info_by_non_terminal.clear();
+            while let Some((sym_ix, lookaheads, reserved_word_set_id, propagates_lookaheads)) =
+                stack.pop()
+            {
+                let mut did_add = false;
+                let info = follow_set_info_by_non_terminal.entry(sym_ix).or_default();
+                did_add |= info.lookaheads.insert_all(lookaheads);
+                if reserved_word_set_id > info.reserved_lookaheads {
+                    info.reserved_lookaheads = reserved_word_set_id;
+                    did_add = true;
                 }
+                did_add |= propagates_lookaheads && !info.propagates_lookaheads;
+                info.propagates_lookaheads |= propagates_lookaheads;
+                if !did_add {
+                    continue;
+                }
 
-                if did_add_follow_set_info {
-                    for production in &syntax_grammar.variables[variable_index].productions {
-                        if let Some(symbol) = production.first_symbol() {
-                            if symbol.is_non_terminal() {
-                                if production.steps.len() == 1 {
-                                    entries_to_process.push((
-                                        symbol.index,
-                                        lookaheads,
-                                        propagates_lookaheads,
-                                    ));
-                                } else {
-                                    entries_to_process.push((
-                                        symbol.index,
-                                        &result.first_sets[&production.steps[1].symbol],
-                                        false,
-                                    ));
-                                }
+                for production in &syntax_grammar.variables[sym_ix].productions {
+                    if let Some(symbol) = production.first_symbol() {
+                        if symbol.is_non_terminal() {
+                            if let Some(next_step) = production.steps.get(1) {
+                                stack.push((
+                                    symbol.index,
+                                    &result.first_sets[&next_step.symbol],
+                                    result.reserved_first_sets[&next_step.symbol],
+                                    false,
+                                ));
+                            } else {
+                                stack.push((
+                                    symbol.index,
+                                    lookaheads,
+                                    reserved_word_set_id,
+                                    propagates_lookaheads,
+                                ));
+                            }
                         }
                     }

@@ -194,7 +209,7 @@ impl<'a> ParseItemSetBuilder<'a> {
             // Store all of those non-terminals' productions, along with their associated
             // lookahead info, as *additions* associated with non-terminal `i`.
             let additions_for_non_terminal = &mut result.transitive_closure_additions[i];
-            for (variable_index, follow_set_info) in follow_set_info_by_non_terminal {
+            for (&variable_index, follow_set_info) in &follow_set_info_by_non_terminal {
                 let variable = &syntax_grammar.variables[variable_index];
                 let non_terminal = Symbol::non_terminal(variable_index);
                 let variable_index = variable_index as u32;

@@ -239,20 +254,23 @@ impl<'a> ParseItemSetBuilder<'a> {
 
     pub fn transitive_closure(&self, item_set: &ParseItemSet<'a>) -> ParseItemSet<'a> {
         let mut result = ParseItemSet::default();
-        for (item, lookaheads) in &item_set.entries {
+        for entry in &item_set.entries {
             if let Some(productions) = self
                 .inlines
-                .inlined_productions(item.production, item.step_index)
+                .inlined_productions(entry.item.production, entry.item.step_index)
             {
                 for production in productions {
                     self.add_item(
                         &mut result,
-                        item.substitute_production(production),
-                        lookaheads,
+                        &ParseItemSetEntry {
+                            item: entry.item.substitute_production(production),
+                            lookaheads: entry.lookaheads.clone(),
+                            following_reserved_word_set: entry.following_reserved_word_set,
+                        },
                     );
                 }
             } else {
-                self.add_item(&mut result, *item, lookaheads);
+                self.add_item(&mut result, entry);
             }
         }
         result

@@ -262,30 +280,64 @@ impl<'a> ParseItemSetBuilder<'a> {
         &self.first_sets[symbol]
     }
 
+    pub fn reserved_first_set(&self, symbol: &Symbol) -> Option<&TokenSet> {
+        let id = *self.reserved_first_sets.get(symbol)?;
+        Some(&self.syntax_grammar.reserved_word_sets[id.0])
+    }
+
     pub fn last_set(&self, symbol: &Symbol) -> &TokenSet {
         &self.last_sets[symbol]
     }
 
-    fn add_item(&self, set: &mut ParseItemSet<'a>, item: ParseItem<'a>, lookaheads: &TokenSet) {
-        if let Some(step) = item.step() {
+    fn add_item(&self, set: &mut ParseItemSet<'a>, entry: &ParseItemSetEntry<'a>) {
+        if let Some(step) = entry.item.step() {
             if step.symbol.is_non_terminal() {
-                let next_step = item.successor().step();
+                let next_step = entry.item.successor().step();
 
                 // Determine which tokens can follow this non-terminal.
-                let following_tokens = next_step.map_or(lookaheads, |next_step| {
-                    self.first_sets.get(&next_step.symbol).unwrap()
-                });
+                let (following_tokens, following_reserved_tokens) =
+                    if let Some(next_step) = next_step {
+                        (
+                            self.first_sets.get(&next_step.symbol).unwrap(),
+                            *self.reserved_first_sets.get(&next_step.symbol).unwrap(),
+                        )
+                    } else {
+                        (&entry.lookaheads, entry.following_reserved_word_set)
+                    };
 
                 // Use the pre-computed *additions* to expand the non-terminal.
                 for addition in &self.transitive_closure_additions[step.symbol.index] {
-                    let lookaheads = set.insert(addition.item, &addition.info.lookaheads);
+                    let entry = set.insert(addition.item);
+                    entry.lookaheads.insert_all(&addition.info.lookaheads);
+
+                    if let Some(word_token) = self.syntax_grammar.word_token {
+                        if addition.info.lookaheads.contains(&word_token) {
+                            entry.following_reserved_word_set = entry
+                                .following_reserved_word_set
+                                .max(addition.info.reserved_lookaheads);
+                        }
+                    }
+
                     if addition.info.propagates_lookaheads {
-                        lookaheads.insert_all(following_tokens);
+                        entry.lookaheads.insert_all(following_tokens);
+
+                        if let Some(word_token) = self.syntax_grammar.word_token {
+                            if following_tokens.contains(&word_token) {
+                                entry.following_reserved_word_set = entry
+                                    .following_reserved_word_set
+                                    .max(following_reserved_tokens);
+                            }
+                        }
                     }
                 }
             }
         }
-        set.insert(item, lookaheads);
+
+        let e = set.insert(entry.item);
+        e.lookaheads.insert_all(&entry.lookaheads);
+        e.following_reserved_word_set = e
+            .following_reserved_word_set
+            .max(entry.following_reserved_word_set);
     }
 }

@@ -170,17 +170,12 @@ impl Minimizer<'_> {
         let mut new_states = Vec::with_capacity(state_ids_by_group_id.len());
         for state_ids in &state_ids_by_group_id {
             // Initialize the new state based on the first old state in the group.
-            let mut parse_state = ParseState::default();
-            mem::swap(&mut parse_state, &mut self.parse_table.states[state_ids[0]]);
+            let mut parse_state = mem::take(&mut self.parse_table.states[state_ids[0]]);
 
             // Extend the new state with all of the actions from the other old states
             // in the group.
             for state_id in &state_ids[1..] {
-                let mut other_parse_state = ParseState::default();
-                mem::swap(
-                    &mut other_parse_state,
-                    &mut self.parse_table.states[*state_id],
-                );
+                let other_parse_state = mem::take(&mut self.parse_table.states[*state_id]);
 
                 parse_state
                     .terminal_entries

@@ -188,6 +183,12 @@ impl Minimizer<'_> {
                 parse_state
                     .nonterminal_entries
                     .extend(other_parse_state.nonterminal_entries);
+                parse_state
+                    .reserved_words
+                    .insert_all(&other_parse_state.reserved_words);
+                for symbol in parse_state.terminal_entries.keys() {
+                    parse_state.reserved_words.remove(symbol);
+                }
             }
 
             // Update the new state's outgoing references using the new grouping.

@@ -216,24 +217,14 @@ impl Minimizer<'_> {
             ) {
                 return true;
             }
-        } else if self.token_conflicts(
-            left_state.id,
-            right_state.id,
-            right_state.terminal_entries.keys(),
-            *token,
-        ) {
+        } else if self.token_conflicts(left_state.id, right_state.id, right_state, *token) {
             return true;
         }
     }
 
     for token in right_state.terminal_entries.keys() {
         if !left_state.terminal_entries.contains_key(token)
-            && self.token_conflicts(
-                left_state.id,
-                right_state.id,
-                left_state.terminal_entries.keys(),
-                *token,
-            )
+            && self.token_conflicts(left_state.id, right_state.id, left_state, *token)
         {
             return true;
         }

@@ -350,11 +341,11 @@ impl Minimizer<'_> {
         false
     }
 
-    fn token_conflicts<'b>(
+    fn token_conflicts(
         &self,
         left_id: ParseStateId,
         right_id: ParseStateId,
-        existing_tokens: impl Iterator<Item = &'b Symbol>,
+        right_state: &ParseState,
         new_token: Symbol,
     ) -> bool {
         if new_token == Symbol::end_of_nonterminal_extra() {

@@ -372,6 +363,10 @@ impl Minimizer<'_> {
             return true;
         }
 
+        if right_state.reserved_words.contains(&new_token) {
+            return false;
+        }
+
         // Do not add tokens which are both internal and external. Their validity could
        // influence the behavior of the external scanner.
         if self

@@ -388,23 +383,30 @@ impl Minimizer<'_> {
         }
 
         // Do not add a token if it conflicts with an existing token.
-        for token in existing_tokens {
-            if token.is_terminal()
-                && !(self.syntax_grammar.word_token == Some(*token)
-                    && self.keywords.contains(&new_token))
-                && !(self.syntax_grammar.word_token == Some(new_token)
-                    && self.keywords.contains(token))
-                && (self
-                    .token_conflict_map
-                    .does_conflict(new_token.index, token.index)
-                    || self
-                        .token_conflict_map
-                        .does_match_same_string(new_token.index, token.index))
+        for token in right_state.terminal_entries.keys().copied() {
+            if !token.is_terminal() {
+                continue;
+            }
+            if self.syntax_grammar.word_token == Some(token) && self.keywords.contains(&new_token) {
+                continue;
+            }
+            if self.syntax_grammar.word_token == Some(new_token) && self.keywords.contains(&token) {
+                continue;
+            }
+
+            if self
+                .token_conflict_map
+                .does_conflict(new_token.index, token.index)
+                || self
+                    .token_conflict_map
+                    .does_match_same_string(new_token.index, token.index)
             {
                 info!(
-                    "split states {} {} - token {} conflicts with {}",
-                    left_id,
-                    right_id,
+                    "split states {left_id} {right_id} - token {} conflicts with {}",
                     self.symbol_name(&new_token),
-                    self.symbol_name(token),
+                    self.symbol_name(&token),
                 );
                 return true;
             }

@@ -16,6 +16,7 @@ use self::{
     build_lex_table::build_lex_table,
     build_parse_table::{build_parse_table, ParseStateInfo},
     coincident_tokens::CoincidentTokenIndex,
+    item_set_builder::ParseItemSetBuilder,
     minimize_parse_table::minimize_parse_table,
     token_conflicts::TokenConflictMap,
 };

@@ -31,7 +32,6 @@ pub struct Tables {
     pub parse_table: ParseTable,
     pub main_lex_table: LexTable,
     pub keyword_lex_table: LexTable,
-    pub word_token: Option<Symbol>,
     pub large_character_sets: Vec<(Option<Symbol>, CharacterSet)>,
 }
 

@@ -43,8 +43,15 @@ pub fn build_tables(
     inlines: &InlinedProductionMap,
     report_symbol_name: Option<&str>,
 ) -> Result<Tables> {
-    let (mut parse_table, following_tokens, parse_state_info) =
-        build_parse_table(syntax_grammar, lexical_grammar, inlines, variable_info)?;
+    let item_set_builder = ParseItemSetBuilder::new(syntax_grammar, lexical_grammar, inlines);
+    let following_tokens =
+        get_following_tokens(syntax_grammar, lexical_grammar, inlines, &item_set_builder);
+    let (mut parse_table, parse_state_info) = build_parse_table(
+        syntax_grammar,
+        lexical_grammar,
+        item_set_builder,
+        variable_info,
+    )?;
     let token_conflict_map = TokenConflictMap::new(lexical_grammar, following_tokens);
     let coincident_token_index = CoincidentTokenIndex::new(&parse_table, lexical_grammar);
     let keywords = identify_keywords(

@@ -97,10 +104,50 @@ pub fn build_tables(
         main_lex_table: lex_tables.main_lex_table,
         keyword_lex_table: lex_tables.keyword_lex_table,
         large_character_sets: lex_tables.large_character_sets,
-        word_token: syntax_grammar.word_token,
     })
 }
 
+fn get_following_tokens(
+    syntax_grammar: &SyntaxGrammar,
+    lexical_grammar: &LexicalGrammar,
+    inlines: &InlinedProductionMap,
+    builder: &ParseItemSetBuilder,
+) -> Vec<TokenSet> {
+    let mut result = vec![TokenSet::new(); lexical_grammar.variables.len()];
+    let productions = syntax_grammar
+        .variables
+        .iter()
+        .flat_map(|v| &v.productions)
+        .chain(&inlines.productions);
+    let all_tokens = (0..result.len())
+        .map(Symbol::terminal)
+        .collect::<TokenSet>();
+    for production in productions {
+        for i in 1..production.steps.len() {
+            let left_tokens = builder.last_set(&production.steps[i - 1].symbol);
+            let right_tokens = builder.first_set(&production.steps[i].symbol);
+            let right_reserved_tokens = builder.reserved_first_set(&production.steps[i].symbol);
+            for left_token in left_tokens.iter() {
+                if left_token.is_terminal() {
+                    result[left_token.index].insert_all_terminals(right_tokens);
+                    if let Some(reserved_tokens) = right_reserved_tokens {
+                        result[left_token.index].insert_all_terminals(reserved_tokens);
+                    }
+                }
+            }
+        }
+    }
+    for extra in &syntax_grammar.extra_symbols {
+        if extra.is_terminal() {
+            for entry in &mut result {
+                entry.insert(*extra);
+            }
+            result[extra.index] = all_tokens.clone();
+        }
+    }
+    result
+}
+
 fn populate_error_state(
     parse_table: &mut ParseTable,
     syntax_grammar: &SyntaxGrammar,

@@ -414,9 +461,9 @@ fn report_state_info<'a>(
     for (i, state) in parse_table.states.iter().enumerate() {
         all_state_indices.insert(i);
         let item_set = &parse_state_info[state.id];
-        for (item, _) in &item_set.1.entries {
-            if !item.is_augmented() {
-                symbols_with_state_indices[item.variable_index as usize]
+        for entry in &item_set.1.entries {
+            if !entry.item.is_augmented() {
+                symbols_with_state_indices[entry.item.variable_index as usize]
                     .1
                     .insert(i);
             }

@@ -3,7 +3,7 @@ pub fn split_state_id_groups<S>(
     state_ids_by_group_id: &mut Vec<Vec<usize>>,
     group_ids_by_state_id: &mut [usize],
     start_group_id: usize,
-    mut f: impl FnMut(&S, &S, &[usize]) -> bool,
+    mut should_split: impl FnMut(&S, &S, &[usize]) -> bool,
 ) -> bool {
     let mut result = false;
 

@@ -33,7 +33,7 @@ pub fn split_state_id_groups<S>(
                 }
                 let right_state = &states[right_state_id];
 
-                if f(left_state, right_state, group_ids_by_state_id) {
+                if should_split(left_state, right_state, group_ids_by_state_id) {
                     split_state_ids.push(right_state_id);
                 }
 

@@ -16,6 +16,7 @@ function alias(rule, value) {
       result.value = value.symbol.name;
       return result;
     case Object:
+    case GrammarSymbol:
       if (typeof value.type === 'string' && value.type === 'SYMBOL') {
         result.named = true;
         result.value = value.name;

@@ -153,11 +154,26 @@ function seq(...elements) {
   };
 }
 
-function sym(name) {
+class GrammarSymbol {
+  constructor(name) {
+    this.type = "SYMBOL";
+    this.name = name;
+  }
+}
+
+function reserved(wordset, rule) {
+  if (typeof wordset !== 'string') {
+    throw new Error('Invalid reserved word set name: ' + wordset)
+  }
   return {
-    type: "SYMBOL",
-    name
-  };
+    type: "RESERVED",
+    content: normalize(rule),
+    context_name: wordset,
+  }
+}
+
+function sym(name) {
+  return new GrammarSymbol(name);
 }
 
 function token(value) {
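
For illustration, the new `reserved` helper wraps a rule and names the reserved word set
that applies at that position. A hypothetical call, and the plain object it produces
according to the code above (the set name and rule are invented):

// Lex `identifier` here against the word set named 'keywords' declared in
// the grammar's `reserved` option (hypothetical names).
const rule = reserved('keywords', sym('identifier'));
// `reserved` normalizes its rule argument, so `rule` is the serializable object:
// { type: 'RESERVED', content: { type: 'SYMBOL', name: 'identifier' }, context_name: 'keywords' }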

@@ -236,6 +252,7 @@ function grammar(baseGrammar, options) {
       inline: [],
       supertypes: [],
       precedences: [],
+      reserved: {},
     };
   } else {
     baseGrammar = baseGrammar.grammar;

@@ -309,6 +326,28 @@ function grammar(baseGrammar, options) {
     }
   }
 
+  let reserved = baseGrammar.reserved;
+  if (options.reserved) {
+    if (typeof options.reserved !== "object") {
+      throw new Error("Grammar's 'reserved' property must be an object.");
+    }
+
+    for (const reservedWordSetName of Object.keys(options.reserved)) {
+      const reservedWordSetFn = options.reserved[reservedWordSetName]
+      if (typeof reservedWordSetFn !== "function") {
+        throw new Error(`Grammar reserved word sets must all be functions. '${reservedWordSetName}' is not.`);
+      }
+
+      const reservedTokens = reservedWordSetFn.call(ruleBuilder, ruleBuilder, baseGrammar.reserved[reservedWordSetName]);
+
+      if (!Array.isArray(reservedTokens)) {
+        throw new Error(`Grammar's reserved word set functions must all return arrays of rules. '${reservedWordSetName}' does not.`);
+      }
+
+      reserved[reservedWordSetName] = reservedTokens.map(normalize);
+    }
+  }
+
   let extras = baseGrammar.extras.slice();
   if (options.extras) {
     if (typeof options.extras !== "function") {
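
A hypothetical grammar fragment exercising the validation above: each key of the `reserved`
option names a word set, and each value must be a function returning an array of rules
(the grammar name, set names, and rules are invented for this sketch):

module.exports = grammar({
  name: 'example',
  reserved: {
    // Words that may not be parsed as ordinary identifiers.
    keywords: _ => ['if', 'else', 'while', 'return'],
    // A smaller set that individual rules could opt into via reserved('relaxed', ...).
    relaxed: _ => ['return'],
  },
  rules: {
    source_file: $ => repeat($.identifier),
    identifier: _ => /[a-z]+/,
  },
});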

@@ -439,6 +478,7 @@ function grammar(baseGrammar, options) {
       externals,
       inline,
       supertypes,
+      reserved,
     },
   };
 }

@@ -478,6 +518,7 @@ globalThis.optional = optional;
 globalThis.prec = prec;
 globalThis.repeat = repeat;
 globalThis.repeat1 = repeat1;
+global.reserved = reserved;
 globalThis.seq = seq;
 globalThis.sym = sym;
 globalThis.token = token;

@@ -2,7 +2,7 @@ use std::{collections::HashMap, fmt};
 
 use super::{
     nfa::Nfa,
-    rules::{Alias, Associativity, Precedence, Rule, Symbol},
+    rules::{Alias, Associativity, Precedence, Rule, Symbol, TokenSet},
 };
 
 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]

@@ -39,6 +39,13 @@ pub struct InputGrammar {
     pub variables_to_inline: Vec<String>,
     pub supertype_symbols: Vec<String>,
     pub word_token: Option<String>,
+    pub reserved_words: Vec<ReservedWordContext<Rule>>,
+}
+
+#[derive(Debug, Default, PartialEq, Eq)]
+pub struct ReservedWordContext<T> {
+    pub name: String,
+    pub reserved_words: Vec<T>,
 }
 
 // Extracted lexical grammar

@@ -66,8 +73,20 @@ pub struct ProductionStep {
     pub associativity: Option<Associativity>,
     pub alias: Option<Alias>,
     pub field_name: Option<String>,
+    pub reserved_word_set_id: ReservedWordSetId,
 }
 
+#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash, PartialOrd, Ord)]
+pub struct ReservedWordSetId(pub usize);
+
+impl fmt::Display for ReservedWordSetId {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        self.0.fmt(f)
+    }
+}
+
+pub const NO_RESERVED_WORDS: ReservedWordSetId = ReservedWordSetId(usize::MAX);
+
 #[derive(Clone, Debug, Default, PartialEq, Eq)]
 pub struct Production {
     pub steps: Vec<ProductionStep>,

@@ -104,51 +123,44 @@ pub struct SyntaxGrammar {
     pub variables_to_inline: Vec<Symbol>,
     pub word_token: Option<Symbol>,
     pub precedence_orderings: Vec<Vec<PrecedenceEntry>>,
+    pub reserved_word_sets: Vec<TokenSet>,
 }
 
 #[cfg(test)]
 impl ProductionStep {
     #[must_use]
-    pub const fn new(symbol: Symbol) -> Self {
+    pub fn new(symbol: Symbol) -> Self {
         Self {
             symbol,
             precedence: Precedence::None,
             associativity: None,
             alias: None,
             field_name: None,
+            reserved_word_set_id: ReservedWordSetId::default(),
         }
     }
 
-    pub fn with_prec(self, precedence: Precedence, associativity: Option<Associativity>) -> Self {
-        Self {
-            symbol: self.symbol,
-            precedence,
-            associativity,
-            alias: self.alias,
-            field_name: self.field_name,
-        }
+    pub fn with_prec(
+        mut self,
+        precedence: Precedence,
+        associativity: Option<Associativity>,
+    ) -> Self {
+        self.precedence = precedence;
+        self.associativity = associativity;
+        self
     }
 
-    pub fn with_alias(self, value: &str, is_named: bool) -> Self {
-        Self {
-            symbol: self.symbol,
-            precedence: self.precedence,
-            associativity: self.associativity,
-            alias: Some(Alias {
-                value: value.to_string(),
-                is_named,
-            }),
-            field_name: self.field_name,
-        }
+    pub fn with_alias(mut self, value: &str, is_named: bool) -> Self {
+        self.alias = Some(Alias {
+            value: value.to_string(),
+            is_named,
+        });
+        self
     }
-    pub fn with_field_name(self, name: &str) -> Self {
-        Self {
-            symbol: self.symbol,
-            precedence: self.precedence,
-            associativity: self.associativity,
-            alias: self.alias,
-            field_name: Some(name.to_string()),
-        }
+
+    pub fn with_field_name(mut self, name: &str) -> Self {
+        self.field_name = Some(name.to_string());
+        self
     }
 }

@@ -1,6 +1,6 @@
 use std::collections::HashSet;
 
-use anyhow::{anyhow, Result};
+use anyhow::{anyhow, bail, Result};
 use serde::Deserialize;
 use serde_json::{Map, Value};
 

@@ -8,6 +8,7 @@ use super::{
     grammars::{InputGrammar, PrecedenceEntry, Variable, VariableType},
     rules::{Precedence, Rule},
 };
+use crate::grammars::ReservedWordContext;
 
 #[derive(Deserialize)]
 #[serde(tag = "type")]

@@ -68,6 +69,10 @@ enum RuleJSON {
     IMMEDIATE_TOKEN {
         content: Box<RuleJSON>,
     },
+    RESERVED {
+        context_name: String,
+        content: Box<RuleJSON>,
+    },
 }
 
 #[derive(Deserialize)]
|
|
@ -93,7 +98,10 @@ pub struct GrammarJSON {
|
|||
inline: Vec<String>,
|
||||
#[serde(default)]
|
||||
supertypes: Vec<String>,
|
||||
#[serde(default)]
|
||||
word: Option<String>,
|
||||
#[serde(default)]
|
||||
reserved: Map<String, Value>,
|
||||
}
|
||||
|
||||
fn rule_is_referenced(rule: &Rule, target: &str) -> bool {
|
||||
|
|
@ -102,7 +110,9 @@ fn rule_is_referenced(rule: &Rule, target: &str) -> bool {
|
|||
Rule::Choice(rules) | Rule::Seq(rules) => {
|
||||
rules.iter().any(|r| rule_is_referenced(r, target))
|
||||
}
|
||||
Rule::Metadata { rule, .. } => rule_is_referenced(rule, target),
|
||||
Rule::Metadata { rule, .. } | Rule::Reserved { rule, .. } => {
|
||||
rule_is_referenced(rule, target)
|
||||
}
|
||||
Rule::Repeat(inner) => rule_is_referenced(inner, target),
|
||||
Rule::Blank | Rule::String(_) | Rule::Pattern(_, _) | Rule::Symbol(_) => false,
|
||||
}
|
||||
|
|
@ -226,6 +236,27 @@ pub(crate) fn parse_grammar(input: &str) -> Result<InputGrammar> {
|
|||
});
|
||||
}
|
||||
|
||||
let reserved_words = grammar_json
|
||||
.reserved
|
||||
.into_iter()
|
||||
.map(|(name, rule_values)| {
|
||||
let mut reserved_words = Vec::new();
|
||||
|
||||
let Value::Array(rule_values) = rule_values else {
|
||||
bail!("reserved word sets must be arrays");
|
||||
};
|
||||
|
||||
for value in rule_values {
|
||||
let rule_json: RuleJSON = serde_json::from_value(value)?;
|
||||
reserved_words.push(parse_rule(rule_json));
|
||||
}
|
||||
Ok(ReservedWordContext {
|
||||
name,
|
||||
reserved_words,
|
||||
})
|
||||
})
|
||||
.collect::<Result<Vec<_>>>()?;
|
||||
|
||||
Ok(InputGrammar {
|
||||
name: grammar_json.name,
|
||||
word_token: grammar_json.word,
|
||||
|
|
@ -236,6 +267,7 @@ pub(crate) fn parse_grammar(input: &str) -> Result<InputGrammar> {
|
|||
variables,
|
||||
extra_symbols,
|
||||
external_tokens,
|
||||
reserved_words,
|
||||
})
|
||||
}
|
||||
|
||||
|
|
@ -283,6 +315,13 @@ fn parse_rule(json: RuleJSON) -> Rule {
|
|||
RuleJSON::PREC_DYNAMIC { value, content } => {
|
||||
Rule::prec_dynamic(value, parse_rule(*content))
|
||||
}
|
||||
RuleJSON::RESERVED {
|
||||
content,
|
||||
context_name,
|
||||
} => Rule::Reserved {
|
||||
rule: Box::new(parse_rule(*content)),
|
||||
context_name,
|
||||
},
|
||||
RuleJSON::TOKEN { content } => Rule::token(parse_rule(*content)),
|
||||
RuleJSON::IMMEDIATE_TOKEN { content } => Rule::immediate_token(parse_rule(*content)),
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
use std::{collections::HashMap, mem};
use std::collections::HashMap;

use anyhow::{anyhow, Result};

use super::{ExtractedLexicalGrammar, ExtractedSyntaxGrammar, InternedGrammar};
use crate::{
    grammars::{ExternalToken, Variable, VariableType},
    grammars::{ExternalToken, ReservedWordContext, Variable, VariableType},
    rules::{MetadataParams, Rule, Symbol, SymbolType},
};

@ -148,6 +148,27 @@ pub(super) fn extract_tokens(
        word_token = Some(token);
    }

    let mut reserved_word_contexts = Vec::new();
    for reserved_word_context in grammar.reserved_word_sets {
        let mut reserved_words = Vec::new();
        for reserved_rule in reserved_word_context.reserved_words {
            if let Rule::Symbol(symbol) = reserved_rule {
                reserved_words.push(symbol_replacer.replace_symbol(symbol));
            } else if let Some(index) = lexical_variables
                .iter()
                .position(|v| v.rule == reserved_rule)
            {
                reserved_words.push(Symbol::terminal(index));
            } else {
                return Err(anyhow!("Reserved words must be tokens"));
            }
        }
        reserved_word_contexts.push(ReservedWordContext {
            name: reserved_word_context.name,
            reserved_words,
        });
    }

    Ok((
        ExtractedSyntaxGrammar {
            variables,

@ -158,6 +179,7 @@ pub(super) fn extract_tokens(
            external_tokens,
            word_token,
            precedence_orderings: grammar.precedence_orderings,
            reserved_word_sets: reserved_word_contexts,
        },
        ExtractedLexicalGrammar {
            variables: lexical_variables,

@ -188,9 +210,7 @@ impl TokenExtractor {
        self.current_variable_name.push_str(&variable.name);
        self.current_variable_token_count = 0;
        self.is_first_rule = is_first;
        let mut rule = Rule::Blank;
        mem::swap(&mut rule, &mut variable.rule);
        variable.rule = self.extract_tokens_in_rule(&rule)?;
        variable.rule = self.extract_tokens_in_rule(&variable.rule)?;
        Ok(())
    }

@ -237,6 +257,10 @@ impl TokenExtractor {
                .map(|e| self.extract_tokens_in_rule(e))
                .collect::<Result<Vec<_>>>()?,
            )),
            Rule::Reserved { rule, context_name } => Ok(Rule::Reserved {
                rule: Box::new(self.extract_tokens_in_rule(rule)?),
                context_name: context_name.clone(),
            }),
            _ => Ok(input.clone()),
        }
    }

@ -305,6 +329,10 @@ impl SymbolReplacer {
                params: params.clone(),
                rule: Box::new(self.replace_symbols_in_rule(rule)),
            },
            Rule::Reserved { rule, context_name } => Rule::Reserved {
                rule: Box::new(self.replace_symbols_in_rule(rule)),
                context_name: context_name.clone(),
            },
            _ => rule.clone(),
        }
    }
@ -1,48 +1,77 @@
use std::collections::HashMap;

use anyhow::{anyhow, Result};
use indoc::indoc;

use super::ExtractedSyntaxGrammar;
use crate::{
    grammars::{Production, ProductionStep, SyntaxGrammar, SyntaxVariable, Variable},
    rules::{Alias, Associativity, Precedence, Rule, Symbol},
    grammars::{
        Production, ProductionStep, ReservedWordSetId, SyntaxGrammar, SyntaxVariable, Variable,
    },
    rules::{Alias, Associativity, Precedence, Rule, Symbol, TokenSet},
};

struct RuleFlattener {
    production: Production,
    reserved_word_set_ids: HashMap<String, ReservedWordSetId>,
    precedence_stack: Vec<Precedence>,
    associativity_stack: Vec<Associativity>,
    reserved_word_stack: Vec<ReservedWordSetId>,
    alias_stack: Vec<Alias>,
    field_name_stack: Vec<String>,
}

impl RuleFlattener {
    const fn new() -> Self {
    const fn new(reserved_word_set_ids: HashMap<String, ReservedWordSetId>) -> Self {
        Self {
            production: Production {
                steps: Vec::new(),
                dynamic_precedence: 0,
            },
            reserved_word_set_ids,
            precedence_stack: Vec::new(),
            associativity_stack: Vec::new(),
            reserved_word_stack: Vec::new(),
            alias_stack: Vec::new(),
            field_name_stack: Vec::new(),
        }
    }

    fn flatten(mut self, rule: Rule) -> Production {
        self.apply(rule, true);
        self.production
    fn flatten_variable(&mut self, variable: Variable) -> Result<SyntaxVariable> {
        let mut productions = Vec::new();
        for rule in extract_choices(variable.rule) {
            let production = self.flatten_rule(rule)?;
            if !productions.contains(&production) {
                productions.push(production);
            }
        }
        Ok(SyntaxVariable {
            name: variable.name,
            kind: variable.kind,
            productions,
        })
    }

    fn apply(&mut self, rule: Rule, at_end: bool) -> bool {
    fn flatten_rule(&mut self, rule: Rule) -> Result<Production> {
        self.production = Production::default();
        self.alias_stack.clear();
        self.reserved_word_stack.clear();
        self.precedence_stack.clear();
        self.associativity_stack.clear();
        self.field_name_stack.clear();
        self.apply(rule, true)?;
        Ok(self.production.clone())
    }

    fn apply(&mut self, rule: Rule, at_end: bool) -> Result<bool> {
        match rule {
            Rule::Seq(members) => {
                let mut result = false;
                let last_index = members.len() - 1;
                for (i, member) in members.into_iter().enumerate() {
                    result |= self.apply(member, i == last_index && at_end);
                    result |= self.apply(member, i == last_index && at_end)?;
                }
                result
                Ok(result)
            }
            Rule::Metadata { rule, params } => {
                let mut has_precedence = false;

@ -73,7 +102,7 @@ impl RuleFlattener {
                    self.production.dynamic_precedence = params.dynamic_precedence;
                }

                let did_push = self.apply(*rule, at_end);
                let did_push = self.apply(*rule, at_end)?;

                if has_precedence {
                    self.precedence_stack.pop();

@ -102,7 +131,18 @@ impl RuleFlattener {
                    self.field_name_stack.pop();
                }

                did_push
                Ok(did_push)
            }
            Rule::Reserved { rule, context_name } => {
                self.reserved_word_stack.push(
                    self.reserved_word_set_ids
                        .get(&context_name)
                        .copied()
                        .ok_or_else(|| anyhow!("no such reserved word set: {context_name}"))?,
                );
                let did_push = self.apply(*rule, at_end)?;
                self.reserved_word_stack.pop();
                Ok(did_push)
            }
            Rule::Symbol(symbol) => {
                self.production.steps.push(ProductionStep {

@ -113,12 +153,17 @@ impl RuleFlattener {
                        .cloned()
                        .unwrap_or(Precedence::None),
                    associativity: self.associativity_stack.last().copied(),
                    reserved_word_set_id: self
                        .reserved_word_stack
                        .last()
                        .copied()
                        .unwrap_or(ReservedWordSetId::default()),
                    alias: self.alias_stack.last().cloned(),
                    field_name: self.field_name_stack.last().cloned(),
                });
                true
                Ok(true)
            }
            _ => false,
            _ => Ok(false),
        }
    }
}

@ -155,25 +200,17 @@ fn extract_choices(rule: Rule) -> Vec<Rule> {
                params: params.clone(),
            })
            .collect(),
        Rule::Reserved { rule, context_name } => extract_choices(*rule)
            .into_iter()
            .map(|rule| Rule::Reserved {
                rule: Box::new(rule),
                context_name: context_name.clone(),
            })
            .collect(),
        _ => vec![rule],
    }
}

fn flatten_variable(variable: Variable) -> SyntaxVariable {
    let mut productions = Vec::new();
    for rule in extract_choices(variable.rule) {
        let production = RuleFlattener::new().flatten(rule);
        if !productions.contains(&production) {
            productions.push(production);
        }
    }
    SyntaxVariable {
        name: variable.name,
        kind: variable.kind,
        productions,
    }
}

fn symbol_is_used(variables: &[SyntaxVariable], symbol: Symbol) -> bool {
    for variable in variables {
        for production in &variable.productions {

@ -188,10 +225,18 @@ fn symbol_is_used(variables: &[SyntaxVariable], symbol: Symbol) -> bool {
}

pub(super) fn flatten_grammar(grammar: ExtractedSyntaxGrammar) -> Result<SyntaxGrammar> {
    let mut variables = Vec::new();
    for variable in grammar.variables {
        variables.push(flatten_variable(variable));
    let mut reserved_word_set_ids_by_name = HashMap::new();
    for (ix, set) in grammar.reserved_word_sets.iter().enumerate() {
        reserved_word_set_ids_by_name.insert(set.name.clone(), ReservedWordSetId(ix));
    }

    let mut flattener = RuleFlattener::new(reserved_word_set_ids_by_name);
    let variables = grammar
        .variables
        .into_iter()
        .map(|variable| flattener.flatten_variable(variable))
        .collect::<Result<Vec<_>>>()?;

    for (i, variable) in variables.iter().enumerate() {
        let symbol = Symbol::non_terminal(i);

@ -218,6 +263,17 @@ pub(super) fn flatten_grammar(grammar: ExtractedSyntaxGrammar) -> Result<SyntaxG
            }
        }
    }
    let mut reserved_word_sets = grammar
        .reserved_word_sets
        .into_iter()
        .map(|set| set.reserved_words.into_iter().collect())
        .collect::<Vec<_>>();

    // If no default reserved word set is specified, there are no reserved words.
    if reserved_word_sets.is_empty() {
        reserved_word_sets.push(TokenSet::default());
    }

    Ok(SyntaxGrammar {
        extra_symbols: grammar.extra_symbols,
        expected_conflicts: grammar.expected_conflicts,

@ -226,6 +282,7 @@ pub(super) fn flatten_grammar(grammar: ExtractedSyntaxGrammar) -> Result<SyntaxG
        external_tokens: grammar.external_tokens,
        supertype_symbols: grammar.supertype_symbols,
        word_token: grammar.word_token,
        reserved_word_sets,
        variables,
    })
}

@ -237,28 +294,31 @@ mod tests {

    #[test]
    fn test_flatten_grammar() {
        let result = flatten_variable(Variable {
            name: "test".to_string(),
            kind: VariableType::Named,
            rule: Rule::seq(vec![
                Rule::non_terminal(1),
                Rule::prec_left(
                    Precedence::Integer(101),
                    Rule::seq(vec![
                        Rule::non_terminal(2),
                        Rule::choice(vec![
                            Rule::prec_right(
                                Precedence::Integer(102),
                                Rule::seq(vec![Rule::non_terminal(3), Rule::non_terminal(4)]),
                            ),
                            Rule::non_terminal(5),
        let mut flattener = RuleFlattener::new(HashMap::default());
        let result = flattener
            .flatten_variable(Variable {
                name: "test".to_string(),
                kind: VariableType::Named,
                rule: Rule::seq(vec![
                    Rule::non_terminal(1),
                    Rule::prec_left(
                        Precedence::Integer(101),
                        Rule::seq(vec![
                            Rule::non_terminal(2),
                            Rule::choice(vec![
                                Rule::prec_right(
                                    Precedence::Integer(102),
                                    Rule::seq(vec![Rule::non_terminal(3), Rule::non_terminal(4)]),
                                ),
                                Rule::non_terminal(5),
                            ]),
                            Rule::non_terminal(6),
                        ]),
                        Rule::non_terminal(6),
                    ]),
                ),
                Rule::non_terminal(7),
            ]),
        });
                    ),
                    Rule::non_terminal(7),
                ]),
            })
            .unwrap();

        assert_eq!(
            result.productions,

@ -295,28 +355,31 @@ mod tests {

    #[test]
    fn test_flatten_grammar_with_maximum_dynamic_precedence() {
        let result = flatten_variable(Variable {
            name: "test".to_string(),
            kind: VariableType::Named,
            rule: Rule::seq(vec![
                Rule::non_terminal(1),
                Rule::prec_dynamic(
                    101,
                    Rule::seq(vec![
                        Rule::non_terminal(2),
                        Rule::choice(vec![
                            Rule::prec_dynamic(
                                102,
                                Rule::seq(vec![Rule::non_terminal(3), Rule::non_terminal(4)]),
                            ),
                            Rule::non_terminal(5),
        let mut flattener = RuleFlattener::new(HashMap::default());
        let result = flattener
            .flatten_variable(Variable {
                name: "test".to_string(),
                kind: VariableType::Named,
                rule: Rule::seq(vec![
                    Rule::non_terminal(1),
                    Rule::prec_dynamic(
                        101,
                        Rule::seq(vec![
                            Rule::non_terminal(2),
                            Rule::choice(vec![
                                Rule::prec_dynamic(
                                    102,
                                    Rule::seq(vec![Rule::non_terminal(3), Rule::non_terminal(4)]),
                                ),
                                Rule::non_terminal(5),
                            ]),
                            Rule::non_terminal(6),
                        ]),
                        Rule::non_terminal(6),
                    ]),
                ),
                Rule::non_terminal(7),
            ]),
        });
                    ),
                    Rule::non_terminal(7),
                ]),
            })
            .unwrap();

        assert_eq!(
            result.productions,

@ -348,14 +411,17 @@ mod tests {

    #[test]
    fn test_flatten_grammar_with_final_precedence() {
        let result = flatten_variable(Variable {
            name: "test".to_string(),
            kind: VariableType::Named,
            rule: Rule::prec_left(
                Precedence::Integer(101),
                Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(2)]),
            ),
        });
        let mut flattener = RuleFlattener::new(HashMap::default());
        let result = flattener
            .flatten_variable(Variable {
                name: "test".to_string(),
                kind: VariableType::Named,
                rule: Rule::prec_left(
                    Precedence::Integer(101),
                    Rule::seq(vec![Rule::non_terminal(1), Rule::non_terminal(2)]),
                ),
            })
            .unwrap();

        assert_eq!(
            result.productions,

@ -370,14 +436,16 @@ mod tests {
            }]
        );

        let result = flatten_variable(Variable {
            name: "test".to_string(),
            kind: VariableType::Named,
            rule: Rule::prec_left(
                Precedence::Integer(101),
                Rule::seq(vec![Rule::non_terminal(1)]),
            ),
        });
        let result = flattener
            .flatten_variable(Variable {
                name: "test".to_string(),
                kind: VariableType::Named,
                rule: Rule::prec_left(
                    Precedence::Integer(101),
                    Rule::seq(vec![Rule::non_terminal(1)]),
                ),
            })
            .unwrap();

        assert_eq!(
            result.productions,

@ -391,18 +459,21 @@ mod tests {

    #[test]
    fn test_flatten_grammar_with_field_names() {
        let result = flatten_variable(Variable {
            name: "test".to_string(),
            kind: VariableType::Named,
            rule: Rule::seq(vec![
                Rule::field("first-thing".to_string(), Rule::terminal(1)),
                Rule::terminal(2),
                Rule::choice(vec![
                    Rule::Blank,
                    Rule::field("second-thing".to_string(), Rule::terminal(3)),
        let mut flattener = RuleFlattener::new(HashMap::default());
        let result = flattener
            .flatten_variable(Variable {
                name: "test".to_string(),
                kind: VariableType::Named,
                rule: Rule::seq(vec![
                    Rule::field("first-thing".to_string(), Rule::terminal(1)),
                    Rule::terminal(2),
                    Rule::choice(vec![
                        Rule::Blank,
                        Rule::field("second-thing".to_string(), Rule::terminal(3)),
                    ]),
                ]),
            ]),
        });
            })
            .unwrap();

        assert_eq!(
            result.productions,

@ -436,6 +507,7 @@ mod tests {
            external_tokens: Vec::new(),
            supertype_symbols: Vec::new(),
            word_token: None,
            reserved_word_sets: Vec::new(),
            variables: vec![Variable {
                name: "test".to_string(),
                kind: VariableType::Named,
@ -2,7 +2,7 @@ use anyhow::{anyhow, Result};

use super::InternedGrammar;
use crate::{
    grammars::{InputGrammar, Variable, VariableType},
    grammars::{InputGrammar, ReservedWordContext, Variable, VariableType},
    rules::{Rule, Symbol},
};

@ -45,6 +45,18 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar>
        })?);
    }

    let mut reserved_words = Vec::with_capacity(grammar.reserved_words.len());
    for reserved_word_set in &grammar.reserved_words {
        let mut interned_set = Vec::new();
        for rule in &reserved_word_set.reserved_words {
            interned_set.push(interner.intern_rule(rule, None)?);
        }
        reserved_words.push(ReservedWordContext {
            name: reserved_word_set.name.clone(),
            reserved_words: interned_set,
        });
    }

    let mut expected_conflicts = Vec::new();
    for conflict in &grammar.expected_conflicts {
        let mut interned_conflict = Vec::with_capacity(conflict.len());

@ -87,6 +99,7 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar>
        supertype_symbols,
        word_token,
        precedence_orderings: grammar.precedence_orderings.clone(),
        reserved_word_sets: reserved_words,
    })
}

@ -118,6 +131,10 @@ impl Interner<'_> {
                rule: Box::new(self.intern_rule(rule, name)?),
                params: params.clone(),
            }),
            Rule::Reserved { rule, context_name } => Ok(Rule::Reserved {
                rule: Box::new(self.intern_rule(rule, name)?),
                context_name: context_name.clone(),
            }),
            Rule::NamedSymbol(name) => self.intern_name(name).map_or_else(
                || Err(anyhow!("Undefined symbol `{name}`")),
                |symbol| Ok(Rule::Symbol(symbol)),
@ -27,6 +27,7 @@ use super::{
    },
    rules::{AliasMap, Precedence, Rule, Symbol},
};
use crate::grammars::ReservedWordContext;

pub struct IntermediateGrammar<T, U> {
    variables: Vec<Variable>,

@ -37,6 +38,7 @@ pub struct IntermediateGrammar<T, U> {
    variables_to_inline: Vec<Symbol>,
    supertype_symbols: Vec<Symbol>,
    word_token: Option<Symbol>,
    reserved_word_sets: Vec<ReservedWordContext<T>>,
}

pub type InternedGrammar = IntermediateGrammar<Rule, Variable>;

@ -60,6 +62,7 @@ impl<T, U> Default for IntermediateGrammar<T, U> {
            variables_to_inline: Vec::default(),
            supertype_symbols: Vec::default(),
            word_token: Option::default(),
            reserved_word_sets: Vec::default(),
        }
    }
}
@ -9,7 +9,7 @@ use super::{
    build_tables::Tables,
    grammars::{ExternalToken, LexicalGrammar, SyntaxGrammar, VariableType},
    nfa::CharacterSet,
    rules::{Alias, AliasMap, Symbol, SymbolType},
    rules::{Alias, AliasMap, Symbol, SymbolType, TokenSet},
    tables::{
        AdvanceAction, FieldLocation, GotoAction, LexState, LexTable, ParseAction, ParseTable,
        ParseTableEntry,

@ -19,7 +19,7 @@ use super::{
const SMALL_STATE_THRESHOLD: usize = 64;
const ABI_VERSION_MIN: usize = 14;
const ABI_VERSION_MAX: usize = tree_sitter::LANGUAGE_VERSION;
const ABI_VERSION_WITH_METADATA: usize = 15;
const ABI_VERSION_WITH_RESERVED_WORDS: usize = 15;
const BUILD_VERSION: &str = env!("CARGO_PKG_VERSION");
const BUILD_SHA: Option<&'static str> = option_env!("BUILD_SHA");

@ -58,6 +58,7 @@ macro_rules! dedent {
    };
}

#[derive(Default)]
struct Generator {
    buffer: String,
    indent_level: usize,

@ -68,7 +69,6 @@ struct Generator {
    large_character_sets: Vec<(Option<Symbol>, CharacterSet)>,
    large_character_set_info: Vec<LargeCharacterSetInfo>,
    large_state_count: usize,
    keyword_capture_token: Option<Symbol>,
    syntax_grammar: SyntaxGrammar,
    lexical_grammar: LexicalGrammar,
    default_aliases: AliasMap,

@ -77,6 +77,8 @@ struct Generator {
    alias_ids: HashMap<Alias, String>,
    unique_aliases: Vec<Alias>,
    symbol_map: HashMap<Symbol, Symbol>,
    reserved_word_sets: Vec<TokenSet>,
    reserved_word_set_ids_by_parse_state: Vec<usize>,
    field_names: Vec<String>,

    #[allow(unused)]

@ -119,7 +121,7 @@ impl Generator {
        swap(&mut main_lex_table, &mut self.main_lex_table);
        self.add_lex_function("ts_lex", main_lex_table);

        if self.keyword_capture_token.is_some() {
        if self.syntax_grammar.word_token.is_some() {
            let mut keyword_lex_table = LexTable::default();
            swap(&mut keyword_lex_table, &mut self.keyword_lex_table);
            self.add_lex_function("ts_lex_keywords", keyword_lex_table);

@ -135,7 +137,13 @@ impl Generator {
        }
        self.buffer.push_str(&lex_functions);

        self.add_lex_modes_list();
        self.add_lex_modes();

        if self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS && self.reserved_word_sets.len() > 1
        {
            self.add_reserved_word_sets();
        }

        self.add_parse_table();

        if !self.syntax_grammar.external_tokens.is_empty() {

@ -266,6 +274,22 @@ impl Generator {
            });
        }

        // Assign an id to each unique reserved word set
        self.reserved_word_sets.push(TokenSet::new());
        for state in &self.parse_table.states {
            let id = if let Some(ix) = self
                .reserved_word_sets
                .iter()
                .position(|set| *set == state.reserved_words)
            {
                ix
            } else {
                self.reserved_word_sets.push(state.reserved_words.clone());
                self.reserved_word_sets.len() - 1
            };
            self.reserved_word_set_ids_by_parse_state.push(id);
        }

        // Determine which states should use the "small state" representation, and which should
        // use the normal array representation.
        let threshold = cmp::min(SMALL_STATE_THRESHOLD, self.parse_table.symbols.len() / 2);

@ -365,6 +389,16 @@ impl Generator {
            "#define MAX_ALIAS_SEQUENCE_LENGTH {}",
            self.parse_table.max_aliased_production_length
        );
        add_line!(
            self,
            "#define MAX_RESERVED_WORD_SET_SIZE {}",
            self.reserved_word_sets
                .iter()
                .map(TokenSet::len)
                .max()
                .unwrap()
        );

        add_line!(
            self,
            "#define PRODUCTION_ID_COUNT {}",

@ -1016,25 +1050,66 @@ impl Generator {
        }
    }

    fn add_lex_modes_list(&mut self) {
    fn add_lex_modes(&mut self) {
        add_line!(
            self,
            "static const TSLexMode ts_lex_modes[STATE_COUNT] = {{"
            "static const {} ts_lex_modes[STATE_COUNT] = {{",
            if self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS {
                "TSLexerMode"
            } else {
                "TSLexMode"
            }
        );
        indent!(self);
        for (i, state) in self.parse_table.states.iter().enumerate() {
            add_whitespace!(self);
            add!(self, "[{}] = {{", i);
            if state.is_end_of_non_terminal_extra() {
                add_line!(self, "[{i}] = {{(TSStateId)(-1)}},");
            } else if state.external_lex_state_id > 0 {
                add_line!(
                    self,
                    "[{i}] = {{.lex_state = {}, .external_lex_state = {}}},",
                    state.lex_state_id,
                    state.external_lex_state_id
                );
                add!(self, "(TSStateId)(-1),");
            } else {
                add_line!(self, "[{i}] = {{.lex_state = {}}},", state.lex_state_id);
                add!(self, ".lex_state = {}", state.lex_state_id);

                if state.external_lex_state_id > 0 {
                    add!(
                        self,
                        ", .external_lex_state = {}",
                        state.external_lex_state_id
                    );
                }

                if self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS {
                    let reserved_word_set_id = self.reserved_word_set_ids_by_parse_state[i];
                    if reserved_word_set_id != 0 {
                        add!(self, ", .reserved_word_set_id = {reserved_word_set_id}");
                    }
                }
            }

            add!(self, "}},\n");
        }
        dedent!(self);
        add_line!(self, "}};");
        add_line!(self, "");
    }

    fn add_reserved_word_sets(&mut self) {
        add_line!(
            self,
            "static const TSSymbol ts_reserved_words[{}][MAX_RESERVED_WORD_SET_SIZE] = {{",
            self.reserved_word_sets.len(),
        );
        indent!(self);
        for (id, set) in self.reserved_word_sets.iter().enumerate() {
            if id == 0 {
                continue;
            }
            add_line!(self, "[{}] = {{", id);
            indent!(self);
            for token in set.iter() {
                add_line!(self, "{},", self.symbol_ids[&token]);
            }
            dedent!(self);
            add_line!(self, "}},");
        }
        dedent!(self);
        add_line!(self, "}};");

@ -1110,6 +1185,7 @@ impl Generator {
        let mut parse_table_entries = HashMap::new();
        let mut next_parse_action_list_index = 0;

        // Parse action list zero is reserved for the default value, used when a symbol is not valid.
        self.get_parse_action_list_id(
            &ParseTableEntry {
                actions: Vec::new(),

@ -1135,7 +1211,7 @@ impl Generator {
            .enumerate()
            .take(self.large_state_count)
        {
            add_line!(self, "[{i}] = {{");
            add_line!(self, "[STATE({i})] = {{");
            indent!(self);

            // Ensure the entries are in a deterministic order, since they are

@ -1167,9 +1243,11 @@ impl Generator {
                );
                add_line!(self, "[{}] = ACTIONS({entry_id}),", self.symbol_ids[symbol]);
            }

            dedent!(self);
            add_line!(self, "}},");
        }

        dedent!(self);
        add_line!(self, "}};");
        add_line!(self, "");

@ -1178,11 +1256,11 @@ impl Generator {
        add_line!(self, "static const uint16_t ts_small_parse_table[] = {{");
        indent!(self);

        let mut index = 0;
        let mut next_table_index = 0;
        let mut small_state_indices = Vec::new();
        let mut symbols_by_value = HashMap::<(usize, SymbolType), Vec<Symbol>>::new();
        for state in self.parse_table.states.iter().skip(self.large_state_count) {
            small_state_indices.push(index);
            small_state_indices.push(next_table_index);
            symbols_by_value.clear();

            terminal_entries.clear();

@ -1221,10 +1299,16 @@ impl Generator {
                (symbols.len(), *kind, *value, symbols[0])
            });

            add_line!(self, "[{index}] = {},", values_with_symbols.len());
            add_line!(
                self,
                "[{next_table_index}] = {},",
                values_with_symbols.len()
            );
            indent!(self);
            next_table_index += 1;

            for ((value, kind), symbols) in &mut values_with_symbols {
                next_table_index += 2 + symbols.len();
                if *kind == SymbolType::NonTerminal {
                    add_line!(self, "STATE({value}), {},", symbols.len());
                } else {

@ -1240,11 +1324,6 @@ impl Generator {
            }

            dedent!(self);

            index += 1 + values_with_symbols
                .iter()
                .map(|(_, symbols)| 2 + symbols.len())
                .sum::<usize>();
        }

        dedent!(self);

@ -1412,9 +1491,9 @@ impl Generator {
        }

        // Lexing
        add_line!(self, ".lex_modes = ts_lex_modes,");
        add_line!(self, ".lex_modes = (const void*)ts_lex_modes,");
        add_line!(self, ".lex_fn = ts_lex,");
        if let Some(keyword_capture_token) = self.keyword_capture_token {
        if let Some(keyword_capture_token) = self.syntax_grammar.word_token {
            add_line!(self, ".keyword_lex_fn = ts_lex_keywords,");
            add_line!(
                self,

@ -1439,8 +1518,22 @@ impl Generator {

        add_line!(self, ".primary_state_ids = ts_primary_state_ids,");

        if self.abi_version >= ABI_VERSION_WITH_METADATA {
        if self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS {
            add_line!(self, ".name = \"{}\",", self.language_name);

            if self.reserved_word_sets.len() > 1 {
                add_line!(self, ".reserved_words = &ts_reserved_words[0][0],");
            }

            add_line!(
                self,
                ".max_reserved_word_set_size = {},",
                self.reserved_word_sets
                    .iter()
                    .map(TokenSet::len)
                    .max()
                    .unwrap()
            );
        }

        dedent!(self);

@ -1716,26 +1809,17 @@ pub fn render_c_code(
    );

    Generator {
        buffer: String::new(),
        indent_level: 0,
        language_name: name.to_string(),
        large_state_count: 0,
        parse_table: tables.parse_table,
        main_lex_table: tables.main_lex_table,
        keyword_lex_table: tables.keyword_lex_table,
        keyword_capture_token: tables.word_token,
        large_character_sets: tables.large_character_sets,
        large_character_set_info: Vec::new(),
        syntax_grammar,
        lexical_grammar,
        default_aliases,
        symbol_ids: HashMap::new(),
        symbol_order: HashMap::new(),
        alias_ids: HashMap::new(),
        symbol_map: HashMap::new(),
        unique_aliases: Vec::new(),
        field_names: Vec::new(),
        abi_version,
        ..Default::default()
    }
    .generate()
}
@ -68,13 +68,17 @@ pub enum Rule {
    },
    Repeat(Box<Rule>),
    Seq(Vec<Rule>),
    Reserved {
        rule: Box<Rule>,
        context_name: String,
    },
}

// Because tokens are represented as small (~400 max) unsigned integers,
// sets of tokens can be efficiently represented as bit vectors with each
// index corresponding to a token, and each value representing whether or not
// the token is present in the set.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[derive(Default, Clone, PartialEq, Eq, Hash)]
pub struct TokenSet {
    terminal_bits: SmallBitVec,
    external_bits: SmallBitVec,

@ -82,6 +86,32 @@ pub struct TokenSet {
    end_of_nonterminal_extra: bool,
}

impl fmt::Debug for TokenSet {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_list().entries(self.iter()).finish()
    }
}

impl PartialOrd for TokenSet {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for TokenSet {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        self.terminal_bits
            .iter()
            .cmp(other.terminal_bits.iter())
            .then_with(|| self.external_bits.iter().cmp(other.external_bits.iter()))
            .then_with(|| self.eof.cmp(&other.eof))
            .then_with(|| {
                self.end_of_nonterminal_extra
                    .cmp(&other.end_of_nonterminal_extra)
            })
    }
}

impl Rule {
    pub fn field(name: String, content: Self) -> Self {
        add_metadata(content, move |params| {

@ -154,7 +184,9 @@ impl Rule {
        match self {
            Self::Blank | Self::Pattern(..) | Self::NamedSymbol(_) | Self::Symbol(_) => false,
            Self::String(string) => string.is_empty(),
            Self::Metadata { rule, .. } | Self::Repeat(rule) => rule.is_empty(),
            Self::Metadata { rule, .. } | Self::Repeat(rule) | Self::Reserved { rule, .. } => {
                rule.is_empty()
            }
            Self::Choice(rules) => rules.iter().any(Self::is_empty),
            Self::Seq(rules) => rules.iter().all(Self::is_empty),
        }

@ -394,6 +426,9 @@ impl TokenSet {
        };
        if other.index < vec.len() && vec[other.index] {
            vec.set(other.index, false);
            while vec.last() == Some(false) {
                vec.pop();
            }
            return true;
        }
        false

@ -406,6 +441,13 @@ impl TokenSet {
            && !self.external_bits.iter().any(|a| a)
    }

    pub fn len(&self) -> usize {
        self.eof as usize
            + self.end_of_nonterminal_extra as usize
            + self.terminal_bits.iter().filter(|b| *b).count()
            + self.external_bits.iter().filter(|b| *b).count()
    }

    pub fn insert_all_terminals(&mut self, other: &Self) -> bool {
        let mut result = false;
        if other.terminal_bits.len() > self.terminal_bits.len() {
@ -47,6 +47,7 @@ pub struct ParseState {
    pub id: ParseStateId,
    pub terminal_entries: IndexMap<Symbol, ParseTableEntry, BuildHasherDefault<FxHasher>>,
    pub nonterminal_entries: IndexMap<Symbol, GotoAction, BuildHasherDefault<FxHasher>>,
    pub reserved_words: TokenSet,
    pub lex_state_id: usize,
    pub external_lex_state_id: usize,
    pub core_id: usize,

@ -64,7 +65,7 @@ pub struct ProductionInfo {
    pub field_map: BTreeMap<String, Vec<FieldLocation>>,
}

#[derive(Debug, PartialEq, Eq)]
#[derive(Debug, Default, PartialEq, Eq)]
pub struct ParseTable {
    pub states: Vec<ParseState>,
    pub symbols: Vec<Symbol>,
@ -57,6 +57,20 @@
      }
    },

    "reserved": {
      "type": "object",
      "patternProperties": {
        "^[a-zA-Z_]\\w*$": {
          "type": "array",
          "uniqueItems": true,
          "items": {
            "$ref": "#/definitions/rule"
          }
        }
      },
      "additionalProperties": false
    },

    "externals": {
      "type": "array",
      "uniqueItems": true,
@ -470,6 +470,7 @@ down to a single token.
* **Immediate Tokens : `token.immediate(rule)`** - Usually, whitespace (and any other extras, such as comments) is optional before each token. This function means that the token will only match if there is no whitespace before it.
* **Aliases : `alias(rule, name)`** - This function causes the given rule to *appear* with an alternative name in the syntax tree. If `name` is a *symbol*, as in `alias($.foo, $.bar)`, then the aliased rule will *appear* as a [named node][named-vs-anonymous-nodes-section] called `bar`. And if `name` is a *string literal*, as in `alias($.foo, 'bar')`, then the aliased rule will appear as an [anonymous node][named-vs-anonymous-nodes-section], as if the rule had been written as the simple string.
* **Field Names : `field(name, rule)`** - This function assigns a *field name* to the child node(s) matched by the given rule. In the resulting syntax tree, you can then use that field name to access specific children.
* **Reserved Keywords : `reserved(wordset, rule)`** - This function overrides the global reserved word set with the set named by the `wordset` parameter for the given rule. This is useful for contextual keywords, such as `if` in JavaScript, which cannot be used as a variable name in most contexts but *can* be used as a property name; see the sketch below.
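As a quick illustration (a minimal sketch taken from the `reserved_words` test grammar added later in this commit; the rule and set names are that grammar's own choices):

```js
// Globally, `if`, `while`, and `var` are reserved, so they can never be
// parsed as ordinary identifiers. Inside an object literal, the key opts
// into the smaller `property` word set, so `if: a` still parses as a pair.
pair: $ => seq(reserved('property', $.identifier), ':', $._expression),
```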

In addition to the `name` and `rules` fields, grammars have a few other optional public fields that influence the behavior of the parser.

@ -479,7 +480,8 @@ In addition to the `name` and `rules` fields, grammars have a few other optional
* **`externals`** - an array of token names which can be returned by an [*external scanner*](#external-scanners). External scanners allow you to write custom C code which runs during the lexing process in order to handle lexical rules (e.g. Python's indentation tokens) that cannot be described by regular expressions.
* **`precedences`** - an array of arrays of strings, where each array of strings defines named precedence levels in descending order. These names can be used in the `prec` functions to define precedence relative only to other names in the array, rather than globally. Can only be used with parse precedence, not lexical precedence.
* **`word`** - the name of a token that will match keywords for the purpose of the [keyword extraction](#keyword-extraction) optimization.
* **`supertypes`** an array of hidden rule names which should be considered to be 'supertypes' in the generated [*node types* file][static-node-types].
* **`supertypes`** - an array of hidden rule names which should be considered to be 'supertypes' in the generated [*node types* file][static-node-types].
* **`reserved`** - an object of reserved word sets, similar in structure to the main `rules` property, where each key names a set and each value is an array of reserved rules. Each rule in such an array must be a terminal token: a string, a regex, a `token` rule, or another terminal rule. The *first* word set in the object is the global set, meaning it applies to every rule in every parse state. Some keywords are contextual, however: in JavaScript, for example, keywords typically cannot be used as ordinary variable names, but they *can* be used as property names. In that situation, wrap the rule in the `reserved` function and pass the name of a word set declared in the `reserved` object; a set that corresponds to an empty array signifies that *no* keywords are reserved in that context. A sketch follows this list.
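For example (a minimal sketch mirroring the `reserved_words` test grammar included in this commit; the set names `global` and `property` are simply the names that grammar declares):

```js
reserved: {
  // The first set is the global one: these words are reserved in every
  // parse state, so they can never be parsed as plain identifiers.
  global: $ => ['if', 'while', 'var'],
  // A named contextual set, selected per rule via `reserved('property', rule)`.
  // An empty array here would signify that *no* words are reserved in the
  // contexts that use this set.
  property: $ => ['var'],
},
```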

## Writing the Grammar
@ -29,7 +29,7 @@ uint32_t ts_language_version(const TSLanguage *self) {
}

const char *ts_language_name(const TSLanguage *self) {
  return self->version >= LANGUAGE_VERSION_WITH_METADATA ? self->name : NULL;
  return self->version >= LANGUAGE_VERSION_WITH_RESERVED_WORDS ? self->name : NULL;
}

uint32_t ts_language_field_count(const TSLanguage *self) {

@ -56,6 +56,39 @@ void ts_language_table_entry(
  }
}

TSLexerMode ts_language_lex_mode_for_state(
  const TSLanguage *self,
  TSStateId state
) {
  if (self->version < 15) {
    TSLexMode mode = ((const TSLexMode *)self->lex_modes)[state];
    return (TSLexerMode) {
      .lex_state = mode.lex_state,
      .external_lex_state = mode.external_lex_state,
      .reserved_word_set_id = 0,
    };
  } else {
    return self->lex_modes[state];
  }
}

bool ts_language_is_reserved_word(
  const TSLanguage *self,
  TSStateId state,
  TSSymbol symbol
) {
  TSLexerMode lex_mode = ts_language_lex_mode_for_state(self, state);
  if (lex_mode.reserved_word_set_id > 0) {
    unsigned start = lex_mode.reserved_word_set_id * self->max_reserved_word_set_size;
    unsigned end = start + self->max_reserved_word_set_size;
    for (unsigned i = start; i < end; i++) {
      if (self->reserved_words[i] == symbol) return true;
      if (self->reserved_words[i] == 0) break;
    }
  }
  return false;
}

TSSymbolMetadata ts_language_symbol_metadata(
  const TSLanguage *self,
  TSSymbol symbol
@ -10,7 +10,7 @@ extern "C" {

#define ts_builtin_sym_error_repeat (ts_builtin_sym_error - 1)

#define LANGUAGE_VERSION_WITH_METADATA 15
#define LANGUAGE_VERSION_WITH_RESERVED_WORDS 15
#define LANGUAGE_VERSION_WITH_PRIMARY_STATES 14

typedef struct {

@ -36,9 +36,9 @@ typedef struct {
} LookaheadIterator;

void ts_language_table_entry(const TSLanguage *self, TSStateId state, TSSymbol symbol, TableEntry *result);

TSLexerMode ts_language_lex_mode_for_state(const TSLanguage *self, TSStateId state);
bool ts_language_is_reserved_word(const TSLanguage *self, TSStateId state, TSSymbol symbol);
TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *self, TSSymbol symbol);

TSSymbol ts_language_public_symbol(const TSLanguage *self, TSSymbol symbol);

static inline bool ts_language_is_symbol_external(const TSLanguage *self, TSSymbol symbol) {
@ -80,7 +80,7 @@
static const unsigned MAX_VERSION_COUNT = 6;
static const unsigned MAX_VERSION_COUNT_OVERFLOW = 4;
static const unsigned MAX_SUMMARY_DEPTH = 16;
static const unsigned MAX_COST_DIFFERENCE = 16 * ERROR_COST_PER_SKIPPED_TREE;
static const unsigned MAX_COST_DIFFERENCE = 18 * ERROR_COST_PER_SKIPPED_TREE;
static const unsigned OP_COUNT_PER_PARSER_TIMEOUT_CHECK = 100;

typedef struct {

@ -342,7 +342,7 @@ static bool ts_parser__better_version_exists(
  return false;
}

static bool ts_parser__call_main_lex_fn(TSParser *self, TSLexMode lex_mode) {
static bool ts_parser__call_main_lex_fn(TSParser *self, TSLexerMode lex_mode) {
  if (ts_language_is_wasm(self->language)) {
    return ts_wasm_store_call_lex_main(self->wasm_store, lex_mode.lex_state);
  } else {

@ -473,10 +473,10 @@ static bool ts_parser__can_reuse_first_leaf(
  Subtree tree,
  TableEntry *table_entry
) {
  TSLexMode current_lex_mode = self->language->lex_modes[state];
  TSSymbol leaf_symbol = ts_subtree_leaf_symbol(tree);
  TSStateId leaf_state = ts_subtree_leaf_parse_state(tree);
  TSLexMode leaf_lex_mode = self->language->lex_modes[leaf_state];
  TSLexerMode current_lex_mode = ts_language_lex_mode_for_state(self->language, state);
  TSLexerMode leaf_lex_mode = ts_language_lex_mode_for_state(self->language, leaf_state);

  // At the end of a non-terminal extra node, the lexer normally returns
  // NULL, which indicates that the parser should look for a reduce action

@ -487,7 +487,7 @@ static bool ts_parser__can_reuse_first_leaf(
  // If the token was created in a state with the same set of lookaheads, it is reusable.
  if (
    table_entry->action_count > 0 &&
    memcmp(&leaf_lex_mode, &current_lex_mode, sizeof(TSLexMode)) == 0 &&
    memcmp(&leaf_lex_mode, &current_lex_mode, sizeof(TSLexerMode)) == 0 &&
    (
      leaf_symbol != self->language->keyword_capture_token ||
      (!ts_subtree_is_keyword(tree) && ts_subtree_parse_state(tree) == state)

@ -507,7 +507,7 @@ static Subtree ts_parser__lex(
  StackVersion version,
  TSStateId parse_state
) {
  TSLexMode lex_mode = self->language->lex_modes[parse_state];
  TSLexerMode lex_mode = ts_language_lex_mode_for_state(self->language, parse_state);
  if (lex_mode.lex_state == (uint16_t)-1) {
    LOG("no_lookahead_after_non_terminal_extra");
    return NULL_SUBTREE;

@ -601,7 +601,7 @@ static Subtree ts_parser__lex(

    if (!error_mode) {
      error_mode = true;
      lex_mode = self->language->lex_modes[ERROR_STATE];
      lex_mode = ts_language_lex_mode_for_state(self->language, ERROR_STATE);
      ts_lexer_reset(&self->lexer, start_position);
      continue;
    }

@ -658,7 +658,10 @@ static Subtree ts_parser__lex(
    if (
      is_keyword &&
      self->lexer.token_end_position.bytes == end_byte &&
      ts_language_has_actions(self->language, parse_state, self->lexer.data.result_symbol)
      (
        ts_language_has_actions(self->language, parse_state, self->lexer.data.result_symbol) ||
        ts_language_is_reserved_word(self->language, parse_state, self->lexer.data.result_symbol)
      )
    ) {
      symbol = self->lexer.data.result_symbol;
    }

@ -1684,15 +1687,20 @@ static bool ts_parser__advance(
      return true;
    }

    // If there were no parse actions for the current lookahead token, then
    // it is not valid in this state. If the current lookahead token is a
    // keyword, then switch to treating it as the normal word token if that
    // token is valid in this state.
    // If the current lookahead token is a keyword that is not valid, but the
    // default word token *is* valid, then treat the lookahead token as the word
    // token instead.
    if (
      ts_subtree_is_keyword(lookahead) &&
      ts_subtree_symbol(lookahead) != self->language->keyword_capture_token
      ts_subtree_symbol(lookahead) != self->language->keyword_capture_token &&
      !ts_language_is_reserved_word(self->language, state, ts_subtree_symbol(lookahead))
    ) {
      ts_language_table_entry(self->language, state, self->language->keyword_capture_token, &table_entry);
      ts_language_table_entry(
        self->language,
        state,
        self->language->keyword_capture_token,
        &table_entry
      );
      if (table_entry.action_count > 0) {
        LOG(
          "switch from_keyword:%s, to_word_token:%s",

@ -1707,19 +1715,10 @@ static bool ts_parser__advance(
      }
    }

    // If the current lookahead token is not valid and the parser is
    // already in the error state, restart the error recovery process.
    // TODO - can this be unified with the other `RECOVER` case above?
    if (state == ERROR_STATE) {
      ts_parser__recover(self, version, lookahead);
      return true;
    }

    // If the current lookahead token is not valid and the previous
    // subtree on the stack was reused from an old tree, it isn't actually
    // valid to reuse it. Remove it from the stack, and in its place,
    // push each of its children. Then try again to process the current
    // lookahead.
    // If the current lookahead token is not valid and the previous subtree on
    // the stack was reused from an old tree, then it wasn't actually valid to
    // reuse that previous subtree. Remove it from the stack, and in its place,
    // push each of its children. Then try again to process the current lookahead.
    if (ts_parser__breakdown_top_of_stack(self, version)) {
      state = ts_stack_state(self->stack, version);
      ts_subtree_release(&self->tree_pool, lookahead);

@ -1727,11 +1726,11 @@ static bool ts_parser__advance(
      continue;
    }

    // At this point, the current lookahead token is definitely not valid
    // for this parse stack version. Mark this version as paused and continue
    // processing any other stack versions that might exist. If some other
    // version advances successfully, then this version can simply be removed.
    // But if all versions end up paused, then error recovery is needed.
    // Otherwise, there is definitely an error in this version of the parse stack.
    // Mark this version as paused and continue processing any other stack
    // versions that exist. If some other version advances successfully, then
    // this version can simply be removed. But if all versions end up paused,
    // then error recovery is needed.
    LOG("detect_error");
    ts_stack_pause(self->stack, version, lookahead);
    return true;
@ -79,6 +79,12 @@ typedef struct {
  uint16_t external_lex_state;
} TSLexMode;

typedef struct {
  uint16_t lex_state;
  uint16_t external_lex_state;
  uint16_t reserved_word_set_id;
} TSLexerMode;

typedef union {
  TSParseAction action;
  struct {

@ -115,7 +121,7 @@ struct TSLanguage {
  const TSSymbol *public_symbol_map;
  const uint16_t *alias_map;
  const TSSymbol *alias_sequences;
  const TSLexMode *lex_modes;
  const TSLexerMode *lex_modes;
  bool (*lex_fn)(TSLexer *, TSStateId);
  bool (*keyword_lex_fn)(TSLexer *, TSStateId);
  TSSymbol keyword_capture_token;

@ -130,6 +136,8 @@ struct TSLanguage {
  } external_scanner;
  const TSStateId *primary_state_ids;
  const char *name;
  const TSSymbol *reserved_words;
  uint16_t max_reserved_word_set_size;
};

static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, int32_t lookahead) {
File diff suppressed because it is too large
@ -153,6 +153,9 @@ typedef struct {
    int32_t deserialize;
  } external_scanner;
  int32_t primary_state_ids;
  int32_t name;
  int32_t reserved_words;
  uint16_t max_reserved_word_set_size;
} LanguageInWasmMemory;

// LexerInWasmMemory - The memory layout of a `TSLexer` when compiled to wasm32.

@ -414,6 +417,17 @@ static void *copy_strings(
  return result;
}

static void *copy_string(
  const uint8_t *data,
  int32_t address
) {
  const char *string = (const char *)&data[address];
  size_t len = strlen(string);
  char *result = ts_malloc(len + 1);
  memcpy(result, string, len + 1);
  return result;
}

static bool name_eq(const wasm_name_t *name, const char *string) {
  return strncmp(string, name->data, name->size) == 0;
}

@ -1202,24 +1216,24 @@ const TSLanguage *ts_wasm_store_load_language(
  memcpy(&wasm_language, &memory[language_address], sizeof(LanguageInWasmMemory));

  int32_t addresses[] = {
    wasm_language.alias_map,
    wasm_language.alias_sequences,
    wasm_language.field_map_entries,
    wasm_language.field_map_slices,
    wasm_language.field_names,
    wasm_language.keyword_lex_fn,
    wasm_language.lex_fn,
    wasm_language.lex_modes,
    wasm_language.parse_actions,
    wasm_language.parse_table,
    wasm_language.primary_state_ids,
    wasm_language.primary_state_ids,
    wasm_language.public_symbol_map,
    wasm_language.small_parse_table,
    wasm_language.small_parse_table_map,
    wasm_language.symbol_metadata,
    wasm_language.symbol_metadata,
    wasm_language.parse_actions,
    wasm_language.symbol_names,
    wasm_language.field_names,
    wasm_language.field_map_slices,
    wasm_language.field_map_entries,
    wasm_language.symbol_metadata,
    wasm_language.public_symbol_map,
    wasm_language.alias_map,
    wasm_language.alias_sequences,
    wasm_language.lex_modes,
    wasm_language.lex_fn,
    wasm_language.keyword_lex_fn,
    wasm_language.primary_state_ids,
    wasm_language.name,
    wasm_language.reserved_words,
    wasm_language.external_token_count > 0 ? wasm_language.external_scanner.states : 0,
    wasm_language.external_token_count > 0 ? wasm_language.external_scanner.symbol_map : 0,
    wasm_language.external_token_count > 0 ? wasm_language.external_scanner.create : 0,

@ -1274,7 +1288,7 @@ const TSLanguage *ts_wasm_store_load_language(
    ),
    .lex_modes = copy(
      &memory[wasm_language.lex_modes],
      wasm_language.state_count * sizeof(TSLexMode)
      wasm_language.state_count * sizeof(TSLexerMode)
    ),
  };

@ -1350,6 +1364,15 @@ const TSLanguage *ts_wasm_store_load_language(
    );
  }

  if (language->version >= LANGUAGE_VERSION_WITH_RESERVED_WORDS) {
    language->name = copy_string(memory, wasm_language.name);
    language->reserved_words = copy(
      &memory[wasm_language.reserved_words],
      wasm_language.max_reserved_word_set_size * sizeof(TSSymbol)
    );
    language->max_reserved_word_set_size = wasm_language.max_reserved_word_set_size;
  }

  if (language->external_token_count > 0) {
    language->external_scanner.symbol_map = copy(
      &memory[wasm_language.external_scanner.symbol_map],

@ -1731,6 +1754,8 @@ void ts_wasm_language_release(const TSLanguage *self) {
  ts_free((void *)self->field_map_slices);
  ts_free((void *)self->field_names);
  ts_free((void *)self->lex_modes);
  ts_free((void *)self->name);
  ts_free((void *)self->reserved_words);
  ts_free((void *)self->parse_actions);
  ts_free((void *)self->parse_table);
  ts_free((void *)self->primary_state_ids);
101
test/fixtures/test_grammars/reserved_words/corpus.txt
vendored
Normal file
@@ -0,0 +1,101 @@
==============
Valid Code
==============

if (a) {
  var b = {
    c: d,
    e: f,
  };
  while (g) {
    h();
  }
}

---

(program
  (if_statement
    (parenthesized_expression (identifier))
    (block
      (var_declaration
        (identifier)
        (object
          (pair (identifier) (identifier))
          (pair (identifier) (identifier))))
      (while_statement
        (parenthesized_expression (identifier))
        (block (expression_statement (call_expression (identifier))))))))

================================================
Error detected at globally-reserved word
================================================

var a =

if (something) {
  c();
}

---

(program
  (ERROR (identifier))
  (if_statement
    (parenthesized_expression (identifier))
    (block
      (expression_statement (call_expression (identifier))))))

================================================
Object keys that are reserved in other contexts
================================================

var x = {
  if: a,
  while: b,
};

---

(program
  (var_declaration
    (identifier)
    (object
      (pair (identifier) (identifier))
      (pair (identifier) (identifier)))))

================================================
Error detected at context-specific reserved word
================================================

var x = {
var y = z;

---

(program
  (ERROR (identifier))

  ; Important - var declaration is still recognized,
  ; because in this example grammar, `var` is a keyword
  ; even within object literals.
  (var_declaration
    (identifier)
    (identifier)))

=============================================
Other tokens that overlap with keyword tokens
=============================================

var a = /reserved-words-should-not-affect-this/;
var d = /if/;

---

(program
  (var_declaration
    (identifier)
    (regex (regex_pattern)))
  (var_declaration
    (identifier)
    (regex (regex_pattern))))
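Taken together, these cases pin down both reserved-word sets. A rough sketch of the globally-reserved expectation driven through the node bindings (the require paths are assumptions, not part of this commit; the grammar is assumed to be compiled in place):

// Sketch only: both require paths are assumptions.
const Parser = require("tree-sitter");
const ReservedWords = require("./test/fixtures/test_grammars/reserved_words");

const parser = new Parser();
parser.setLanguage(ReservedWords);

// `if` is globally reserved, so the dangling `var a =` cannot absorb it as
// an identifier; the parser emits an ERROR node and recovers, still
// producing a well-formed if_statement (the second corpus case above).
const tree = parser.parse("var a =\nif (something) {\n  c();\n}\n");
console.log(tree.rootNode.toString());
// expected: (program (ERROR (identifier)) (if_statement ...))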
67  test/fixtures/test_grammars/reserved_words/grammar.js  vendored  Normal file

@@ -0,0 +1,67 @@
const RESERVED_NAMES = ["if", "while", "var"];
const RESERVED_PROPERTY_NAMES = ["var"];

module.exports = grammar({
  name: "reserved_words",

  reserved: {
    global: $ => RESERVED_NAMES,
    property: $ => RESERVED_PROPERTY_NAMES,
  },

  word: $ => $.identifier,

  rules: {
    program: $ => repeat($._statement),

    block: $ => seq("{", repeat($._statement), "}"),

    _statement: $ => choice(
      $.var_declaration,
      $.if_statement,
      $.while_statement,
      $.expression_statement,
    ),

    var_declaration: $ => seq("var", $.identifier, "=", $._expression, ";"),

    if_statement: $ => seq("if", $.parenthesized_expression, $.block),

    while_statement: $ => seq("while", $.parenthesized_expression, $.block),

    expression_statement: $ => seq($._expression, ";"),

    _expression: $ => choice(
      $.identifier,
      $.parenthesized_expression,
      $.call_expression,
      $.member_expression,
      $.object,
      $.regex,
    ),

    parenthesized_expression: $ => seq("(", $._expression, ")"),

    member_expression: $ => seq($._expression, ".", $.identifier),

    call_expression: $ => seq($._expression, "(", repeat(seq($._expression, ",")), ")"),

    object: $ => seq("{", repeat(seq(choice($.pair, $.getter), ",")), "}"),

    regex: $ => seq('/', $.regex_pattern, '/'),

    regex_pattern: $ => token(prec(-1, /[^/\n]+/)),

    pair: $ => seq(reserved('property', $.identifier), ":", $._expression),

    getter: $ => seq(
      "get",
      reserved('property', $.identifier),
      "(",
      ")",
      $.block,
    ),

    identifier: $ => /[a-z_]\w*/,
  },
});
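Here the top-level `reserved` field declares the named word sets, and `reserved('property', $.identifier)` inside `pair` and `getter` swaps the global set for the smaller `property` set in object-key position. A brief behavioral sketch, with the same assumed require paths as above:

// Sketch only: require paths are assumptions, not part of this commit.
const Parser = require("tree-sitter");
const ReservedWords = require("./test/fixtures/test_grammars/reserved_words");

const parser = new Parser();
parser.setLanguage(ReservedWords);

// `if` and `while` are reserved globally but not in the `property` set,
// so they are legal object keys; `var` is reserved in both contexts.
console.log(parser.parse("var x = { if: a, while: b, };").rootNode.toString());
// expected: a clean (var_declaration ... (object (pair ...) (pair ...)))
console.log(parser.parse("var x = { var: a, };").rootNode.toString());
// expected: contains an ERROR, since `var` stays reserved as a key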
@@ -182,13 +182,21 @@ pub fn run_wasm(args: &BuildWasm) -> Result<()> {
 pub fn run_wasm_stdlib() -> Result<()> {
     let export_flags = include_str!("../../lib/src/wasm/stdlib-symbols.txt")
         .lines()
-        .map(|line| format!("-Wl,--export={}", &line[1..line.len() - 1]))
+        .map(|line| format!("-Wl,--export={}", &line[1..line.len() - 2]))
         .collect::<Vec<String>>();

-    let mut command = Command::new("target/wasi-sdk-21.0/bin/clang-17");
+    let mut command = Command::new("docker");

     let output = command
         .args([
+            "run",
+            "--rm",
+            "-v",
+            format!("{}:/src", std::env::current_dir().unwrap().display()).as_str(),
+            "-w",
+            "/src",
+            "ghcr.io/webassembly/wasi-sdk",
+            "/opt/wasi-sdk/bin/clang",
             "-o",
             "stdlib.wasm",
             "-Os",
@@ -139,6 +139,9 @@ struct Test {
     /// Don't capture the output
     #[arg(long)]
     nocapture: bool,
+    /// Enable the wasm tests.
+    #[arg(long, short)]
+    wasm: bool,
 }

 #[derive(Args)]
@@ -90,6 +90,9 @@ pub fn run(args: &Test) -> Result<()> {
     } else {
         let mut cargo_cmd = Command::new("cargo");
         cargo_cmd.arg("test");
+        if args.wasm {
+            cargo_cmd.arg("--features").arg("wasm");
+        }
         if !test_flags.is_empty() {
             cargo_cmd.arg(test_flags);
         }