From d2e06bf130a479ba90f2657fd67f129e5a9ad69f Mon Sep 17 00:00:00 2001 From: WillLillis Date: Tue, 11 Mar 2025 17:21:33 -0400 Subject: [PATCH] fix(generate): use topological sort for subtype map --- Cargo.lock | 8 +++ Cargo.toml | 1 + crates/cli/Cargo.toml | 1 + crates/generate/Cargo.toml | 1 + crates/generate/src/generate.rs | 4 +- crates/generate/src/node_types.rs | 96 +++++++++++++++++++++++-------- 6 files changed, 85 insertions(+), 26 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a82b72c9..9607fbdc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1937,6 +1937,12 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801" +[[package]] +name = "topological-sort" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea68304e134ecd095ac6c3574494fc62b909f416c4fca77e440530221e549d3d" + [[package]] name = "tracing" version = "0.1.41" @@ -2019,6 +2025,7 @@ dependencies = [ "streaming-iterator", "tempfile", "tiny_http", + "topological-sort", "tree-sitter", "tree-sitter-config", "tree-sitter-generate", @@ -2061,6 +2068,7 @@ dependencies = [ "serde_json", "smallbitvec", "thiserror 2.0.12", + "topological-sort", "tree-sitter", "url", ] diff --git a/Cargo.toml b/Cargo.toml index 08b8a92d..704bd747 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -148,6 +148,7 @@ tempfile = "3.20.0" thiserror = "2.0.12" tiny_http = "0.12.0" toml = "0.8.23" +topological-sort = "0.2.2" unindent = "0.2.4" url = { version = "2.5.4", features = ["serde"] } walkdir = "2.5.0" diff --git a/crates/cli/Cargo.toml b/crates/cli/Cargo.toml index 463e063e..9bb53de0 100644 --- a/crates/cli/Cargo.toml +++ b/crates/cli/Cargo.toml @@ -62,6 +62,7 @@ similar.workspace = true smallbitvec.workspace = true streaming-iterator.workspace = true tiny_http.workspace = true +topological-sort.workspace = true url.workspace = true walkdir.workspace = true wasmparser.workspace = true diff --git a/crates/generate/Cargo.toml b/crates/generate/Cargo.toml index d48a0eb9..629d6469 100644 --- a/crates/generate/Cargo.toml +++ b/crates/generate/Cargo.toml @@ -32,6 +32,7 @@ serde.workspace = true serde_json.workspace = true smallbitvec.workspace = true thiserror.workspace = true +topological-sort.workspace = true tree-sitter.workspace = true diff --git a/crates/generate/src/generate.rs b/crates/generate/src/generate.rs index a981d118..6a398531 100644 --- a/crates/generate/src/generate.rs +++ b/crates/generate/src/generate.rs @@ -27,7 +27,7 @@ mod tables; use build_tables::build_tables; pub use build_tables::ParseTableBuilderError; use grammars::InputGrammar; -pub use node_types::VariableInfoError; +pub use node_types::{SuperTypeCycleError, VariableInfoError}; use parse_grammar::parse_grammar; pub use parse_grammar::ParseGrammarError; use prepare_grammar::prepare_grammar; @@ -70,6 +70,8 @@ pub enum GenerateError { BuildTables(#[from] ParseTableBuilderError), #[error(transparent)] ParseVersion(#[from] ParseVersionError), + #[error(transparent)] + SuperTypeCycle(#[from] SuperTypeCycleError), } impl From for GenerateError { diff --git a/crates/generate/src/node_types.rs b/crates/generate/src/node_types.rs index b5867c6e..d172b244 100644 --- a/crates/generate/src/node_types.rs +++ b/crates/generate/src/node_types.rs @@ -1,7 +1,4 @@ -use std::{ - cmp::Ordering, - collections::{BTreeMap, HashMap, HashSet}, -}; +use std::collections::{BTreeMap, HashMap, HashSet}; use anyhow::Result; use serde::Serialize; @@ -444,12 +441,33 @@ pub fn get_supertype_symbol_map( supertype_symbol_map } +pub type SuperTypeCycleResult = Result; + +#[derive(Debug, Error, Serialize)] +pub struct SuperTypeCycleError { + items: Vec, +} + +impl std::fmt::Display for SuperTypeCycleError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Dependency cycle detected in node types:")?; + for (i, item) in self.items.iter().enumerate() { + write!(f, " {item}")?; + if i < self.items.len() - 1 { + write!(f, ",")?; + } + } + + Ok(()) + } +} + pub fn generate_node_types_json( syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar, default_aliases: &AliasMap, variable_info: &[VariableInfo], -) -> Vec { +) -> SuperTypeCycleResult> { let mut node_types_json = BTreeMap::new(); let child_type_to_node_type = |child_type: &ChildType| match child_type { @@ -627,15 +645,33 @@ pub fn generate_node_types_json( } } - // Sort the subtype map so that subtypes are listed before their supertypes. - subtype_map.sort_by(|a, b| { - if b.1.contains(&a.0) { - Ordering::Less - } else if a.1.contains(&b.0) { - Ordering::Greater - } else { - Ordering::Equal + // Sort the subtype map topologically so that subtypes are listed before their supertypes. + let mut sorted_kinds = Vec::with_capacity(subtype_map.len()); + let mut top_sort = topological_sort::TopologicalSort::::new(); + for (supertype, subtypes) in &subtype_map { + for subtype in subtypes { + top_sort.add_dependency(subtype.kind.clone(), supertype.kind.clone()); } + } + loop { + let mut next_kinds = top_sort.pop_all(); + match (next_kinds.is_empty(), top_sort.is_empty()) { + (true, true) => break, + (true, false) => { + let mut items = top_sort.collect::>(); + items.sort(); + return Err(SuperTypeCycleError { items }); + } + (false, _) => { + next_kinds.sort(); + sorted_kinds.extend(next_kinds); + } + } + } + subtype_map.sort_by(|a, b| { + let a_idx = sorted_kinds.iter().position(|n| n.eq(&a.0.kind)).unwrap(); + let b_idx = sorted_kinds.iter().position(|n| n.eq(&b.0.kind)).unwrap(); + a_idx.cmp(&b_idx) }); for node_type_json in node_types_json.values_mut() { @@ -744,7 +780,7 @@ pub fn generate_node_types_json( .then_with(|| a.kind.cmp(&b.kind)) }); result.dedup(); - result + Ok(result) } fn process_supertypes(info: &mut FieldInfoJSON, subtype_map: &[(NodeTypeJSON, Vec)]) { @@ -830,7 +866,8 @@ mod tests { }, ], ..Default::default() - }); + }) + .unwrap(); assert_eq!(node_types.len(), 3); @@ -929,7 +966,8 @@ mod tests { }, ], ..Default::default() - }); + }) + .unwrap(); assert_eq!(node_types.len(), 4); @@ -1152,7 +1190,8 @@ mod tests { }, ], ..Default::default() - }); + }) + .unwrap(); assert_eq!( node_types[0], @@ -1241,7 +1280,8 @@ mod tests { }, ], ..Default::default() - }); + }) + .unwrap(); assert_eq!( node_types[0], @@ -1326,7 +1366,8 @@ mod tests { }, ], ..Default::default() - }); + }) + .unwrap(); assert_eq!( node_types[0], @@ -1400,7 +1441,8 @@ mod tests { }, ], ..Default::default() - }); + }) + .unwrap(); assert_eq!(node_types.iter().find(|t| t.kind == "foo_identifier"), None); assert_eq!( @@ -1456,7 +1498,8 @@ mod tests { }, ], ..Default::default() - }); + }) + .unwrap(); assert_eq!( node_types[0], @@ -1505,7 +1548,8 @@ mod tests { ]), }], ..Default::default() - }); + }) + .unwrap(); assert_eq!( node_types, @@ -1553,7 +1597,8 @@ mod tests { }, ], ..Default::default() - }); + }) + .unwrap(); assert_eq!( &node_types @@ -1672,7 +1717,8 @@ mod tests { }, ], ..Default::default() - }); + }) + .unwrap(); assert_eq!( node_types.iter().map(|n| &n.kind).collect::>(), @@ -1999,7 +2045,7 @@ mod tests { ); } - fn get_node_types(grammar: &InputGrammar) -> Vec { + fn get_node_types(grammar: &InputGrammar) -> SuperTypeCycleResult> { let (syntax_grammar, lexical_grammar, _, default_aliases) = prepare_grammar(grammar).unwrap(); let variable_info =