fix(generate): use topological sort for subtype map

This commit is contained in:
WillLillis 2025-03-11 17:21:33 -04:00 committed by Will Lillis
parent 36d93aeff3
commit d2e06bf130
6 changed files with 85 additions and 26 deletions

8
Cargo.lock generated
View file

@ -1937,6 +1937,12 @@ version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801"
[[package]]
name = "topological-sort"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ea68304e134ecd095ac6c3574494fc62b909f416c4fca77e440530221e549d3d"
[[package]]
name = "tracing"
version = "0.1.41"
@ -2019,6 +2025,7 @@ dependencies = [
"streaming-iterator",
"tempfile",
"tiny_http",
"topological-sort",
"tree-sitter",
"tree-sitter-config",
"tree-sitter-generate",
@ -2061,6 +2068,7 @@ dependencies = [
"serde_json",
"smallbitvec",
"thiserror 2.0.12",
"topological-sort",
"tree-sitter",
"url",
]

View file

@ -148,6 +148,7 @@ tempfile = "3.20.0"
thiserror = "2.0.12"
tiny_http = "0.12.0"
toml = "0.8.23"
topological-sort = "0.2.2"
unindent = "0.2.4"
url = { version = "2.5.4", features = ["serde"] }
walkdir = "2.5.0"

View file

@ -62,6 +62,7 @@ similar.workspace = true
smallbitvec.workspace = true
streaming-iterator.workspace = true
tiny_http.workspace = true
topological-sort.workspace = true
url.workspace = true
walkdir.workspace = true
wasmparser.workspace = true

View file

@ -32,6 +32,7 @@ serde.workspace = true
serde_json.workspace = true
smallbitvec.workspace = true
thiserror.workspace = true
topological-sort.workspace = true
tree-sitter.workspace = true

View file

@ -27,7 +27,7 @@ mod tables;
use build_tables::build_tables;
pub use build_tables::ParseTableBuilderError;
use grammars::InputGrammar;
pub use node_types::VariableInfoError;
pub use node_types::{SuperTypeCycleError, VariableInfoError};
use parse_grammar::parse_grammar;
pub use parse_grammar::ParseGrammarError;
use prepare_grammar::prepare_grammar;
@ -70,6 +70,8 @@ pub enum GenerateError {
BuildTables(#[from] ParseTableBuilderError),
#[error(transparent)]
ParseVersion(#[from] ParseVersionError),
#[error(transparent)]
SuperTypeCycle(#[from] SuperTypeCycleError),
}
impl From<std::io::Error> for GenerateError {

View file

@ -1,7 +1,4 @@
use std::{
cmp::Ordering,
collections::{BTreeMap, HashMap, HashSet},
};
use std::collections::{BTreeMap, HashMap, HashSet};
use anyhow::Result;
use serde::Serialize;
@ -444,12 +441,33 @@ pub fn get_supertype_symbol_map(
supertype_symbol_map
}
pub type SuperTypeCycleResult<T> = Result<T, SuperTypeCycleError>;
#[derive(Debug, Error, Serialize)]
pub struct SuperTypeCycleError {
items: Vec<String>,
}
impl std::fmt::Display for SuperTypeCycleError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "Dependency cycle detected in node types:")?;
for (i, item) in self.items.iter().enumerate() {
write!(f, " {item}")?;
if i < self.items.len() - 1 {
write!(f, ",")?;
}
}
Ok(())
}
}
pub fn generate_node_types_json(
syntax_grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
default_aliases: &AliasMap,
variable_info: &[VariableInfo],
) -> Vec<NodeInfoJSON> {
) -> SuperTypeCycleResult<Vec<NodeInfoJSON>> {
let mut node_types_json = BTreeMap::new();
let child_type_to_node_type = |child_type: &ChildType| match child_type {
@ -627,15 +645,33 @@ pub fn generate_node_types_json(
}
}
// Sort the subtype map so that subtypes are listed before their supertypes.
subtype_map.sort_by(|a, b| {
if b.1.contains(&a.0) {
Ordering::Less
} else if a.1.contains(&b.0) {
Ordering::Greater
} else {
Ordering::Equal
// Sort the subtype map topologically so that subtypes are listed before their supertypes.
let mut sorted_kinds = Vec::with_capacity(subtype_map.len());
let mut top_sort = topological_sort::TopologicalSort::<String>::new();
for (supertype, subtypes) in &subtype_map {
for subtype in subtypes {
top_sort.add_dependency(subtype.kind.clone(), supertype.kind.clone());
}
}
loop {
let mut next_kinds = top_sort.pop_all();
match (next_kinds.is_empty(), top_sort.is_empty()) {
(true, true) => break,
(true, false) => {
let mut items = top_sort.collect::<Vec<String>>();
items.sort();
return Err(SuperTypeCycleError { items });
}
(false, _) => {
next_kinds.sort();
sorted_kinds.extend(next_kinds);
}
}
}
subtype_map.sort_by(|a, b| {
let a_idx = sorted_kinds.iter().position(|n| n.eq(&a.0.kind)).unwrap();
let b_idx = sorted_kinds.iter().position(|n| n.eq(&b.0.kind)).unwrap();
a_idx.cmp(&b_idx)
});
for node_type_json in node_types_json.values_mut() {
@ -744,7 +780,7 @@ pub fn generate_node_types_json(
.then_with(|| a.kind.cmp(&b.kind))
});
result.dedup();
result
Ok(result)
}
fn process_supertypes(info: &mut FieldInfoJSON, subtype_map: &[(NodeTypeJSON, Vec<NodeTypeJSON>)]) {
@ -830,7 +866,8 @@ mod tests {
},
],
..Default::default()
});
})
.unwrap();
assert_eq!(node_types.len(), 3);
@ -929,7 +966,8 @@ mod tests {
},
],
..Default::default()
});
})
.unwrap();
assert_eq!(node_types.len(), 4);
@ -1152,7 +1190,8 @@ mod tests {
},
],
..Default::default()
});
})
.unwrap();
assert_eq!(
node_types[0],
@ -1241,7 +1280,8 @@ mod tests {
},
],
..Default::default()
});
})
.unwrap();
assert_eq!(
node_types[0],
@ -1326,7 +1366,8 @@ mod tests {
},
],
..Default::default()
});
})
.unwrap();
assert_eq!(
node_types[0],
@ -1400,7 +1441,8 @@ mod tests {
},
],
..Default::default()
});
})
.unwrap();
assert_eq!(node_types.iter().find(|t| t.kind == "foo_identifier"), None);
assert_eq!(
@ -1456,7 +1498,8 @@ mod tests {
},
],
..Default::default()
});
})
.unwrap();
assert_eq!(
node_types[0],
@ -1505,7 +1548,8 @@ mod tests {
]),
}],
..Default::default()
});
})
.unwrap();
assert_eq!(
node_types,
@ -1553,7 +1597,8 @@ mod tests {
},
],
..Default::default()
});
})
.unwrap();
assert_eq!(
&node_types
@ -1672,7 +1717,8 @@ mod tests {
},
],
..Default::default()
});
})
.unwrap();
assert_eq!(
node_types.iter().map(|n| &n.kind).collect::<Vec<_>>(),
@ -1999,7 +2045,7 @@ mod tests {
);
}
fn get_node_types(grammar: &InputGrammar) -> Vec<NodeInfoJSON> {
fn get_node_types(grammar: &InputGrammar) -> SuperTypeCycleResult<Vec<NodeInfoJSON>> {
let (syntax_grammar, lexical_grammar, _, default_aliases) =
prepare_grammar(grammar).unwrap();
let variable_info =