Remove extra lookups and memory allocations from tsort graph construction (#8694)

This commit is contained in:
Dmitry Sviridkin 2025-09-28 21:46:55 +01:00 committed by GitHub
parent 33e37c6cf3
commit ec7e88ff5b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -63,11 +63,23 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
// Create the directed graph from pairs of tokens in the input data.
let mut g = Graph::new(input.to_string_lossy().to_string());
for ab in data.split_whitespace().collect::<Vec<&str>>().chunks(2) {
match ab {
[a, b] => g.add_edge(a, b),
_ => return Err(TsortError::NumTokensOdd(input.to_string_lossy().to_string()).into()),
}
// Input is considered to be in the format
// From1 To1 From2 To2 ...
// with tokens separated by whitespaces
let mut edge_tokens = data.split_whitespace();
// Note: this is equivalent to iterating over edge_tokens.chunks(2)
// but chunks() exists only for slices and would require unnecessary Vec allocation.
// Itertools::chunks() is not used due to unnecessary overhead for internal RefCells
loop {
// Try take next pair of tokens
let Some(from) = edge_tokens.next() else {
// no more tokens -> end of input. Graph constructed
break;
};
let Some(to) = edge_tokens.next() else {
return Err(TsortError::NumTokensOdd(input.to_string_lossy().to_string()).into());
};
g.add_edge(from, to);
}
g.run_tsort();
@ -127,19 +139,11 @@ impl<'input> Graph<'input> {
}
}
fn add_node(&mut self, name: &'input str) {
self.nodes.entry(name).or_default();
}
fn add_edge(&mut self, from: &'input str, to: &'input str) {
self.add_node(from);
let from_node = self.nodes.entry(from).or_default();
if from != to {
self.add_node(to);
let from_node = self.nodes.get_mut(from).unwrap();
from_node.add_successor(to);
let to_node = self.nodes.get_mut(to).unwrap();
let to_node = self.nodes.entry(to).or_default();
to_node.predecessor_count += 1;
}
}
@ -233,10 +237,7 @@ impl<'input> Graph<'input> {
fn detect_cycle(&self) -> Vec<&'input str> {
// Sort the nodes just to make this function deterministic.
let mut nodes = Vec::new();
for node in self.nodes.keys() {
nodes.push(node);
}
let mut nodes: Vec<_> = self.nodes.keys().collect();
nodes.sort_unstable();
let mut visited = HashSet::new();