Fix sorting by vendoring the pathfinding crate.

2025-10-03 08:34:33 +00:00 · 2019-08-01 18:38:53 -04:00 · 2019-08-01 18:38:53 -04:00 · 00a02d597a
commit 00a02d597a
parent e3e92b56fb
8 changed files with 507 additions and 143 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -87,11 +87,6 @@ dependencies = [
 "typenum 1.10.0 (registry+https://github.com/rust-lang/crates.io-index)",
 ]

-[[package]]
-name = "indexmap"
-version = "1.0.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-
 [[package]]
 name = "indoc"
 version = "0.3.3"
@ -113,14 +108,6 @@ dependencies = [
 "unindent 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
 ]

-[[package]]
-name = "itertools"
-version = "0.8.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-dependencies = [
- "either 1.5.2 (registry+https://github.com/rust-lang/crates.io-index)",
-]
-
 [[package]]
 name = "lazy_static"
 version = "1.3.0"
@ -218,17 +205,6 @@ name = "ordermap"
 version = "0.3.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"

-[[package]]
-name = "pathfinding"
-version = "1.1.12"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-dependencies = [
- "fixedbitset 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
- "indexmap 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
- "itertools 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
- "num-traits 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
-]
-
 [[package]]
 name = "petgraph"
 version = "0.4.13"
@ -286,7 +262,6 @@ dependencies = [
 "log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
 "maplit 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
 "num 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
- "pathfinding 1.1.12 (registry+https://github.com/rust-lang/crates.io-index)",
 "petgraph 0.4.13 (registry+https://github.com/rust-lang/crates.io-index)",
 "pretty_assertions 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
@ -391,10 +366,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 "checksum fraction 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "1055159ac82fb210c813303f716b6c8db57ace9d5ec2dbbc2e1d7a864c1dd74e"
 "checksum fxhash 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"
 "checksum im-rc 13.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "0a0197597d095c0d11107975d3175173f810ee572c2501ff4de64f4f3f119806"
-"checksum indexmap 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7e81a7c05f79578dbc15793d8b619db9ba32b4577003ef3af1a91c416798c58d"
 "checksum indoc 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a1f59f228c76fda6ecd8dab79683039a7054c748587f682a911094f473647bd6"
 "checksum indoc-impl 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "63f070ef080db3601c1a0ecc75c7bb35104cc0ce2d7c4e049952a96a61d8933b"
-"checksum itertools 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5b8467d9c1cebe26feb08c640139247fac215782d35371ade9a2136ed6085358"
 "checksum lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bc5729f27f159ddd61f4df6228e827e86643d4d3e7c32183cb30a1c08f604a14"
 "checksum log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c84ec4b527950aa83a329754b01dbe3f58361d1c5efacd1f6d68c494d08a17c6"
 "checksum maplit 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "08cbb6b4fef96b6d77bfc40ec491b1690c779e77b05cd9f07f787ed376fd4c43"
@ -407,7 +380,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 "checksum num-rational 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f2885278d5fe2adc2f75ced642d52d879bffaceb5a2e0b1d4309ffdfb239b454"
 "checksum num-traits 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "6ba9a427cfca2be13aa6f6403b0b7e7368fe982bfa16fccc450ce74c46cd9b32"
 "checksum ordermap 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "a86ed3f5f244b372d6b1a00b72ef7f8876d0bc6a78a4c9985c53614041512063"
-"checksum pathfinding 1.1.12 (registry+https://github.com/rust-lang/crates.io-index)" = "37691aaf6640549d85ed79575cb159843b07380d420aac9e891b627e7cc3f1f3"
 "checksum petgraph 0.4.13 (registry+https://github.com/rust-lang/crates.io-index)" = "9c3659d1ee90221741f65dd128d9998311b0e40c5d3c23a62445938214abce4f"
 "checksum pretty_assertions 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3a029430f0d744bc3d15dd474d591bed2402b645d024583082b9f63bb936dac6"
 "checksum proc-macro-hack 0.5.8 (registry+https://github.com/rust-lang/crates.io-index)" = "982a35d1194084ba319d65c4a68d24ca28f5fdb5b8bc20899e4eef8641ea5178"
--- a/Cargo.toml
+++ b/Cargo.toml
@ -12,7 +12,6 @@ im-rc = "13.0.0"
 fraction = "0.6.2"
 num = "0.2.0"
 fxhash = "0.2.1"
-pathfinding = "1.1.12"

 [dev-dependencies]
 pretty_assertions = "0.5.1"
--- a/src/canonicalize.rs
+++ b/src/canonicalize.rs
@ -2,12 +2,11 @@ use region::{Located, Region};
 use operator::Operator;
 use operator::Operator::Pizza;
 use operator::Associativity::*;
-use collections::{ImSortedSet, ImSortedMap, MutMap, MutSortedMap, MutSet};
+use collections::{ImSet, ImMap, MutMap, MutSet};
 use std::cmp::Ordering;
 use expr::{Ident, VariantName};
 use expr;
-use pathfinding::directed::topological_sort::topological_sort;
-use pathfinding::directed::strongly_connected_components::strongly_connected_component;
+use graph::{topological_sort, strongly_connected_component};
 use self::PatternType::*;

 #[derive(Clone, Debug, PartialEq)]
@ -114,13 +113,13 @@ impl Into<String> for Symbol {

 #[derive(Clone, Debug, PartialEq)]
 struct Scope {
-    pub idents: ImSortedMap<Ident, (Symbol, Region)>,
+    pub idents: ImMap<Ident, (Symbol, Region)>,
    symbol_prefix: String,
    next_unique_id: u64,
 }

 impl Scope {
-    pub fn new(symbol_prefix: String, declared_idents: ImSortedMap<Ident, (Symbol, Region)>) -> Scope {
+    pub fn new(symbol_prefix: String, declared_idents: ImMap<Ident, (Symbol, Region)>) -> Scope {
        Scope {
            symbol_prefix,

@ -176,19 +175,19 @@ struct Env {
    problems: Vec<Problem>,

    /// Variants either declared in this module, or imported.
-    variants: ImSortedMap<Symbol, Located<expr::VariantName>>,
+    variants: ImMap<Symbol, Located<expr::VariantName>>,

    /// Former closures converted to top-level procedures.
-    procedures: MutSortedMap<Symbol, Procedure>,
+    procedures: MutMap<Symbol, Procedure>,
 }

 impl Env {
-    pub fn new(home: String, declared_variants: ImSortedMap<Symbol, Located<expr::VariantName>>) -> Env {
+    pub fn new(home: String, declared_variants: ImMap<Symbol, Located<expr::VariantName>>) -> Env {
        Env {
            home,
            variants: declared_variants,
            problems: Vec::new(),
-            procedures: MutSortedMap::default(),
+            procedures: MutMap::default(),
        }
    }

@ -218,9 +217,9 @@ pub fn canonicalize_declaration(
    home: String,
    name: &str,
    loc_expr: Located<expr::Expr>,
-    declared_idents: &ImSortedMap<Ident, (Symbol, Region)>,
-    declared_variants: &ImSortedMap<Symbol, Located<expr::VariantName>>,
-) -> (Located<Expr>, Output, Vec<Problem>, MutSortedMap<Symbol, Procedure>) {
+    declared_idents: &ImMap<Ident, (Symbol, Region)>,
+    declared_variants: &ImMap<Symbol, Located<expr::VariantName>>,
+) -> (Located<Expr>, Output, Vec<Problem>, MutMap<Symbol, Procedure>) {
    // If we're canonicalizing the declaration `foo = ...` inside the `Main` module,
    // scope_prefix will be "Main$foo$" and its first closure will be named "Main$foo$0"
    let scope_prefix = format!("{}${}$", home, name);
@ -249,19 +248,19 @@ pub struct Output {
 /// so it's important that building the same code gives the same order every time!
 #[derive(Clone, Debug, PartialEq)]
 pub struct References {
-    pub locals: ImSortedSet<Symbol>,
-    pub globals: ImSortedSet<Symbol>,
-    pub variants: ImSortedSet<Symbol>,
-    pub calls: ImSortedSet<Symbol>,
+    pub locals: ImSet<Symbol>,
+    pub globals: ImSet<Symbol>,
+    pub variants: ImSet<Symbol>,
+    pub calls: ImSet<Symbol>,
 }

 impl References {
    pub fn new() -> References {
        References {
-            locals: ImSortedSet::default(),
-            globals: ImSortedSet::default(),
-            variants: ImSortedSet::default(),
-            calls: ImSortedSet::default(),
+            locals: ImSet::default(),
+            globals: ImSet::default(),
+            variants: ImSet::default(),
+            calls: ImSet::default(),
        }
    }

@ -514,13 +513,13 @@ fn canonicalize(

            // Add the assigned identifiers to scope. If there's a collision, it means there
            // was shadowing, which will be handled later.
-            let assigned_idents: ImSortedMap<Ident, (Symbol, Region)> =
+            let assigned_idents: ImMap<Ident, (Symbol, Region)> =
                idents_from_patterns(assignments.clone().into_iter().map(|(loc_pattern, _)| loc_pattern), &scope);

            scope.idents = scope.idents.union(assigned_idents.clone());

            let mut refs_by_assignment: MutMap<Symbol, (Located<Ident>, References)> = MutMap::default();
-            let mut can_assignments_by_symbol: MutSortedMap<Symbol, (Pattern, Located<Expr>)> = MutSortedMap::default();
+            let mut can_assignments_by_symbol: MutMap<Symbol, (Pattern, Located<Expr>)> = MutMap::default();

            for (loc_pattern, expr) in assignments {
                // Each assignment gets to have all the idents in scope that are assigned in this
@ -647,7 +646,7 @@ fn canonicalize(
            // This way, during code gen, no assignment will refer to a value that hasn't been initialized yet.
            // As a bonus, the topological sort also reveals any cycles between the assignments, allowing
            // us to give a CircularAssignment error.
-            let successors = |symbol: &Symbol| -> ImSortedSet<Symbol>  {
+            let successors = |symbol: &Symbol| -> ImSet<Symbol>  {
                let (_, references) = refs_by_assignment.get(symbol).unwrap();

                references.locals.clone()
@ -673,6 +672,7 @@ fn canonicalize(
                    let loc_idents_in_cycle: Vec<Located<expr::Ident>> =
                        strongly_connected_component(&node_in_cycle, successors)
                            .into_iter()
+                            .rev() // Strongly connected component gives us the reverse of the sorting we want!
                            .map(|symbol| refs_by_assignment.get(&symbol).unwrap().0.clone())
                            .collect();

@ -702,7 +702,7 @@ fn canonicalize(

            // Add the arguments' idents to scope.idents. If there's a collision,
            // it means there was shadowing, which will be handled later.
-            let arg_idents: ImSortedMap<Ident, (Symbol, Region)> =
+            let arg_idents: ImMap<Ident, (Symbol, Region)> =
                idents_from_patterns(loc_arg_patterns.clone().into_iter(), &scope);

            scope.idents = scope.idents.union(arg_idents.clone());
@ -768,7 +768,7 @@ fn canonicalize(
                // Patterns introduce new idents to the scope!
                // Add the assigned identifiers to scope. If there's a collision, it means there
                // was shadowing, which will be handled later.
-                let assigned_idents: ImSortedMap<Ident, (Symbol, Region)> =
+                let assigned_idents: ImMap<Ident, (Symbol, Region)> =
                    idents_from_patterns(std::iter::once(loc_pattern), &scope);

                scope.idents = scope.idents.union(assigned_idents.clone());
@ -831,7 +831,7 @@ fn references_from_local<T>(
    assigned_symbol: Symbol,
    visited: &mut MutSet<Symbol>,
    refs_by_assignment: &MutMap<Symbol, (T, References)>,
-    procedures: &MutSortedMap<Symbol, Procedure>,
+    procedures: &MutMap<Symbol, Procedure>,
 ) -> References {
    match refs_by_assignment.get(&assigned_symbol) {
        Some((_, refs)) => {
@ -869,11 +869,10 @@ fn references_from_call<T>(
    call_symbol: Symbol,
    visited: &mut MutSet<Symbol>,
    refs_by_assignment: &MutMap<Symbol, (T, References)>,
-    procedures: &MutSortedMap<Symbol, Procedure>,
+    procedures: &MutMap<Symbol, Procedure>,
 ) -> References {
-    // This shuold be safe to unwrap. All unrecognized call symbols should have been recorded as
-    // such, and should never have made it into output.references.calls!
-    let procedure = procedures.get(&call_symbol).unwrap();
+    match procedures.get(&call_symbol) {
+        Some(procedure) => {
            let mut answer = procedure.references.clone();

            visited.insert(call_symbol);
@ -899,13 +898,20 @@ fn references_from_call<T>(
            }

            answer
+        },
+        None => {
+            // If the call symbol was not in the procedures map, that means we're calling a non-function and
+            // will get a type mismatch later. For now, assume no references as a result of the "call."
+            References::new()
+        }
+    }
 }


-fn idents_from_patterns<I>(loc_patterns: I, scope: &Scope) -> ImSortedMap<Ident, (Symbol, Region)>
+fn idents_from_patterns<I>(loc_patterns: I, scope: &Scope) -> ImMap<Ident, (Symbol, Region)>
 where I: Iterator<Item = Located<expr::Pattern>>
 {
-    let mut answer = ImSortedMap::default();
+    let mut answer = ImMap::default();

    for loc_pattern in loc_patterns {
        add_idents_from_pattern(loc_pattern, scope, &mut answer);
@ -918,7 +924,7 @@ where I: Iterator<Item = Located<expr::Pattern>>
 fn add_idents_from_pattern(
    loc_pattern: Located<expr::Pattern>,
    scope: &Scope,
-    answer: &mut ImSortedMap<Ident, (Symbol, Region)>
+    answer: &mut ImMap<Ident, (Symbol, Region)>
 ) {
    use expr::Pattern::*;

@ -940,7 +946,7 @@ fn add_idents_from_pattern(

 fn remove_idents(
    pattern: expr::Pattern,
-    idents: &mut ImSortedMap<Ident, (Symbol, Region)>
+    idents: &mut ImMap<Ident, (Symbol, Region)>
 ) {
    use expr::Pattern::*;

@ -1045,7 +1051,7 @@ fn canonicalize_pattern(
    scope: &mut Scope,
    pattern_type: &PatternType,
    loc_pattern: &Located<expr::Pattern>,
-    shadowable_idents: &mut ImSortedMap<Ident, (Symbol, Region)>,
+    shadowable_idents: &mut ImMap<Ident, (Symbol, Region)>,
 ) -> Pattern {
    use expr::Pattern::*;

--- a/src/collections.rs
+++ b/src/collections.rs
@ -2,35 +2,26 @@ use std::hash::BuildHasherDefault;

 pub use fxhash::FxHasher;

+#[inline(always)]
+pub fn default_hasher() -> BuildHasherDefault<FxHasher> {
+    BuildHasherDefault::default()
+}
+
+pub type BuildHasher = BuildHasherDefault<FxHasher>;
+
 // Versions of HashMap and HashSet from both std and im_rc
 // which use the FNV hasher instead of the default SipHash hasher.
 // FNV is faster but less secure; that's fine, since this compiler
 // doesn't need cryptographically secure hashes, and also is not a
 // server concerned about hash flooding attacks!
-
 pub type MutMap<K, V> =
-    std::collections::HashMap<K, V, BuildHasherDefault<FxHasher>>;
+    std::collections::HashMap<K, V, BuildHasher>;

 pub type MutSet<K> =
-    std::collections::HashSet<K, BuildHasherDefault<FxHasher>>;
+    std::collections::HashSet<K, BuildHasher>;

 pub type ImMap<K, V> =
-    im_rc::hashmap::HashMap<K, V, BuildHasherDefault<FxHasher>>;
+    im_rc::hashmap::HashMap<K, V, BuildHasher>;

 pub type ImSet<K> =
-    im_rc::hashset::HashSet<K, BuildHasherDefault<FxHasher>>;
-
-// OrdMap equivalents, for naming symmetry.
-// Someday we may switch these implementations out.
-
-pub type MutSortedMap<K, V> =
-    std::collections::BTreeMap<K, V>;
-
-pub type MutSortedSet<K> =
-    std::collections::BTreeSet<K>;
-
-pub type ImSortedMap<K, V> =
-    im_rc::ordmap::OrdMap<K, V>;
-
-pub type ImSortedSet<K> =
-    im_rc::ordset::OrdSet<K>;
+    im_rc::hashset::HashSet<K, BuildHasher>;
--- a/src/graph.rs
+++ b/src/graph.rs
@ -0,0 +1,375 @@
+// Adapted from the Pathfinding crate by Samuel Tardieu <sam@rfc1149.net>,
+// licensed under the Apache License, version 2.0 - https://www.apache.org/licenses/LICENSE-2.0
+//
+// The original source code can be found at: https://github.com/samueltardieu/pathfinding
+//
+// Thank you, Samuel!
+//
+//
+//
+// This is modified from the original source to use the Roc compiler's preferred hashers
+// instead of the SipHash hasher which Rust hash collections use by default.
+//
+// Rust's default hashing defends against hash flooding attacks by seeding SipHash with
+// random keys whenever a new hasher state is created, which is designed to prevent attackers
+// from crafting intentional collisions that amplify denial-of-service attacks.
+// Since this is a compiler, we aren't worried about denial-of-service attacks.
+//
+// The primary motivation for this change is wanting the compiler to always give exactly
+// the same answer given the same inputs. So if you give it the same source files, it should
+// produce identical binaries every time. Randomly seeded SipHash gives different answers on each run.
+//
+// Secondarily, SipHash isn't the fastest hashing algorithm out there, so we can get
+// slightly better performance by using a faster hasher.
+
+// Find a topological order in a directed graph if one exists.
+
+use collections::{BuildHasher, MutSet, default_hasher};
+use std::collections::{HashMap, HashSet, VecDeque};
+use std::hash::Hash;
+use std::mem;
+
+/// Find a topological order in a directed graph if one exists.
+///
+/// - `nodes` is a collection of nodes.
+/// - `successors` returns a list of successors for a given node.
+///
+/// The function returns either `Ok` with an acceptable topological order,
+/// or `Err` with a node belonging to a cycle. In the latter case, the
+/// strongly connected set can then be found using the
+/// [`strongly_connected_component`](self::strongly_connected_component)
+/// function. If only one of the loops is needed, the upstream pathfinding crate's `bfs_loop`
+/// function (not vendored in this file) can identify one of the shortest loops involving this node.
+///
+/// # Examples
+///
+/// We will sort integers from 1 to 9, each integer having its two immediate
+/// greater numbers as successors (written against the upstream crate, hence `ignore`):
+///
+/// ```ignore
+/// use pathfinding::prelude::topological_sort;
+///
+/// fn successors(node: &usize) -> Vec<usize> {
+///   match *node {
+///     n if n <= 7 => vec![n+1, n+2],
+///     8 => vec![9],
+///     _ => vec![],
+///   }
+/// }
+///
+/// let sorted = topological_sort(&[3, 7, 1, 4, 2, 9, 8, 6, 5], successors);
+/// assert_eq!(sorted, Ok(vec![1, 2, 3, 4, 5, 6, 7, 8, 9]));
+/// ```
+///
+/// If, however, there is a loop in the graph (for example, all nodes but 7
+/// also have 7 as a successor), one of the nodes in the loop will be returned as
+/// an error (again written against the upstream crate, hence `ignore`):
+///
+/// ```ignore
+/// use pathfinding::prelude::*;
+///
+/// fn successors(node: &usize) -> Vec<usize> {
+///   match *node {
+///     n if n <= 6 => vec![n+1, n+2, 7],
+///     7 => vec![8, 9],
+///     8 => vec![7, 9],
+///     _ => vec![7],
+///   }
+/// }
+///
+/// let sorted = topological_sort(&[3, 7, 1, 4, 2, 9, 8, 6, 5], successors);
+/// assert!(sorted.is_err());
+///
+/// // Let's assume that the returned node is 8 (it can be any node which is part
+/// // of a loop). We can look up one of the shortest loops containing 8
+/// // (8 -> 7 -> 8 is the unique loop with two hops containing 8):
+///
+/// assert_eq!(bfs_loop(&8, successors), Some(vec![8, 7, 8]));
+///
+/// // We can also request the whole strongly connected set containing 8. Here
+/// // 7, 8, and 9 are all reachable from one another.
+///
+/// let mut set = strongly_connected_component(&8, successors);
+/// set.sort();
+/// assert_eq!(set, vec![7, 8, 9]);
+/// ```
+pub fn topological_sort<N, FN, IN>(nodes: &[N], mut successors: FN) -> Result<Vec<N>, N>
+where
+    N: Eq + Hash + Clone,
+    FN: FnMut(&N) -> IN,
+    IN: IntoIterator<Item = N>,
+{
+    let mut unmarked: MutSet<N> = nodes.iter().cloned().collect::<MutSet<_>>();
+    let mut marked = HashSet::with_capacity_and_hasher(nodes.len(), default_hasher());
+    let mut temp = MutSet::default();
+    let mut sorted = VecDeque::with_capacity(nodes.len());
+    while let Some(node) = unmarked.iter().cloned().next() {
+        temp.clear();
+        visit(
+            &node,
+            &mut successors,
+            &mut unmarked,
+            &mut marked,
+            &mut temp,
+            &mut sorted,
+        )?;
+    }
+    Ok(sorted.into_iter().collect())
+}
+
+fn visit<N, FN, IN>(
+    node: &N,
+    successors: &mut FN,
+    unmarked: &mut MutSet<N>,
+    marked: &mut MutSet<N>,
+    temp: &mut MutSet<N>,
+    sorted: &mut VecDeque<N>,
+) -> Result<(), N>
+where
+    N: Eq + Hash + Clone,
+    FN: FnMut(&N) -> IN,
+    IN: IntoIterator<Item = N>,
+{
+    unmarked.remove(node);
+    if marked.contains(node) {
+        return Ok(());
+    }
+    if temp.contains(node) {
+        return Err(node.clone());
+    }
+    temp.insert(node.clone());
+    for n in successors(node) {
+        visit(&n, successors, unmarked, marked, temp, sorted)?;
+    }
+    marked.insert(node.clone());
+    sorted.push_front(node.clone());
+    Ok(())
+}
+
+/// Topologically sort a directed graph into groups of independent nodes.
+///
+/// - `nodes` is a collection of nodes.
+/// - `successors` returns a list of successors for a given node.
+///
+/// This function works like [`topological_sort`](self::topological_sort), but
+/// rather than producing a single ordering of nodes, this function partitions
+/// the nodes into groups: the first group contains all nodes with no
+/// dependencies, the second group contains all nodes whose only dependencies
+/// are in the first group, and so on.  Concatenating the groups produces a
+/// valid topological sort regardless of how the nodes within each group are
+/// reordered.  No guarantees are made about the order of nodes within each
+/// group.
+///
+/// The function returns either `Ok` with a valid list of groups, or `Err` with
+/// a (groups, remaining) tuple containing a (possibly empty) partial list of
+/// groups, and a list of remaining nodes that could not be grouped due to
+/// cycles.  In the error case, the strongly connected set(s) can then be found
+/// using the
+/// [`strongly_connected_components`](self::strongly_connected_components)
+/// function on the list of remaining nodes.
+///
+/// The current implementation uses a variation of [Kahn's
+/// algorithm](https://en.wikipedia.org/wiki/Topological_sorting#Kahn's_algorithm),
+/// and runs in O(|V| + |E|) time.
+#[allow(clippy::type_complexity)]
+#[allow(dead_code)]
+pub fn topological_sort_into_groups<N, FN, IN>(
+    nodes: &[N],
+    mut successors: FN,
+) -> Result<Vec<Vec<N>>, (Vec<Vec<N>>, Vec<N>)>
+where
+    N: Eq + Hash + Clone,
+    FN: FnMut(&N) -> IN,
+    IN: IntoIterator<Item = N>,
+{
+    if nodes.is_empty() {
+        return Ok(Vec::new());
+    }
+    let mut succs_map = HashMap::<N, MutSet<N>, BuildHasher>::with_capacity_and_hasher(nodes.len(), default_hasher());
+    let mut preds_map = HashMap::<N, usize, BuildHasher>::with_capacity_and_hasher(nodes.len(), default_hasher());
+    for node in nodes.iter() {
+        succs_map.insert(node.clone(), successors(node).into_iter().collect());
+        preds_map.insert(node.clone(), 0);
+    }
+    for succs in succs_map.values() {
+        for succ in succs.iter() {
+            *preds_map.get_mut(succ).unwrap() += 1;
+        }
+    }
+    let mut groups = Vec::<Vec<N>>::new();
+    let mut prev_group: Vec<N> = preds_map
+        .iter()
+        .filter_map(|(node, &num_preds)| {
+            if num_preds == 0 {
+                Some(node.clone())
+            } else {
+                None
+            }
+        })
+        .collect();
+    if prev_group.is_empty() {
+        let remaining: Vec<N> = preds_map.into_iter().map(|(node, _)| node).collect();
+        return Err((Vec::new(), remaining));
+    }
+    for node in prev_group.iter() {
+        preds_map.remove(node);
+    }
+    while !preds_map.is_empty() {
+        let mut next_group = Vec::<N>::new();
+        for node in prev_group.iter() {
+            for succ in &succs_map[node] {
+                {
+                    let num_preds = preds_map.get_mut(succ).unwrap();
+                    *num_preds -= 1;
+                    if *num_preds > 0 {
+                        continue;
+                    }
+                }
+                next_group.push(preds_map.remove_entry(succ).unwrap().0);
+            }
+        }
+        groups.push(mem::replace(&mut prev_group, next_group));
+        if prev_group.is_empty() {
+            let remaining: Vec<N> = preds_map.into_iter().map(|(node, _)| node).collect();
+            return Err((groups, remaining));
+        }
+    }
+    groups.push(prev_group);
+    Ok(groups)
+}
+
+// Separate nodes of a directed graph into [strongly connected
+// components](https://en.wikipedia.org/wiki/Strongly_connected_component).
+//
+// A [path-based strong component
+// algorithm](https://en.wikipedia.org/wiki/Path-based_strong_component_algorithm)
+// is used.
+
+struct Params<N, FN, IN>
+where
+    N: Clone + Hash + Eq,
+    FN: FnMut(&N) -> IN,
+    IN: IntoIterator<Item = N>,
+{
+    preorders: HashMap<N, Option<usize>, BuildHasher>,
+    c: usize,
+    successors: FN,
+    p: Vec<N>,
+    s: Vec<N>,
+    scc: Vec<Vec<N>>,
+    scca: MutSet<N>,
+}
+
+impl<N, FN, IN> Params<N, FN, IN>
+where
+    N: Clone + Hash + Eq,
+    FN: FnMut(&N) -> IN,
+    IN: IntoIterator<Item = N>,
+{
+    fn new(nodes: &[N], successors: FN) -> Self {
+        Params {
+            preorders: nodes
+                .iter()
+                .map(|n| (n.clone(), None))
+                .collect::<HashMap<N, Option<usize>, BuildHasher>>(),
+            c: 0,
+            successors,
+            p: Vec::new(),
+            s: Vec::new(),
+            scc: Vec::new(),
+            scca: MutSet::default(),
+        }
+    }
+}
+
+fn recurse_onto<N, FN, IN>(v: &N, params: &mut Params<N, FN, IN>)
+where
+    N: Clone + Hash + Eq,
+    FN: FnMut(&N) -> IN,
+    IN: IntoIterator<Item = N>,
+{
+    params.preorders.insert(v.clone(), Some(params.c));
+    params.c += 1;
+    params.s.push(v.clone());
+    params.p.push(v.clone());
+    for w in (params.successors)(v) {
+        if !params.scca.contains(&w) {
+            if let Some(pw) = params.preorders.get(&w).and_then(|w| *w) {
+                while params.preorders[&params.p[params.p.len() - 1]].unwrap() > pw {
+                    params.p.pop();
+                }
+            } else {
+                recurse_onto(&w, params);
+            }
+        }
+    }
+    if params.p[params.p.len() - 1] == *v {
+        params.p.pop();
+        let mut component = Vec::new();
+        while let Some(node) = params.s.pop() {
+            component.push(node.clone());
+            params.scca.insert(node.clone());
+            params.preorders.remove(&node);
+            if node == *v {
+                break;
+            }
+        }
+        params.scc.push(component);
+    }
+}
+
+/// Partition nodes reachable from a starting point into strongly connected components.
+///
+/// - `start` is the node we want to explore the graph from.
+/// - `successors` returns a list of successors for a given node.
+///
+/// The function returns a list of strongly connected components. It will contain
+/// at least one component (the one containing the `start` node).
+pub fn strongly_connected_components_from<N, FN, IN>(start: &N, successors: FN) -> Vec<Vec<N>>
+where
+    N: Clone + Hash + Eq,
+    FN: FnMut(&N) -> IN,
+    IN: IntoIterator<Item = N>,
+{
+    let mut params = Params::new(&[], successors);
+    recurse_onto(start, &mut params);
+    params.scc
+}
+
+/// Compute the strongly connected component containing a given node.
+///
+/// - `node` is the node we want the strongly connected component for.
+/// - `successors` returns a list of successors for a given node.
+///
+/// The function returns the strongly connected component containing the node,
+/// which is guaranteed to contain at least `node`.
+pub fn strongly_connected_component<N, FN, IN>(node: &N, successors: FN) -> Vec<N>
+where
+    N: Clone + Hash + Eq,
+    FN: FnMut(&N) -> IN,
+    IN: IntoIterator<Item = N>,
+{
+    strongly_connected_components_from(node, successors)
+        .pop()
+        .unwrap()
+}
+
+/// Partition all strongly connected components in a graph.
+///
+/// - `nodes` is a collection of nodes.
+/// - `successors` returns a list of successors for a given node.
+///
+/// The function returns a list of strongly connected components.
+#[allow(dead_code)]
+pub fn strongly_connected_components<N, FN, IN>(nodes: &[N], successors: FN) -> Vec<Vec<N>>
+where
+    N: Clone + Hash + Eq,
+    FN: FnMut(&N) -> IN,
+    IN: IntoIterator<Item = N>,
+{
+    let mut params = Params::new(nodes, successors);
+    while let Some(node) = params.preorders.keys().find(|_| true).cloned() {
+        recurse_onto(&node, &mut params);
+    }
+    params.scc
+}
--- a/src/lib.rs
+++ b/src/lib.rs
@ -6,7 +6,7 @@ pub mod operator;
 pub mod region;
 pub mod canonicalize;
 pub mod collections;
-// mod ena;
+mod graph;

 // #[macro_use]
 // extern crate log;
@ -18,6 +18,5 @@ extern crate im_rc;
 extern crate fraction;
 extern crate num;
 extern crate fxhash;
-extern crate pathfinding;

 #[macro_use] extern crate combine;
--- a/tests/helpers/mod.rs
+++ b/tests/helpers/mod.rs
@ -1,6 +1,7 @@
 use roc::expr::{Expr, Pattern};
 use roc::region::{Located, Region};
-use roc::collections::{MutSortedMap};
+use std::hash::Hash;
+use roc::collections::{MutMap};

 pub fn loc_box<T>(val: T) -> Box<Located<T>> {
    Box::new(loc(val))
@ -73,11 +74,11 @@ pub fn zero_loc_pattern(loc_pattern: Located<Pattern>) -> Located<Pattern> {
 }

 #[allow(dead_code)] // For some reason rustc thinks this isn't used. It is, though, in test_canonicalize.rs
-pub fn mut_sorted_map_from_pairs<K, V, I>(pairs: I) -> MutSortedMap<K, V>
+pub fn mut_map_from_pairs<K, V, I>(pairs: I) -> MutMap<K, V>
    where I: IntoIterator<Item=(K, V)>,
-        K: Ord
+        K: Hash + Eq
    {
-        let mut answer = MutSortedMap::default();
+        let mut answer = MutMap::default();

        for (key, value) in pairs {
            answer.insert(key, value);
--- a/tests/test_canonicalize.rs
+++ b/tests/test_canonicalize.rs
@ -17,22 +17,22 @@ mod test_canonicalize {
    use roc::operator::Operator;
    use roc::region::{Located, Region};
    use roc::parse;
-    use roc::collections::{ImSortedMap, ImSortedSet, MutSortedMap};
+    use roc::collections::{ImMap, ImSet, MutMap};
    use roc::parse_state::{IndentablePosition};
    use combine::{Parser, eof};
    use combine::stream::state::{State};
-    use helpers::{loc, loc_box, empty_region, zero_loc_expr, mut_sorted_map_from_pairs};
+    use helpers::{loc, loc_box, empty_region, zero_loc_expr, mut_map_from_pairs};

-    fn can_expr(expr_str: &str) -> (Expr, Output, Vec<Problem>, MutSortedMap<Symbol, Procedure>) {
-        can_expr_with("testDecl", expr_str, &ImSortedMap::default(), &ImSortedMap::default())
+    fn can_expr(expr_str: &str) -> (Expr, Output, Vec<Problem>, MutMap<Symbol, Procedure>) {
+        can_expr_with("testDecl", expr_str, &ImMap::default(), &ImMap::default())
    }

    fn can_expr_with(
        name: &str,
        expr_str: &str,
-        declared_idents: &ImSortedMap<Ident, (Symbol, Region)>,
-        declared_variants: &ImSortedMap<Symbol, Located<expr::VariantName>>,
-    ) -> (Expr, Output, Vec<Problem>, MutSortedMap<Symbol, Procedure>) {
+        declared_idents: &ImMap<Ident, (Symbol, Region)>,
+        declared_variants: &ImMap<Symbol, Located<expr::VariantName>>,
+    ) -> (Expr, Output, Vec<Problem>, MutMap<Symbol, Procedure>) {
        let parse_state: State<&str, IndentablePosition> = State::with_positioner(expr_str, IndentablePosition::default());
        let expr = match parse::expr().skip(eof()).easy_parse(parse_state) {
            Ok((expr, state)) => {
@ -90,8 +90,8 @@ mod test_canonicalize {
        }
    }

-    fn vec_to_set<'a>(vec: Vec<&'a str>) -> ImSortedSet<Symbol> {
-        ImSortedSet::from(vec.into_iter().map(sym).collect::<Vec<_>>())
+    /// Converts each string in `vec` to a `Symbol` via `sym` and builds an
+    /// `ImSet` from the resulting symbols.
+    fn vec_to_set<'a>(vec: Vec<&'a str>) -> ImSet<Symbol> {
+        let symbols: Vec<_> = vec.into_iter().map(sym).collect();
+
+        ImSet::from(symbols)
    }

    // BASIC CANONICALIZATION
@ -117,7 +117,7 @@ mod test_canonicalize {
        }.into());

        assert_eq!(procedures,
-            mut_sorted_map_from_pairs(vec![(sym("func"),
+            mut_map_from_pairs(vec![(sym("func"),
                Procedure {
                    name: Some("func".to_string()),
                    is_self_tail_recursive: false,
@ -340,59 +340,80 @@ mod test_canonicalize {
            tail_call: None
        }.into());

-        // This should get reordered to the following, so that in code gen
-        // everything will have been set before it gets read.
-        // (The order of the function definitions doesn't matter.)
-        assert_assignment_order(expr,
-            vec!["func1", "x", "z", "func2", "y"],
-        );
+        let symbols = assigned_symbols(expr);
+
+        // In code gen, for everything to have been set before it gets read,
+        // the following must be true about when things are assigned:
+        //
+        // x and func2 must be assigned (in either order) before y
+        // y and func1 must be assigned (in either order) before z
+        assert_before("x", "y", &symbols);
+        assert_before("func2", "y", &symbols);
+
+        assert_before("func1", "z", &symbols);
+        assert_before("y", "z", &symbols);
    }

-    fn assert_assignment_order(expr: Expr, expected_strings: Vec<&str>) {
+    /// Asserts that `before` is assigned earlier than `after` in `symbols`.
+    ///
+    /// Both names are converted with `sym`, and each is located by its first
+    /// occurrence in `symbols`.
+    ///
+    /// Panics if the two names are equal, if either symbol cannot be found in
+    /// `symbols`, or if `before` is not at a strictly lower index than `after`.
+    fn assert_before(before: &str, after: &str, symbols: &[Symbol]) {
+        assert_ne!(before, after);
+
+        let before_symbol = sym(before);
+        let after_symbol = sym(after);
+
+        // One shared lookup for both names, so the "not found" message is
+        // written once and reports the symbol that was actually searched for.
+        let index_of = |wanted: &Symbol| {
+            symbols.iter().position(|symbol| symbol == wanted).unwrap_or_else(||
+                panic!("error in assert_before({:?}, {:?}): {:?} could not be found in {:?}", before, after, wanted, symbols)
+            )
+        };
+
+        let before_index = index_of(&before_symbol);
+        let after_index = index_of(&after_symbol);
+
+        if before_index == after_index {
+            panic!("error in assert_before({:?}, {:?}): both were at index {} in {:?}", before, after, after_index, symbols);
+        } else if before_index > after_index {
+            panic!("error in assert_before: {:?} appeared *after* {:?} (not before, as expected) in {:?}", before, after, symbols);
+        }
+    }
+
+    /// Returns the symbols bound by an `Assign` expression, in the order the
+    /// assignments appear in it.
+    ///
+    /// Panics if `expr` is not an `Assign`, or if any of its assignment
+    /// patterns is not an `Identifier`.
+    fn assigned_symbols(expr: Expr) -> Vec<Symbol> {
        match expr {
            Assign(assignments, _) => {
-                let expected_symbols: Vec<Symbol> = expected_strings.into_iter().map(sym).collect();
-                let actual_symbols: Vec<Symbol> = assignments.into_iter().map(|(pattern, _)| {
+                assignments.into_iter().map(|(pattern, _)| {
                    match pattern {
                        Identifier(symbol) => {
                            symbol
                        },
                        _ => {
-                            panic!("Called assert_assignment_order passing an Assign expr with non-Identifier patterns!");
+                            panic!("Called assigned_symbols passing an Assign expr with non-Identifier patterns!");
                        }
                    }
-                }).collect();
-
-                assert_eq!(actual_symbols, expected_symbols);
+                }).collect()
            }
            _ => {
-                panic!("Called assert_assignment_order passing a non-Assign expr!");
+                panic!("Called assigned_symbols passing a non-Assign expr!");
            }
        }
    }

-
    // CIRCULAR ASSIGNMENT

    #[test]
    fn circular_assignment() {
+        // The four definitions below form a single dependency cycle:
+        // c reads d, d reads a, a reads b, and b reads c.
        let (_, _, problems, _) = can_expr(indoc!(r#"
+            c = d + 3
+            b = 2 + c
+            d = a + 7
            a = b + 1
-            b = 2 * c
-            c = a 7

-            2 + c
+            2 + d
        "#));

+        // Exactly one problem is expected, listing the cycle's members as
+        // a, b, c, d, which differs from the order they are written in above.
+        // NOTE(review): presumably this is name-sorted order; confirm against
+        // the canonicalization code.
        assert_eq!(problems, vec![
            Problem::CircularAssignment(vec![
-                loc(unqualified("c")),
-                loc(unqualified("b")),
                loc(unqualified("a")),
+                loc(unqualified("b")),
+                loc(unqualified("c")),
+                loc(unqualified("d")),
            ])
        ]);
-
-        panic!("TODO strongly_connected_component doesn't sort these, but we want them sorted!");
    }