Fix sorting by vendoring the pathfinding crate.

This commit is contained in:
Richard Feldman 2019-08-01 18:38:53 -04:00
parent e3e92b56fb
commit 00a02d597a
8 changed files with 507 additions and 143 deletions

28
Cargo.lock generated
View file

@ -87,11 +87,6 @@ dependencies = [
"typenum 1.10.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "indexmap"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "indoc"
version = "0.3.3"
@ -113,14 +108,6 @@ dependencies = [
"unindent 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "itertools"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"either 1.5.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "lazy_static"
version = "1.3.0"
@ -218,17 +205,6 @@ name = "ordermap"
version = "0.3.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "pathfinding"
version = "1.1.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"fixedbitset 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
"indexmap 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
"itertools 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"num-traits 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "petgraph"
version = "0.4.13"
@ -286,7 +262,6 @@ dependencies = [
"log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
"maplit 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
"num 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"pathfinding 1.1.12 (registry+https://github.com/rust-lang/crates.io-index)",
"petgraph 0.4.13 (registry+https://github.com/rust-lang/crates.io-index)",
"pretty_assertions 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
@ -391,10 +366,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum fraction 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "1055159ac82fb210c813303f716b6c8db57ace9d5ec2dbbc2e1d7a864c1dd74e"
"checksum fxhash 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"
"checksum im-rc 13.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "0a0197597d095c0d11107975d3175173f810ee572c2501ff4de64f4f3f119806"
"checksum indexmap 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7e81a7c05f79578dbc15793d8b619db9ba32b4577003ef3af1a91c416798c58d"
"checksum indoc 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a1f59f228c76fda6ecd8dab79683039a7054c748587f682a911094f473647bd6"
"checksum indoc-impl 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "63f070ef080db3601c1a0ecc75c7bb35104cc0ce2d7c4e049952a96a61d8933b"
"checksum itertools 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5b8467d9c1cebe26feb08c640139247fac215782d35371ade9a2136ed6085358"
"checksum lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bc5729f27f159ddd61f4df6228e827e86643d4d3e7c32183cb30a1c08f604a14"
"checksum log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c84ec4b527950aa83a329754b01dbe3f58361d1c5efacd1f6d68c494d08a17c6"
"checksum maplit 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "08cbb6b4fef96b6d77bfc40ec491b1690c779e77b05cd9f07f787ed376fd4c43"
@ -407,7 +380,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum num-rational 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f2885278d5fe2adc2f75ced642d52d879bffaceb5a2e0b1d4309ffdfb239b454"
"checksum num-traits 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "6ba9a427cfca2be13aa6f6403b0b7e7368fe982bfa16fccc450ce74c46cd9b32"
"checksum ordermap 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "a86ed3f5f244b372d6b1a00b72ef7f8876d0bc6a78a4c9985c53614041512063"
"checksum pathfinding 1.1.12 (registry+https://github.com/rust-lang/crates.io-index)" = "37691aaf6640549d85ed79575cb159843b07380d420aac9e891b627e7cc3f1f3"
"checksum petgraph 0.4.13 (registry+https://github.com/rust-lang/crates.io-index)" = "9c3659d1ee90221741f65dd128d9998311b0e40c5d3c23a62445938214abce4f"
"checksum pretty_assertions 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3a029430f0d744bc3d15dd474d591bed2402b645d024583082b9f63bb936dac6"
"checksum proc-macro-hack 0.5.8 (registry+https://github.com/rust-lang/crates.io-index)" = "982a35d1194084ba319d65c4a68d24ca28f5fdb5b8bc20899e4eef8641ea5178"

View file

@ -12,7 +12,6 @@ im-rc = "13.0.0"
fraction = "0.6.2"
num = "0.2.0"
fxhash = "0.2.1"
pathfinding = "1.1.12"
[dev-dependencies]
pretty_assertions = "0.5.1"

View file

@ -2,12 +2,11 @@ use region::{Located, Region};
use operator::Operator;
use operator::Operator::Pizza;
use operator::Associativity::*;
use collections::{ImSortedSet, ImSortedMap, MutMap, MutSortedMap, MutSet};
use collections::{ImSet, ImMap, MutMap, MutSet};
use std::cmp::Ordering;
use expr::{Ident, VariantName};
use expr;
use pathfinding::directed::topological_sort::topological_sort;
use pathfinding::directed::strongly_connected_components::strongly_connected_component;
use graph::{topological_sort, strongly_connected_component};
use self::PatternType::*;
#[derive(Clone, Debug, PartialEq)]
@ -114,13 +113,13 @@ impl Into<String> for Symbol {
#[derive(Clone, Debug, PartialEq)]
struct Scope {
pub idents: ImSortedMap<Ident, (Symbol, Region)>,
pub idents: ImMap<Ident, (Symbol, Region)>,
symbol_prefix: String,
next_unique_id: u64,
}
impl Scope {
pub fn new(symbol_prefix: String, declared_idents: ImSortedMap<Ident, (Symbol, Region)>) -> Scope {
pub fn new(symbol_prefix: String, declared_idents: ImMap<Ident, (Symbol, Region)>) -> Scope {
Scope {
symbol_prefix,
@ -176,19 +175,19 @@ struct Env {
problems: Vec<Problem>,
/// Variants either declared in this module, or imported.
variants: ImSortedMap<Symbol, Located<expr::VariantName>>,
variants: ImMap<Symbol, Located<expr::VariantName>>,
/// Former closures converted to top-level procedures.
procedures: MutSortedMap<Symbol, Procedure>,
procedures: MutMap<Symbol, Procedure>,
}
impl Env {
pub fn new(home: String, declared_variants: ImSortedMap<Symbol, Located<expr::VariantName>>) -> Env {
pub fn new(home: String, declared_variants: ImMap<Symbol, Located<expr::VariantName>>) -> Env {
Env {
home,
variants: declared_variants,
problems: Vec::new(),
procedures: MutSortedMap::default(),
procedures: MutMap::default(),
}
}
@ -218,9 +217,9 @@ pub fn canonicalize_declaration(
home: String,
name: &str,
loc_expr: Located<expr::Expr>,
declared_idents: &ImSortedMap<Ident, (Symbol, Region)>,
declared_variants: &ImSortedMap<Symbol, Located<expr::VariantName>>,
) -> (Located<Expr>, Output, Vec<Problem>, MutSortedMap<Symbol, Procedure>) {
declared_idents: &ImMap<Ident, (Symbol, Region)>,
declared_variants: &ImMap<Symbol, Located<expr::VariantName>>,
) -> (Located<Expr>, Output, Vec<Problem>, MutMap<Symbol, Procedure>) {
// If we're canonicalizing the declaration `foo = ...` inside the `Main` module,
// scope_prefix will be "Main$foo$" and its first closure will be named "Main$foo$0"
let scope_prefix = format!("{}${}$", home, name);
@ -249,19 +248,19 @@ pub struct Output {
/// so it's important that building the same code gives the same order every time!
#[derive(Clone, Debug, PartialEq)]
pub struct References {
pub locals: ImSortedSet<Symbol>,
pub globals: ImSortedSet<Symbol>,
pub variants: ImSortedSet<Symbol>,
pub calls: ImSortedSet<Symbol>,
pub locals: ImSet<Symbol>,
pub globals: ImSet<Symbol>,
pub variants: ImSet<Symbol>,
pub calls: ImSet<Symbol>,
}
impl References {
pub fn new() -> References {
References {
locals: ImSortedSet::default(),
globals: ImSortedSet::default(),
variants: ImSortedSet::default(),
calls: ImSortedSet::default(),
locals: ImSet::default(),
globals: ImSet::default(),
variants: ImSet::default(),
calls: ImSet::default(),
}
}
@ -514,13 +513,13 @@ fn canonicalize(
// Add the assigned identifiers to scope. If there's a collision, it means there
// was shadowing, which will be handled later.
let assigned_idents: ImSortedMap<Ident, (Symbol, Region)> =
let assigned_idents: ImMap<Ident, (Symbol, Region)> =
idents_from_patterns(assignments.clone().into_iter().map(|(loc_pattern, _)| loc_pattern), &scope);
scope.idents = scope.idents.union(assigned_idents.clone());
let mut refs_by_assignment: MutMap<Symbol, (Located<Ident>, References)> = MutMap::default();
let mut can_assignments_by_symbol: MutSortedMap<Symbol, (Pattern, Located<Expr>)> = MutSortedMap::default();
let mut can_assignments_by_symbol: MutMap<Symbol, (Pattern, Located<Expr>)> = MutMap::default();
for (loc_pattern, expr) in assignments {
// Each assignment gets to have all the idents in scope that are assigned in this
@ -647,7 +646,7 @@ fn canonicalize(
// This way, during code gen, no assignment will refer to a value that hasn't been initialized yet.
// As a bonus, the topological sort also reveals any cycles between the assignments, allowing
// us to give a CircularAssignment error.
let successors = |symbol: &Symbol| -> ImSortedSet<Symbol> {
let successors = |symbol: &Symbol| -> ImSet<Symbol> {
let (_, references) = refs_by_assignment.get(symbol).unwrap();
references.locals.clone()
@ -673,6 +672,7 @@ fn canonicalize(
let loc_idents_in_cycle: Vec<Located<expr::Ident>> =
strongly_connected_component(&node_in_cycle, successors)
.into_iter()
.rev() // Strongly connected component gives us the reverse of the sorting we want!
.map(|symbol| refs_by_assignment.get(&symbol).unwrap().0.clone())
.collect();
@ -702,7 +702,7 @@ fn canonicalize(
// Add the arguments' idents to scope.idents. If there's a collision,
// it means there was shadowing, which will be handled later.
let arg_idents: ImSortedMap<Ident, (Symbol, Region)> =
let arg_idents: ImMap<Ident, (Symbol, Region)> =
idents_from_patterns(loc_arg_patterns.clone().into_iter(), &scope);
scope.idents = scope.idents.union(arg_idents.clone());
@ -768,7 +768,7 @@ fn canonicalize(
// Patterns introduce new idents to the scope!
// Add the assigned identifiers to scope. If there's a collision, it means there
// was shadowing, which will be handled later.
let assigned_idents: ImSortedMap<Ident, (Symbol, Region)> =
let assigned_idents: ImMap<Ident, (Symbol, Region)> =
idents_from_patterns(std::iter::once(loc_pattern), &scope);
scope.idents = scope.idents.union(assigned_idents.clone());
@ -831,7 +831,7 @@ fn references_from_local<T>(
assigned_symbol: Symbol,
visited: &mut MutSet<Symbol>,
refs_by_assignment: &MutMap<Symbol, (T, References)>,
procedures: &MutSortedMap<Symbol, Procedure>,
procedures: &MutMap<Symbol, Procedure>,
) -> References {
match refs_by_assignment.get(&assigned_symbol) {
Some((_, refs)) => {
@ -869,11 +869,10 @@ fn references_from_call<T>(
call_symbol: Symbol,
visited: &mut MutSet<Symbol>,
refs_by_assignment: &MutMap<Symbol, (T, References)>,
procedures: &MutSortedMap<Symbol, Procedure>,
procedures: &MutMap<Symbol, Procedure>,
) -> References {
// This shuold be safe to unwrap. All unrecognized call symbols should have been recorded as
// such, and should never have made it into output.references.calls!
let procedure = procedures.get(&call_symbol).unwrap();
match procedures.get(&call_symbol) {
Some(procedure) => {
let mut answer = procedure.references.clone();
visited.insert(call_symbol);
@ -899,13 +898,20 @@ fn references_from_call<T>(
}
answer
},
None => {
// If the call symbol was not in the procedures map, that means we're calling a non-function and
// will get a type mismatch later. For now, assume no references as a result of the "call."
References::new()
}
}
}
fn idents_from_patterns<I>(loc_patterns: I, scope: &Scope) -> ImSortedMap<Ident, (Symbol, Region)>
fn idents_from_patterns<I>(loc_patterns: I, scope: &Scope) -> ImMap<Ident, (Symbol, Region)>
where I: Iterator<Item = Located<expr::Pattern>>
{
let mut answer = ImSortedMap::default();
let mut answer = ImMap::default();
for loc_pattern in loc_patterns {
add_idents_from_pattern(loc_pattern, scope, &mut answer);
@ -918,7 +924,7 @@ where I: Iterator<Item = Located<expr::Pattern>>
fn add_idents_from_pattern(
loc_pattern: Located<expr::Pattern>,
scope: &Scope,
answer: &mut ImSortedMap<Ident, (Symbol, Region)>
answer: &mut ImMap<Ident, (Symbol, Region)>
) {
use expr::Pattern::*;
@ -940,7 +946,7 @@ fn add_idents_from_pattern(
fn remove_idents(
pattern: expr::Pattern,
idents: &mut ImSortedMap<Ident, (Symbol, Region)>
idents: &mut ImMap<Ident, (Symbol, Region)>
) {
use expr::Pattern::*;
@ -1045,7 +1051,7 @@ fn canonicalize_pattern(
scope: &mut Scope,
pattern_type: &PatternType,
loc_pattern: &Located<expr::Pattern>,
shadowable_idents: &mut ImSortedMap<Ident, (Symbol, Region)>,
shadowable_idents: &mut ImMap<Ident, (Symbol, Region)>,
) -> Pattern {
use expr::Pattern::*;

View file

@ -2,35 +2,26 @@ use std::hash::BuildHasherDefault;
pub use fxhash::FxHasher;
#[inline(always)]
pub fn default_hasher() -> BuildHasherDefault<FxHasher> {
BuildHasherDefault::default()
}
pub type BuildHasher = BuildHasherDefault<FxHasher>;
// Versions of HashMap and HashSet from both std and im_rc
// which use the FxHash hasher instead of the default SipHash hasher.
// FxHash is faster but less secure; that's fine, since this compiler
// doesn't need cryptographically secure hashes, and also is not a
// server concerned about hash flooding attacks!
pub type MutMap<K, V> =
std::collections::HashMap<K, V, BuildHasherDefault<FxHasher>>;
std::collections::HashMap<K, V, BuildHasher>;
pub type MutSet<K> =
std::collections::HashSet<K, BuildHasherDefault<FxHasher>>;
std::collections::HashSet<K, BuildHasher>;
pub type ImMap<K, V> =
im_rc::hashmap::HashMap<K, V, BuildHasherDefault<FxHasher>>;
im_rc::hashmap::HashMap<K, V, BuildHasher>;
pub type ImSet<K> =
im_rc::hashset::HashSet<K, BuildHasherDefault<FxHasher>>;
// OrdMap equivalents, for naming symmetry.
// Someday we may switch these implementations out.
pub type MutSortedMap<K, V> =
std::collections::BTreeMap<K, V>;
pub type MutSortedSet<K> =
std::collections::BTreeSet<K>;
pub type ImSortedMap<K, V> =
im_rc::ordmap::OrdMap<K, V>;
pub type ImSortedSet<K> =
im_rc::ordset::OrdSet<K>;
im_rc::hashset::HashSet<K, BuildHasher>;

375
src/graph.rs Normal file
View file

@ -0,0 +1,375 @@
// Adapted from the Pathfinding crate by Samuel Tardieu <sam@rfc1149.net>,
// licensed under the Apache License, version 2.0 - https://www.apache.org/licenses/LICENSE-2.0
//
// The original source code can be found at: https://github.com/samueltardieu/pathfinding
//
// Thank you, Samuel!
//
//
//
// This is modified from the original source to use the Roc compiler's preferred hashers
// instead of the SipHash hasher which Rust hash collections use by default.
//
// SipHash defends against hash flooding attacks by generating a random seed
// whenever a new hasher is instantiated. This is designed to prevent attackers
// from crafting intentional collisions that amplify denial-of-service attacks.
// Since this is a compiler, we aren't worried about denial-of-service attacks.
//
// The primary motivation for this change is wanting the compiler to always give exactly
// the same answer given the same inputs. So if you give it the same source files, it should
// produce identical binaries every time. SipHash by design gives different answers on each run.
//
// Secondarily, SipHash isn't the fastest hashing algorithm out there, so we can get
// slightly better performance by using a faster hasher.
// Find a topological order in a directed graph if one exists.
use collections::{BuildHasher, MutSet, default_hasher};
use std::collections::{HashMap, HashSet, VecDeque};
use std::hash::Hash;
use std::mem;
/// Find a topological order in a directed graph if one exists.
///
/// - `nodes` is a collection of nodes.
/// - `successors` returns a list of successors for a given node.
///
/// The function returns either `Ok` with an acceptable topological order,
/// or `Err` with a node belonging to a cycle. In the latter case, the
/// strongly connected set can then be found using the
/// [`strongly_connected_component`](self::strongly_connected_component)
/// function.
///
/// The traversal is a recursive depth-first search (see `visit`), so the
/// recursion depth grows with the longest path that gets explored.
///
/// # Examples
///
/// The examples below are marked `ignore` because this module is private to
/// the crate and therefore cannot be imported from a doctest.
///
/// We will sort integers from 1 to 9, each integer having its two immediate
/// greater numbers as successors:
///
/// ```ignore
/// fn successors(node: &usize) -> Vec<usize> {
///     match *node {
///         n if n <= 7 => vec![n + 1, n + 2],
///         8 => vec![9],
///         _ => vec![],
///     }
/// }
///
/// let sorted = topological_sort(&[3, 7, 1, 4, 2, 9, 8, 6, 5], successors);
/// assert_eq!(sorted, Ok(vec![1, 2, 3, 4, 5, 6, 7, 8, 9]));
/// ```
///
/// If, however, there is a loop in the graph (for example, all nodes but 7
/// also have 7 as a successor), one of the nodes in the loop will be returned
/// as an error:
///
/// ```ignore
/// fn successors(node: &usize) -> Vec<usize> {
///     match *node {
///         n if n <= 6 => vec![n + 1, n + 2, 7],
///         7 => vec![8, 9],
///         8 => vec![7, 9],
///         _ => vec![7],
///     }
/// }
///
/// let sorted = topological_sort(&[3, 7, 1, 4, 2, 9, 8, 6, 5], successors);
/// assert!(sorted.is_err());
///
/// // Let's assume that the returned node is 8 (it can be any node which is
/// // part of a loop). We can request the whole strongly connected set
/// // containing 8. Here 7, 8, and 9 are all reachable from one another.
///
/// let mut set = strongly_connected_component(&8, successors);
/// set.sort();
/// assert_eq!(set, vec![7, 8, 9]);
/// ```
pub fn topological_sort<N, FN, IN>(nodes: &[N], mut successors: FN) -> Result<Vec<N>, N>
where
    N: Eq + Hash + Clone,
    FN: FnMut(&N) -> IN,
    IN: IntoIterator<Item = N>,
{
    // Nodes that have not been reached by any traversal yet.
    let mut unmarked: MutSet<N> = nodes.iter().cloned().collect();
    // Nodes whose successors have all been fully processed.
    let mut marked = HashSet::with_capacity_and_hasher(nodes.len(), default_hasher());
    // Nodes entered during the current traversal; reaching one of them again
    // before it is marked means we walked around a cycle.
    let mut temp = MutSet::default();
    let mut sorted = VecDeque::with_capacity(nodes.len());

    // Start a fresh traversal from any node that is still unmarked;
    // `visit` removes every node it reaches from `unmarked`.
    while let Some(node) = unmarked.iter().cloned().next() {
        temp.clear();
        visit(
            &node,
            &mut successors,
            &mut unmarked,
            &mut marked,
            &mut temp,
            &mut sorted,
        )?;
    }

    Ok(sorted.into_iter().collect())
}
/// Depth-first step of `topological_sort`.
///
/// Removes `node` from `unmarked`, recursively visits each of its successors,
/// and then pushes `node` onto the front of `sorted`, so that a node always
/// ends up ahead of everything reachable from it.
///
/// Returns `Err` with a clone of `node` if it is entered a second time before
/// being marked as finished, which means it lies on a cycle.
fn visit<N, FN, IN>(
    node: &N,
    successors: &mut FN,
    unmarked: &mut MutSet<N>,
    marked: &mut MutSet<N>,
    temp: &mut MutSet<N>,
    sorted: &mut VecDeque<N>,
) -> Result<(), N>
where
    N: Eq + Hash + Clone,
    FN: FnMut(&N) -> IN,
    IN: IntoIterator<Item = N>,
{
    unmarked.remove(node);

    // Already fully processed by an earlier traversal: nothing left to do.
    if marked.contains(node) {
        return Ok(());
    }

    // Entered but not yet finished: we came back around to it.
    if temp.contains(node) {
        return Err(node.clone());
    }
    temp.insert(node.clone());

    // Finish every successor first, bailing out on the first cycle found.
    successors(node)
        .into_iter()
        .try_for_each(|next| visit(&next, successors, unmarked, marked, temp, sorted))?;

    marked.insert(node.clone());
    sorted.push_front(node.clone());

    Ok(())
}
/// Topologically sort a directed graph into groups of independent nodes.
///
/// - `nodes` is a collection of nodes.
/// - `successors` returns a list of successors for a given node.
///
/// This works like [`topological_sort`](self::topological_sort), except that
/// the result is a list of groups instead of a single flat ordering. The
/// first group holds the nodes that are not a successor of any node; each
/// following group holds the nodes whose predecessors have all been placed in
/// earlier groups. Concatenating the groups yields a valid topological order
/// however the nodes inside each group are arranged. The order of nodes
/// within a group is unspecified.
///
/// Returns `Ok` with the list of groups, or `Err` with a
/// `(groups, remaining)` tuple holding the (possibly empty) groups that could
/// be built plus the nodes that could not be grouped because of cycles. In
/// the error case, the strongly connected set(s) can be found by calling
/// [`strongly_connected_components`](self::strongly_connected_components)
/// on the remaining nodes.
///
/// The implementation is a variation of [Kahn's
/// algorithm](https://en.wikipedia.org/wiki/Topological_sorting#Kahn's_algorithm),
/// and runs in O(|V| + |E|) time.
///
/// # Panics
///
/// Panics if `successors` yields a node that is not present in `nodes`.
#[allow(clippy::type_complexity)]
#[allow(dead_code)]
pub fn topological_sort_into_groups<N, FN, IN>(
    nodes: &[N],
    mut successors: FN,
) -> Result<Vec<Vec<N>>, (Vec<Vec<N>>, Vec<N>)>
where
    N: Eq + Hash + Clone,
    FN: FnMut(&N) -> IN,
    IN: IntoIterator<Item = N>,
{
    if nodes.is_empty() {
        return Ok(Vec::new());
    }

    // succs_map: node -> its (deduplicated) successors.
    // preds_map: node -> number of predecessors not yet placed in a group.
    let mut succs_map: HashMap<N, MutSet<N>, BuildHasher> =
        HashMap::with_capacity_and_hasher(nodes.len(), default_hasher());
    let mut preds_map: HashMap<N, usize, BuildHasher> =
        HashMap::with_capacity_and_hasher(nodes.len(), default_hasher());

    for node in nodes {
        succs_map.insert(node.clone(), successors(node).into_iter().collect());
        preds_map.insert(node.clone(), 0);
    }

    // Every edge contributes one predecessor to its target.
    for succ in succs_map.values().flatten() {
        *preds_map.get_mut(succ).unwrap() += 1;
    }

    let mut groups: Vec<Vec<N>> = Vec::new();

    // Seed with the nodes nobody points at.
    let mut prev_group: Vec<N> = preds_map
        .iter()
        .filter(|&(_, &pending)| pending == 0)
        .map(|(node, _)| node.clone())
        .collect();

    if prev_group.is_empty() {
        // Every node has a predecessor, so nothing can go first.
        let remaining: Vec<N> = preds_map.into_iter().map(|entry| entry.0).collect();
        return Err((Vec::new(), remaining));
    }

    for placed in &prev_group {
        preds_map.remove(placed);
    }

    while !preds_map.is_empty() {
        let mut next_group: Vec<N> = Vec::new();

        for node in &prev_group {
            for succ in &succs_map[node] {
                // One more predecessor of `succ` has been placed.
                let still_pending = {
                    let pending = preds_map.get_mut(succ).unwrap();
                    *pending -= 1;
                    *pending
                };

                if still_pending == 0 {
                    next_group.push(preds_map.remove_entry(succ).unwrap().0);
                }
            }
        }

        groups.push(mem::replace(&mut prev_group, next_group));

        if prev_group.is_empty() {
            // Nodes remain but none became free: they are stuck behind a cycle.
            let remaining: Vec<N> = preds_map.into_iter().map(|entry| entry.0).collect();
            return Err((groups, remaining));
        }
    }

    groups.push(prev_group);

    Ok(groups)
}
// Separate nodes of a directed graph into [strongly connected
// components](https://en.wikipedia.org/wiki/Strongly_connected_component).
//
// A [path-based strong component
// algorithm](https://en.wikipedia.org/wiki/Path-based_strong_component_algorithm)
// is used.
/// Mutable working state shared by the strongly-connected-component
/// functions in this module and threaded through `recurse_onto`.
struct Params<N, FN, IN>
where
N: Clone + Hash + Eq,
FN: FnMut(&N) -> IN,
IN: IntoIterator<Item = N>,
{
/// Preorder number of each known node. `None` means the node was supplied
/// up front but has not been visited yet. An entry is removed once its
/// node has been assigned to a component.
preorders: HashMap<N, Option<usize>, BuildHasher>,
/// Next preorder number to hand out; incremented on every visit.
c: usize,
/// Callback returning the successors of a node.
successors: FN,
/// Stack whose top is compared with the current node to decide whether
/// that node closes a component; entries with a larger preorder number
/// than an already-visited successor are popped off.
p: Vec<N>,
/// Stack of visited nodes that have not been assigned to a component yet.
s: Vec<N>,
/// Components found so far, in the order they were completed.
scc: Vec<Vec<N>>,
/// Every node that has already been assigned to a component.
scca: MutSet<N>,
}
impl<N, FN, IN> Params<N, FN, IN>
where
    N: Clone + Hash + Eq,
    FN: FnMut(&N) -> IN,
    IN: IntoIterator<Item = N>,
{
    /// Build the initial state: every node in `nodes` is registered without
    /// a preorder number, and all stacks and result collections start empty.
    fn new(nodes: &[N], successors: FN) -> Self {
        let preorders: HashMap<N, Option<usize>, BuildHasher> =
            nodes.iter().cloned().map(|node| (node, None)).collect();

        Params {
            preorders,
            c: 0,
            successors,
            p: Vec::new(),
            s: Vec::new(),
            scc: Vec::new(),
            scca: MutSet::default(),
        }
    }
}
/// Visit `v`, recursively visit its unvisited successors, and, if `v` turns
/// out to be on top of the `p` stack afterwards, pop a finished component
/// off the `s` stack and append it to `params.scc`.
fn recurse_onto<N, FN, IN>(v: &N, params: &mut Params<N, FN, IN>)
where
    N: Clone + Hash + Eq,
    FN: FnMut(&N) -> IN,
    IN: IntoIterator<Item = N>,
{
    // Number the node and put it on both stacks.
    let preorder = params.c;
    params.preorders.insert(v.clone(), Some(preorder));
    params.c += 1;
    params.s.push(v.clone());
    params.p.push(v.clone());

    for w in (params.successors)(v) {
        // Successors that already belong to a component are finished.
        if params.scca.contains(&w) {
            continue;
        }

        let seen_at: Option<usize> = params.preorders.get(&w).and_then(|entry| *entry);

        match seen_at {
            Some(pw) => {
                // `w` was visited earlier but is not in a component yet:
                // drop everything from `p` that was numbered after it.
                loop {
                    let top = &params.p[params.p.len() - 1];
                    if params.preorders[top].unwrap() <= pw {
                        break;
                    }
                    params.p.pop();
                }
            }
            None => recurse_onto(&w, params),
        }
    }

    // Only a node still on top of `p` closes a component.
    if params.p[params.p.len() - 1] != *v {
        return;
    }
    params.p.pop();

    // Everything on `s` down to and including `v` forms the component.
    let mut component = Vec::new();
    loop {
        let node = match params.s.pop() {
            Some(node) => node,
            None => break,
        };
        let reached_root = node == *v;

        params.scca.insert(node.clone());
        params.preorders.remove(&node);
        component.push(node);

        if reached_root {
            break;
        }
    }
    params.scc.push(component);
}
/// Partition nodes reachable from a starting point into strongly connected components.
///
/// - `start` is the node we want to explore the graph from.
/// - `successors` returns a list of successors for a given node.
///
/// The function returns a list of strongly connected components sets. It will contain
/// at least one component (the one containing the `start` node).
pub fn strongly_connected_components_from<N, FN, IN>(start: &N, successors: FN) -> Vec<Vec<N>>
where
N: Clone + Hash + Eq,
FN: FnMut(&N) -> IN,
IN: IntoIterator<Item = N>,
{
let mut params = Params::new(&[], successors);
recurse_onto(start, &mut params);
params.scc
}
/// Compute the strongly connected component containing a given node.
///
/// - `node` is the node whose strongly connected component is wanted.
/// - `successors` returns a list of successors for a given node.
///
/// Returns the strongly connected component containing `node`, which is
/// guaranteed to contain at least `node` itself.
pub fn strongly_connected_component<N, FN, IN>(node: &N, successors: FN) -> Vec<N>
where
    N: Clone + Hash + Eq,
    FN: FnMut(&N) -> IN,
    IN: IntoIterator<Item = N>,
{
    let mut components = strongly_connected_components_from(node, successors);

    // The last component in the list is the one taken as `node`'s own.
    components.pop().unwrap()
}
/// Partition all strongly connected components in a graph.
///
/// - `nodes` is a collection of nodes.
/// - `successors` returns a list of successors for a given node.
///
/// Returns the list of strongly connected components.
#[allow(dead_code)]
pub fn strongly_connected_components<N, FN, IN>(nodes: &[N], successors: FN) -> Vec<Vec<N>>
where
    N: Clone + Hash + Eq,
    FN: FnMut(&N) -> IN,
    IN: IntoIterator<Item = N>,
{
    let mut state = Params::new(nodes, successors);

    // Keep starting a traversal from any node that is still registered;
    // `recurse_onto` unregisters every node it assigns to a component.
    loop {
        let pending = state.preorders.keys().next().cloned();

        match pending {
            Some(root) => recurse_onto(&root, &mut state),
            None => break,
        }
    }

    state.scc
}

View file

@ -6,7 +6,7 @@ pub mod operator;
pub mod region;
pub mod canonicalize;
pub mod collections;
// mod ena;
mod graph;
// #[macro_use]
// extern crate log;
@ -18,6 +18,5 @@ extern crate im_rc;
extern crate fraction;
extern crate num;
extern crate fxhash;
extern crate pathfinding;
#[macro_use] extern crate combine;

View file

@ -1,6 +1,7 @@
use roc::expr::{Expr, Pattern};
use roc::region::{Located, Region};
use roc::collections::{MutSortedMap};
use std::hash::Hash;
use roc::collections::{MutMap};
pub fn loc_box<T>(val: T) -> Box<Located<T>> {
Box::new(loc(val))
@ -73,11 +74,11 @@ pub fn zero_loc_pattern(loc_pattern: Located<Pattern>) -> Located<Pattern> {
}
#[allow(dead_code)] // For some reason rustc thinks this isn't used. It is, though, in test_canonicalize.rs
pub fn mut_sorted_map_from_pairs<K, V, I>(pairs: I) -> MutSortedMap<K, V>
pub fn mut_map_from_pairs<K, V, I>(pairs: I) -> MutMap<K, V>
where I: IntoIterator<Item=(K, V)>,
K: Ord
K: Hash + Eq
{
let mut answer = MutSortedMap::default();
let mut answer = MutMap::default();
for (key, value) in pairs {
answer.insert(key, value);

View file

@ -17,22 +17,22 @@ mod test_canonicalize {
use roc::operator::Operator;
use roc::region::{Located, Region};
use roc::parse;
use roc::collections::{ImSortedMap, ImSortedSet, MutSortedMap};
use roc::collections::{ImMap, ImSet, MutMap};
use roc::parse_state::{IndentablePosition};
use combine::{Parser, eof};
use combine::stream::state::{State};
use helpers::{loc, loc_box, empty_region, zero_loc_expr, mut_sorted_map_from_pairs};
use helpers::{loc, loc_box, empty_region, zero_loc_expr, mut_map_from_pairs};
fn can_expr(expr_str: &str) -> (Expr, Output, Vec<Problem>, MutSortedMap<Symbol, Procedure>) {
can_expr_with("testDecl", expr_str, &ImSortedMap::default(), &ImSortedMap::default())
fn can_expr(expr_str: &str) -> (Expr, Output, Vec<Problem>, MutMap<Symbol, Procedure>) {
can_expr_with("testDecl", expr_str, &ImMap::default(), &ImMap::default())
}
fn can_expr_with(
name: &str,
expr_str: &str,
declared_idents: &ImSortedMap<Ident, (Symbol, Region)>,
declared_variants: &ImSortedMap<Symbol, Located<expr::VariantName>>,
) -> (Expr, Output, Vec<Problem>, MutSortedMap<Symbol, Procedure>) {
declared_idents: &ImMap<Ident, (Symbol, Region)>,
declared_variants: &ImMap<Symbol, Located<expr::VariantName>>,
) -> (Expr, Output, Vec<Problem>, MutMap<Symbol, Procedure>) {
let parse_state: State<&str, IndentablePosition> = State::with_positioner(expr_str, IndentablePosition::default());
let expr = match parse::expr().skip(eof()).easy_parse(parse_state) {
Ok((expr, state)) => {
@ -90,8 +90,8 @@ mod test_canonicalize {
}
}
fn vec_to_set<'a>(vec: Vec<&'a str>) -> ImSortedSet<Symbol> {
ImSortedSet::from(vec.into_iter().map(sym).collect::<Vec<_>>())
fn vec_to_set<'a>(vec: Vec<&'a str>) -> ImSet<Symbol> {
ImSet::from(vec.into_iter().map(sym).collect::<Vec<_>>())
}
// BASIC CANONICALIZATION
@ -117,7 +117,7 @@ mod test_canonicalize {
}.into());
assert_eq!(procedures,
mut_sorted_map_from_pairs(vec![(sym("func"),
mut_map_from_pairs(vec![(sym("func"),
Procedure {
name: Some("func".to_string()),
is_self_tail_recursive: false,
@ -340,59 +340,80 @@ mod test_canonicalize {
tail_call: None
}.into());
// This should get reordered to the following, so that in code gen
// everything will have been set before it gets read.
// (The order of the function definitions doesn't matter.)
assert_assignment_order(expr,
vec!["func1", "x", "z", "func2", "y"],
);
let symbols = assigned_symbols(expr);
// In code gen, for everything to have been set before it gets read,
// the following must be true about when things are assigned:
//
// x and func2 must be assigned (in either order) before y
// y and func1 must be assigned (in either order) before z
assert_before("x", "y", &symbols);
assert_before("func2", "y", &symbols);
assert_before("func1", "z", &symbols);
assert_before("y", "z", &symbols);
}
fn assert_assignment_order(expr: Expr, expected_strings: Vec<&str>) {
/// Panic unless `before` appears at a strictly smaller index than `after`
/// in `symbols`.
///
/// Both names are converted with `sym` before being looked up. Panics if the
/// two names are equal, if either symbol is missing from `symbols`, or if
/// `before` does not come first.
fn assert_before(before: &str, after: &str, symbols: &[Symbol]) {
    assert_ne!(before, after);

    let before_symbol = sym(before);
    let after_symbol = sym(after);

    // Index of the first occurrence of `target`, if any.
    let index_of = |target: &Symbol| symbols.iter().position(|symbol| symbol == target);

    let before_index = index_of(&before_symbol).unwrap_or_else(||
        panic!("error in assert_before({:?}, {:?}): {:?} could not be found in {:?}", before, after, before_symbol, symbols)
    );
    let after_index = index_of(&after_symbol).unwrap_or_else(||
        panic!("error in assert_before({:?}, {:?}): {:?} could not be found in {:?}", before, after, after_symbol, symbols)
    );

    if before_index == after_index {
        panic!("error in assert_before({:?}, {:?}): both were at index {} in {:?}", before, after, after_index, symbols);
    } else if before_index > after_index {
        panic!("error in assert_before: {:?} appeared *after* {:?} (not before, as expected) in {:?}", before, after, symbols);
    }
}
fn assigned_symbols(expr: Expr) -> Vec<Symbol> {
match expr {
Assign(assignments, _) => {
let expected_symbols: Vec<Symbol> = expected_strings.into_iter().map(sym).collect();
let actual_symbols: Vec<Symbol> = assignments.into_iter().map(|(pattern, _)| {
assignments.into_iter().map(|(pattern, _)| {
match pattern {
Identifier(symbol) => {
symbol
},
_ => {
panic!("Called assert_assignment_order passing an Assign expr with non-Identifier patterns!");
panic!("Called assigned_symbols passing an Assign expr with non-Identifier patterns!");
}
}
}).collect();
assert_eq!(actual_symbols, expected_symbols);
}).collect()
}
_ => {
panic!("Called assert_assignment_order passing a non-Assign expr!");
panic!("Called assigned_symbols passing a non-Assign expr!");
}
}
}
// CIRCULAR ASSIGNMENT
#[test]
fn circular_assignment() {
let (_, _, problems, _) = can_expr(indoc!(r#"
c = d + 3
b = 2 + c
d = a + 7
a = b + 1
b = 2 * c
c = a 7
2 + c
2 + d
"#));
assert_eq!(problems, vec![
Problem::CircularAssignment(vec![
loc(unqualified("c")),
loc(unqualified("b")),
loc(unqualified("a")),
loc(unqualified("b")),
loc(unqualified("c")),
loc(unqualified("d")),
])
]);
panic!("TODO strongly_connected_component doesn't sort these, but we want them sorted!");
}