Use topological sort.

This commit is contained in:
Richard Feldman 2019-07-31 01:34:20 -04:00
parent e4629b1992
commit fb9cf4b60e
4 changed files with 89 additions and 11 deletions

28
Cargo.lock generated
View file

@ -87,6 +87,11 @@ dependencies = [
"typenum 1.10.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "indexmap"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "indoc"
version = "0.3.3"
@ -108,6 +113,14 @@ dependencies = [
"unindent 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "itertools"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"either 1.5.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "lazy_static"
version = "1.3.0"
@ -205,6 +218,17 @@ name = "ordermap"
version = "0.3.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "pathfinding"
version = "1.1.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"fixedbitset 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
"indexmap 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
"itertools 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"num-traits 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "petgraph"
version = "0.4.13"
@ -262,6 +286,7 @@ dependencies = [
"log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
"maplit 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
"num 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"pathfinding 1.1.12 (registry+https://github.com/rust-lang/crates.io-index)",
"petgraph 0.4.13 (registry+https://github.com/rust-lang/crates.io-index)",
"pretty_assertions 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
@ -366,8 +391,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum fraction 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "1055159ac82fb210c813303f716b6c8db57ace9d5ec2dbbc2e1d7a864c1dd74e"
"checksum fxhash 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"
"checksum im-rc 13.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "0a0197597d095c0d11107975d3175173f810ee572c2501ff4de64f4f3f119806"
"checksum indexmap 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7e81a7c05f79578dbc15793d8b619db9ba32b4577003ef3af1a91c416798c58d"
"checksum indoc 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a1f59f228c76fda6ecd8dab79683039a7054c748587f682a911094f473647bd6"
"checksum indoc-impl 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "63f070ef080db3601c1a0ecc75c7bb35104cc0ce2d7c4e049952a96a61d8933b"
"checksum itertools 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5b8467d9c1cebe26feb08c640139247fac215782d35371ade9a2136ed6085358"
"checksum lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bc5729f27f159ddd61f4df6228e827e86643d4d3e7c32183cb30a1c08f604a14"
"checksum log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c84ec4b527950aa83a329754b01dbe3f58361d1c5efacd1f6d68c494d08a17c6"
"checksum maplit 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "08cbb6b4fef96b6d77bfc40ec491b1690c779e77b05cd9f07f787ed376fd4c43"
@ -380,6 +407,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum num-rational 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f2885278d5fe2adc2f75ced642d52d879bffaceb5a2e0b1d4309ffdfb239b454"
"checksum num-traits 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "6ba9a427cfca2be13aa6f6403b0b7e7368fe982bfa16fccc450ce74c46cd9b32"
"checksum ordermap 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "a86ed3f5f244b372d6b1a00b72ef7f8876d0bc6a78a4c9985c53614041512063"
"checksum pathfinding 1.1.12 (registry+https://github.com/rust-lang/crates.io-index)" = "37691aaf6640549d85ed79575cb159843b07380d420aac9e891b627e7cc3f1f3"
"checksum petgraph 0.4.13 (registry+https://github.com/rust-lang/crates.io-index)" = "9c3659d1ee90221741f65dd128d9998311b0e40c5d3c23a62445938214abce4f"
"checksum pretty_assertions 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3a029430f0d744bc3d15dd474d591bed2402b645d024583082b9f63bb936dac6"
"checksum proc-macro-hack 0.5.8 (registry+https://github.com/rust-lang/crates.io-index)" = "982a35d1194084ba319d65c4a68d24ca28f5fdb5b8bc20899e4eef8641ea5178"

View file

@ -12,6 +12,7 @@ im-rc = "13.0.0"
fraction = "0.6.2"
num = "0.2.0"
fxhash = "0.2.1"
pathfinding = "1.1.12"
[dev-dependencies]
pretty_assertions = "0.5.1"

View file

@ -6,9 +6,10 @@ use collections::{ImSet, ImMap, MutMap};
use std::cmp::Ordering;
use expr::{Ident, VariantName};
use expr;
use pathfinding::directed::topological_sort::topological_sort;
use pathfinding::directed::bfs::bfs_loop;
use self::PatternType::*;
#[derive(Clone, Debug, PartialEq)]
pub enum Expr {
// Literals
@ -45,6 +46,7 @@ pub enum Expr {
UnrecognizedFunctionName(Located<expr::Ident>),
UnrecognizedConstant(Located<expr::Ident>),
UnrecognizedVariant(Located<expr::VariantName>),
CircularAssignment(Vec<Located<expr::Ident>>, Vec<(Pattern, Located<Expr>)>, Box<Located<Expr>>),
}
/// Problems that can occur in the course of canonicalization.
@ -507,9 +509,10 @@ fn canonicalize(
scope.idents = scope.idents.union(assigned_idents.clone());
let mut refs_by_assignment: MutMap<Symbol, (Region, References)> = MutMap::default();
let mut refs_by_assignment: MutMap<Symbol, (Located<Ident>, References)> = MutMap::default();
let mut can_assignments_by_symbol: MutMap<Symbol, (Pattern, Located<Expr>)> = MutMap::default();
let can_assignments: Vec<(Pattern, Located<Expr>)> = assignments.into_iter().map(|(loc_pattern, expr)| {
for (loc_pattern, expr) in assignments {
// Each assignment gets to have all the idents in scope that are assigned in this
// block. Order of assignments doesn't matter, thanks to referential transparency!
let (loc_can_expr, can_output) = canonicalize(env, &mut scope, expr);
@ -520,13 +523,16 @@ fn canonicalize(
remove_idents(loc_pattern.value.clone(), &mut shadowable_idents);
let can_pattern = canonicalize_pattern(env, &mut scope, &Assignment, &loc_pattern, &mut shadowable_idents);
let mut assigned_symbols = Vec::new();
// Store the referenced locals in the refs_by_assignment map, so we can later figure out
// which assigned names reference each other.
for (symbol, region) in idents_from_patterns(std::iter::once(loc_pattern.clone()), &scope).values() {
for (ident, (symbol, region)) in idents_from_patterns(std::iter::once(loc_pattern.clone()), &scope) {
let refs = can_output.references.clone();
refs_by_assignment.insert(symbol.clone(), (*region, refs));
refs_by_assignment.insert(symbol.clone(), (Located {value: ident, region}, refs));
assigned_symbols.push(symbol.clone());
}
// Give closures names (and tail-recursive status) where appropriate.
@ -567,8 +573,13 @@ fn canonicalize(
_ => loc_can_expr.value
};
(can_pattern, Located {region: loc_can_expr.region, value: can_expr})
}).collect();
for symbol in assigned_symbols {
can_assignments_by_symbol.insert(
symbol,
(can_pattern.clone(), Located {region: loc_can_expr.region, value: can_expr.clone()})
);
}
}
// The assignment as a whole is a tail call iff its return expression is a tail call.
// Use its output as a starting point because its tail_call already has the right answer!
@ -597,7 +608,7 @@ fn canonicalize(
// Now that we've collected all the references, check to see if any of the new idents
// we defined went unused by the return expression. If any were unused, report it.
for (ident, (symbol, region)) in assigned_idents {
for (ident, (symbol, region)) in assigned_idents.clone() {
if !output.references.has_local(&symbol) {
let loc_ident = Located {region: region.clone(), value: ident.clone()};
@ -605,7 +616,44 @@ fn canonicalize(
}
}
(Assign(can_assignments, Box::new(ret_expr)), output)
// Use topological sort to reorder the assignments based on their dependencies to one another.
// This way, during code gen, no assignment will refer to a value that hasn't been initialized yet.
// As a bonus, the topological sort also reveals any cycles between the assignments, allowing
// us to give a CircularAssignment error.
let successors = |symbol: &Symbol| -> ImSet<Symbol> {
let (_, references) = refs_by_assignment.get(symbol).unwrap();
references.locals.clone()
};
let assigned_symbols: Vec<Symbol> =
can_assignments_by_symbol.keys().into_iter().map(Symbol::clone).collect();
match topological_sort(assigned_symbols.as_slice(), successors) {
Ok(sorted_symbols) => {
let can_assignments =
sorted_symbols
.into_iter()
.map(|symbol| can_assignments_by_symbol.get(&symbol).unwrap().clone())
.collect();
(Assign(can_assignments, Box::new(ret_expr)), output)
},
Err(node_in_cycle) => {
// We have one node we know is in the cycle.
// We want to show the entire cycle in the error message, so expand it out.
let loc_idents_in_cycle =
bfs_loop(&node_in_cycle, successors)
.unwrap()
.into_iter()
.map(|symbol| refs_by_assignment.get(&symbol).unwrap().0.clone())
.collect();
let can_assignments = can_assignments_by_symbol.values().map(|tuple| tuple.clone()).collect();
(CircularAssignment(loc_idents_in_cycle, can_assignments, Box::new(ret_expr)), output)
}
}
},
expr::Expr::Closure(loc_arg_patterns, box_loc_body_expr) => {
@ -748,10 +796,10 @@ fn canonicalize(
(Located {region, value: expr}, output)
}
fn get_all_referenced(
fn get_all_referenced<T>(
assigned_symbol: Symbol,
visited: &mut ImSet<Symbol>,
refs_by_assignment: &MutMap<Symbol, (Region, References)>
refs_by_assignment: &MutMap<Symbol, (T, References)>
) -> References {
match refs_by_assignment.get(&assigned_symbol) {
Some((_, refs)) => {

View file

@ -18,5 +18,6 @@ extern crate im_rc;
extern crate fraction;
extern crate num;
extern crate fxhash;
extern crate pathfinding;
#[macro_use] extern crate combine;