remove old def sorting code

This commit is contained in:
Folkert 2022-04-22 15:09:45 +02:00
parent c763d51551
commit 8837eb2c19
No known key found for this signature in database
GPG key ID: 1F17F6FFD112B97C
2 changed files with 3 additions and 434 deletions

View file

@ -4,7 +4,7 @@ use crate::annotation::IntroducedVariables;
use crate::env::Env;
use crate::expr::ClosureData;
use crate::expr::Expr::{self, *};
use crate::expr::{canonicalize_expr, local_successors_with_duplicates, Output, Recursive};
use crate::expr::{canonicalize_expr, Output, Recursive};
use crate::pattern::{bindings_from_patterns, canonicalize_def_header_pattern, Pattern};
use crate::procedure::References;
use crate::scope::create_alias;
@ -1108,409 +1108,6 @@ pub fn sort_can_defs_improved(
}
}
#[inline(always)]
pub fn sort_can_defs(
env: &mut Env<'_>,
defs: CanDefs,
mut output: Output,
) -> (Result<Vec<Declaration>, RuntimeError>, Output) {
let CanDefs {
refs_by_symbol,
mut can_defs_by_symbol,
aliases,
} = defs;
for (symbol, alias) in aliases.into_iter() {
output.aliases.insert(symbol, alias);
}
let mut defined_symbols: Vec<Symbol> = Vec::new();
for symbol in can_defs_by_symbol.keys() {
defined_symbols.push(*symbol);
}
// Use topological sort to reorder the defs based on their dependencies to one another.
// This way, during code gen, no def will refer to a value that hasn't been initialized yet.
// As a bonus, the topological sort also reveals any cycles between the defs, allowing
// us to give a CircularAssignment error for invalid (mutual) recursion, and a `DeclareRec` for mutually
// recursive definitions.
// All successors that occur in the body of a symbol.
let all_successors_without_self = |symbol: &Symbol| -> Vec<Symbol> {
// This may not be in refs_by_symbol. For example, the `f` in `f x` here:
//
// f = \z -> z
//
// (\x ->
// a = f x
// x
// )
//
// It's not part of the current defs (the one with `a = f x`); rather,
// it's in the enclosing scope. It's still referenced though, so successors
// will receive it as an argument!
match refs_by_symbol.get(symbol) {
Some((_, references)) => {
// We can only sort the symbols at the current level. That is safe because
// symbols defined at higher levels cannot refer to symbols at lower levels.
// Therefore they can never form a cycle!
//
// In the above example, `f` cannot reference `a`, and in the closure
// a call to `f` cannot cycle back to `a`.
let mut loc_succ = local_successors_with_duplicates(references, &env.closures);
// if the current symbol is a closure, peek into its body
if let Some(references) = env.closures.get(symbol) {
let home = env.home;
for lookup in references.value_lookups() {
if lookup != symbol && lookup.module_id() == home {
// DO NOT register a self-call behind a lambda!
//
// We allow `boom = \_ -> boom {}`, but not `x = x`
loc_succ.push(*lookup);
}
}
}
// remove anything that is not defined in the current block
loc_succ.retain(|key| defined_symbols.contains(key));
loc_succ.sort();
loc_succ.dedup();
loc_succ
}
None => vec![],
}
};
// All successors that occur in the body of a symbol, including the symbol itself
// This is required to determine whether a symbol is recursive. Recursive symbols
// (that are not faulty) always need a DeclareRec, even if there is just one symbol in the
// group
let mut all_successors_with_self = |symbol: &Symbol| -> Vec<Symbol> {
// This may not be in refs_by_symbol. For example, the `f` in `f x` here:
//
// f = \z -> z
//
// (\x ->
// a = f x
// x
// )
//
// It's not part of the current defs (the one with `a = f x`); rather,
// it's in the enclosing scope. It's still referenced though, so successors
// will receive it as an argument!
match refs_by_symbol.get(symbol) {
Some((_, references)) => {
// We can only sort the symbols at the current level. That is safe because
// symbols defined at higher levels cannot refer to symbols at lower levels.
// Therefore they can never form a cycle!
//
// In the above example, `f` cannot reference `a`, and in the closure
// a call to `f` cannot cycle back to `a`.
let mut loc_succ = local_successors_with_duplicates(references, &env.closures);
// if the current symbol is a closure, peek into its body
if let Some(references) = env.closures.get(symbol) {
for lookup in references.value_lookups() {
loc_succ.push(*lookup);
}
}
// remove anything that is not defined in the current block
loc_succ.retain(|key| defined_symbols.contains(key));
loc_succ.sort();
loc_succ.dedup();
loc_succ
}
None => vec![],
}
};
// If a symbol is a direct successor of itself, there is an invalid cycle.
// The difference with the function above is that this one does not look behind lambdas,
// but does consider direct self-recursion.
let direct_successors = |symbol: &Symbol| -> Vec<Symbol> {
match refs_by_symbol.get(symbol) {
Some((_, references)) => {
let mut loc_succ = local_successors_with_duplicates(references, &env.closures);
// NOTE: if the symbol is a closure we DONT look into its body
// remove anything that is not defined in the current block
loc_succ.retain(|key| defined_symbols.contains(key));
// NOTE: direct recursion does matter here: `x = x` is invalid recursion!
loc_succ.sort();
loc_succ.dedup();
loc_succ
}
None => vec![],
}
};
let grouped2 = ven_graph::topological_sort_into_groups(
defined_symbols.as_slice(),
all_successors_without_self,
);
// TODO also do the same `addDirects` check elm/compiler does, so we can
// report an error if a recursive definition can't possibly terminate!
match grouped2 {
Ok(groups) => {
let mut declarations = Vec::new();
// groups are in reversed order
for group in groups.into_iter().rev() {
group_to_declaration(
&group,
&env.closures,
&mut all_successors_with_self,
&mut can_defs_by_symbol,
&mut declarations,
);
}
(Ok(declarations), output)
}
Err((mut groups, nodes_in_cycle)) => {
let mut declarations = Vec::new();
let mut problems = Vec::new();
// nodes_in_cycle are symbols that form a syntactic cycle. That isn't always a problem,
// and in general it's impossible to decide whether it is. So we use a crude heuristic:
//
// Definitions where the cycle occurs behind a lambda are OK
//
// boom = \_ -> boom {}
//
// But otherwise we report an error, e.g.
//
// foo = if b then foo else bar
for cycle in strongly_connected_components(&nodes_in_cycle, all_successors_without_self)
{
// check whether the cycle is faulty, which is when it has
// a direct successor in the current cycle. This catches things like:
//
// x = x
//
// or
//
// p = q
// q = p
let is_invalid_cycle = match cycle.get(0) {
Some(symbol) => {
let mut succs = direct_successors(symbol);
succs.retain(|key| cycle.contains(key));
!succs.is_empty()
}
None => false,
};
if is_invalid_cycle {
// We want to show the entire cycle in the error message, so expand it out.
let mut entries = Vec::new();
for symbol in &cycle {
match refs_by_symbol.get(symbol) {
None => unreachable!(
r#"Symbol `{:?}` not found in refs_by_symbol! refs_by_symbol was: {:?}"#,
symbol, refs_by_symbol
),
Some((region, _)) => {
let expr_region =
can_defs_by_symbol.get(symbol).unwrap().loc_expr.region;
let entry = CycleEntry {
symbol: *symbol,
symbol_region: *region,
expr_region,
};
entries.push(entry);
}
}
}
// Sort them by line number to make the report more helpful.
entries.sort_by_key(|entry| entry.symbol_region);
problems.push(Problem::RuntimeError(RuntimeError::CircularDef(
entries.clone(),
)));
declarations.push(Declaration::InvalidCycle(entries));
}
// if it's an invalid cycle, other groups may depend on the
// symbols defined here, so also push this cycle onto the groups
//
// if it's not an invalid cycle, this is slightly inefficient,
// because we know this becomes exactly one DeclareRec already
groups.push(cycle);
}
// now we have a collection of groups whose dependencies are not cyclic.
// They are however not yet topologically sorted. Here we have to get a bit
// creative to get all the definitions in the correct sorted order.
let mut group_ids = Vec::with_capacity(groups.len());
let mut symbol_to_group_index = MutMap::default();
for (i, group) in groups.iter().enumerate() {
for symbol in group {
symbol_to_group_index.insert(*symbol, i);
}
group_ids.push(i);
}
let successors_of_group = |group_id: &usize| {
let mut result = MutSet::default();
// for each symbol in this group
for symbol in &groups[*group_id] {
// find its successors
for succ in all_successors_without_self(symbol) {
// and add its group to the result
match symbol_to_group_index.get(&succ) {
Some(index) => {
result.insert(*index);
}
None => unreachable!("no index for symbol {:?}", succ),
}
}
}
// don't introduce any cycles to self
result.remove(group_id);
result
};
match ven_graph::topological_sort_into_groups(&group_ids, successors_of_group) {
Ok(sorted_group_ids) => {
for sorted_group in sorted_group_ids.iter().rev() {
for group_id in sorted_group.iter().rev() {
let group = &groups[*group_id];
group_to_declaration(
group,
&env.closures,
&mut all_successors_with_self,
&mut can_defs_by_symbol,
&mut declarations,
);
}
}
}
Err(_) => unreachable!("there should be no cycles now!"),
}
for problem in problems {
env.problem(problem);
}
(Ok(declarations), output)
}
}
}
fn group_to_declaration(
group: &[Symbol],
closures: &MutMap<Symbol, References>,
successors: &mut dyn FnMut(&Symbol) -> Vec<Symbol>,
can_defs_by_symbol: &mut MutMap<Symbol, Def>,
declarations: &mut Vec<Declaration>,
) {
use Declaration::*;
// We want only successors in the current group, otherwise definitions get duplicated
let filtered_successors = |symbol: &Symbol| -> Vec<Symbol> {
let mut result = successors(symbol);
result.retain(|key| group.contains(key));
result
};
// Patterns like
//
// { x, y } = someDef
//
// Can bind multiple symbols. When not incorrectly recursive (which is guaranteed in this function),
// normally `someDef` would be inserted twice. We use the region of the pattern as a unique key
// for a definition, so every definition is only inserted (thus typechecked and emitted) once
let mut seen_pattern_regions: Vec<Region> = Vec::with_capacity(2);
for cycle in strongly_connected_components(group, filtered_successors) {
if cycle.len() == 1 {
let symbol = &cycle[0];
match can_defs_by_symbol.remove(symbol) {
Some(mut new_def) => {
// Determine recursivity of closures that are not tail-recursive
if let Closure(ClosureData {
recursive: recursive @ Recursive::NotRecursive,
..
}) = &mut new_def.loc_expr.value
{
*recursive = closure_recursivity(*symbol, closures);
}
let is_recursive = successors(symbol).contains(symbol);
if !seen_pattern_regions.contains(&new_def.loc_pattern.region) {
seen_pattern_regions.push(new_def.loc_pattern.region);
if is_recursive {
declarations.push(DeclareRec(vec![new_def]));
} else {
declarations.push(Declare(new_def));
}
}
}
None => roc_error_macros::internal_error!("def not available {:?}", symbol),
}
} else {
let mut can_defs = Vec::new();
// Topological sort gives us the reverse of the sorting we want!
for symbol in cycle.into_iter().rev() {
match can_defs_by_symbol.remove(&symbol) {
Some(mut new_def) => {
// Determine recursivity of closures that are not tail-recursive
if let Closure(ClosureData {
recursive: recursive @ Recursive::NotRecursive,
..
}) = &mut new_def.loc_expr.value
{
*recursive = closure_recursivity(symbol, closures);
}
if !seen_pattern_regions.contains(&new_def.loc_pattern.region) {
seen_pattern_regions.push(new_def.loc_pattern.region);
can_defs.push(new_def);
}
}
None => roc_error_macros::internal_error!("def not available {:?}", symbol),
}
}
declarations.push(DeclareRec(can_defs));
}
}
}
fn strongly_connected_components_improved(
length: usize,
bitvec: &bitvec::vec::BitVec<u8>,
@ -2277,7 +1874,7 @@ pub fn can_defs_with_return<'a>(
}
}
let (can_defs, output) = sort_can_defs(env, unsorted, output);
let (can_defs, output) = sort_can_defs_improved(env, unsorted, output);
match can_defs {
Ok(decls) => {

View file

@ -9,7 +9,7 @@ use crate::num::{
use crate::pattern::{canonicalize_pattern, Pattern};
use crate::procedure::References;
use crate::scope::Scope;
use roc_collections::{MutMap, MutSet, SendMap, VecMap, VecSet};
use roc_collections::{MutSet, SendMap, VecMap, VecSet};
use roc_module::called_via::CalledVia;
use roc_module::ident::{ForeignSymbol, Lowercase, TagName};
use roc_module::low_level::LowLevel;
@ -1091,34 +1091,6 @@ fn canonicalize_when_branch<'a>(
)
}
pub fn local_successors_with_duplicates<'a>(
references: &'a References,
closures: &'a MutMap<Symbol, References>,
) -> Vec<Symbol> {
let mut answer: Vec<_> = references.value_lookups().copied().collect();
let mut stack: Vec<_> = references.calls().copied().collect();
let mut seen = Vec::new();
while let Some(symbol) = stack.pop() {
if seen.contains(&symbol) {
continue;
}
if let Some(references) = closures.get(&symbol) {
answer.extend(references.value_lookups().copied());
stack.extend(references.calls().copied());
seen.push(symbol);
}
}
answer.sort();
answer.dedup();
answer
}
enum CanonicalizeRecordProblem {
InvalidOptionalValue {
field_name: Lowercase,