Make nested datatypes into errors

I was hoping to add nested datatypes into the language, but it turns out
doing so is quite tricky and not all that useful with Roc's current
compilation model. Basically every implementation strategy I could think
of ended up requiring a uniform representation for the data layout
(or some ugly workaround). Furthermore, it increased the complexity of the
checker/mono IR generator a little bit - basically, we must always pass
around the alias definitions of nested datatypes and instantiate them
at usage sites, rather than being able to unroll aliases as we currently
do during canonicalization.

So, especially because we don't support polymorphic recursion anyway, I
think it may be better to simply disallow any kind of nested datatypes
in the language. In any case, Stephanie Weirich [seems to think nested
datatypes are not needed](https://www.cis.upenn.edu/~plclub/blog/2020-12-04-nested-datatypes/).

Closes #2293
This commit is contained in:
ayazhafiz 2022-01-30 21:23:53 -05:00
parent 8be1deff97
commit 4e942b3e5d
9 changed files with 284 additions and 59 deletions

View file

@ -357,7 +357,7 @@ fn can_annotation_help(
actual: Box::new(actual), actual: Box::new(actual),
} }
} }
None => Type::Apply(symbol, args), None => Type::Apply(symbol, args, region),
} }
} }
BoundVariable(v) => { BoundVariable(v) => {
@ -377,7 +377,8 @@ fn can_annotation_help(
As( As(
loc_inner, loc_inner,
_spaces, _spaces,
AliasHeader { alias_header
@ AliasHeader {
name, name,
vars: loc_vars, vars: loc_vars,
}, },
@ -439,20 +440,43 @@ fn can_annotation_help(
} }
} }
let alias_args = vars.iter().map(|(_, v)| v.clone()).collect::<Vec<_>>();
let alias_actual = if let Type::TagUnion(tags, ext) = inner_type { let alias_actual = if let Type::TagUnion(tags, ext) = inner_type {
let rec_var = var_store.fresh(); let rec_var = var_store.fresh();
let mut new_tags = Vec::with_capacity(tags.len()); let mut new_tags = Vec::with_capacity(tags.len());
let mut is_nested_datatype = false;
for (tag_name, args) in tags { for (tag_name, args) in tags {
let mut new_args = Vec::with_capacity(args.len()); let mut new_args = Vec::with_capacity(args.len());
for arg in args { for arg in args {
let mut new_arg = arg.clone(); let mut new_arg = arg.clone();
new_arg.substitute_alias(symbol, &Type::Variable(rec_var)); let substitution_result =
new_arg.substitute_alias(symbol, &alias_args, &Type::Variable(rec_var));
if let Err(differing_recursion_region) = substitution_result {
env.problems
.push(roc_problem::can::Problem::NestedDatatype {
alias: symbol,
def_region: alias_header.region(),
differing_recursion_region,
});
is_nested_datatype = true;
}
// Either way, add the argument; not doing so would only result in more
// confusing error messages later on.
new_args.push(new_arg); new_args.push(new_arg);
} }
new_tags.push((tag_name.clone(), new_args)); new_tags.push((tag_name.clone(), new_args));
} }
Type::RecursiveTagUnion(rec_var, new_tags, ext) if is_nested_datatype {
// We don't have a way to represent nested data types; hence, we don't actually
// use the recursion var in them, and should avoid marking them as such.
Type::TagUnion(new_tags, ext)
} else {
Type::RecursiveTagUnion(rec_var, new_tags, ext)
}
} else { } else {
inner_type inner_type
}; };

View file

@ -277,7 +277,7 @@ pub fn canonicalize_defs<'a>(
let mut can_vars: Vec<Loc<(Lowercase, Variable)>> = Vec::with_capacity(vars.len()); let mut can_vars: Vec<Loc<(Lowercase, Variable)>> = Vec::with_capacity(vars.len());
let mut is_phantom = false; let mut is_phantom = false;
for loc_lowercase in vars { for loc_lowercase in vars.iter() {
if let Some(var) = can_ann if let Some(var) = can_ann
.introduced_variables .introduced_variables
.var_by_name(&loc_lowercase.value) .var_by_name(&loc_lowercase.value)
@ -303,10 +303,18 @@ pub fn canonicalize_defs<'a>(
continue; continue;
} }
let mut is_nested_datatype = false;
if can_ann.typ.contains_symbol(symbol) { if can_ann.typ.contains_symbol(symbol) {
make_tag_union_recursive( let alias_args = can_vars
.iter()
.map(|l| (l.value.0.clone(), Type::Variable(l.value.1)))
.collect::<Vec<_>>();
let alias_region =
Region::across_all([name.region].iter().chain(vars.iter().map(|l| &l.region)));
let made_recursive = make_tag_union_recursive(
env, env,
symbol, Loc::at(alias_region, (symbol, &alias_args)),
name.region, name.region,
vec![], vec![],
&mut can_ann.typ, &mut can_ann.typ,
@ -315,6 +323,13 @@ pub fn canonicalize_defs<'a>(
// recursion errors after the sorted introductions are complete. // recursion errors after the sorted introductions are complete.
&mut false, &mut false,
); );
is_nested_datatype = made_recursive.is_err();
}
if is_nested_datatype {
// Bail out
continue;
} }
scope.add_alias(symbol, name.region, can_vars.clone(), can_ann.typ.clone()); scope.add_alias(symbol, name.region, can_vars.clone(), can_ann.typ.clone());
@ -1624,9 +1639,16 @@ fn correct_mutual_recursive_type_alias<'a>(
var_store, var_store,
&mut ImSet::default(), &mut ImSet::default(),
); );
make_tag_union_recursive(
let alias_args = &alias
.type_variables
.iter()
.map(|l| (l.value.0.clone(), Type::Variable(l.value.1)))
.collect::<Vec<_>>();
let _made_recursive = make_tag_union_recursive(
env, env,
*rec, Loc::at(alias.header_region(), (*rec, &alias_args)),
alias.region, alias.region,
others, others,
&mut alias.typ, &mut alias.typ,
@ -1640,25 +1662,71 @@ fn correct_mutual_recursive_type_alias<'a>(
} }
} }
/// Attempt to make a tag union recursive at the position of `recursive_alias`; for example,
///
/// ```roc
/// [ Cons a (ConsList a), Nil ] as ConsList a
/// ```
///
/// can be made recursive at the position "ConsList a" with a fresh recursive variable, say r1:
///
/// ```roc
/// [ Cons a r1, Nil ] as r1
/// ```
///
/// Returns `Err` if the tag union is recursive, but there is no structure-preserving recursion
/// variable for it. This can happen when the type is a nested datatype, for example in either of
///
/// ```roc
/// Nested a : [ Chain a (Nested (List a)), Term ]
/// DuoList a b : [ Cons a (DuoList b a), Nil ]
/// ```
///
/// When `Err` is returned, a problem will be added to `env`.
fn make_tag_union_recursive<'a>( fn make_tag_union_recursive<'a>(
env: &mut Env<'a>, env: &mut Env<'a>,
symbol: Symbol, recursive_alias: Loc<(Symbol, &[(Lowercase, Type)])>,
region: Region, region: Region,
others: Vec<Symbol>, others: Vec<Symbol>,
typ: &mut Type, typ: &mut Type,
var_store: &mut VarStore, var_store: &mut VarStore,
can_report_error: &mut bool, can_report_error: &mut bool,
) { ) -> Result<(), ()> {
let Loc {
value: (symbol, args),
region: alias_region,
} = recursive_alias;
let vars = args.iter().map(|(_, t)| t.clone()).collect::<Vec<_>>();
match typ { match typ {
Type::TagUnion(tags, ext) => { Type::TagUnion(tags, ext) => {
let rec_var = var_store.fresh(); let rec_var = var_store.fresh();
*typ = Type::RecursiveTagUnion(rec_var, tags.to_vec(), ext.clone()); let mut pending_typ = Type::RecursiveTagUnion(rec_var, tags.to_vec(), ext.clone());
typ.substitute_alias(symbol, &Type::Variable(rec_var)); let substitution_result =
pending_typ.substitute_alias(symbol, &vars, &Type::Variable(rec_var));
match substitution_result {
Ok(()) => {
// We can substitute the alias presence for the variable exactly.
*typ = pending_typ;
Ok(())
}
Err(differing_recursion_region) => {
env.problems.push(Problem::NestedDatatype {
alias: symbol,
def_region: alias_region,
differing_recursion_region,
});
Err(())
}
}
} }
Type::RecursiveTagUnion(_, _, _) => {} Type::RecursiveTagUnion(_, _, _) => Ok(()),
Type::Alias { actual, .. } => make_tag_union_recursive( Type::Alias {
actual,
type_arguments,
..
} => make_tag_union_recursive(
env, env,
symbol, Loc::at_zero((symbol, &type_arguments)),
region, region,
others, others,
actual, actual,
@ -1676,6 +1744,7 @@ fn make_tag_union_recursive<'a>(
let problem = Problem::CyclicAlias(symbol, region, others); let problem = Problem::CyclicAlias(symbol, region, others);
env.problems.push(problem); env.problems.push(problem);
} }
Ok(())
} }
} }
} }

View file

@ -71,7 +71,7 @@ pub fn exists(flex_vars: Vec<Variable>, constraint: Constraint) -> Constraint {
#[inline(always)] #[inline(always)]
pub fn builtin_type(symbol: Symbol, args: Vec<Type>) -> Type { pub fn builtin_type(symbol: Symbol, args: Vec<Type>) -> Type {
Type::Apply(symbol, args) Type::Apply(symbol, args, Region::zero())
} }
#[inline(always)] #[inline(always)]

View file

@ -232,6 +232,16 @@ pub struct AliasHeader<'a> {
pub vars: &'a [Loc<Pattern<'a>>], pub vars: &'a [Loc<Pattern<'a>>],
} }
impl<'a> AliasHeader<'a> {
    /// Returns the region produced by `Region::across_all` over the region of
    /// the alias name followed by the regions of each of its type variables.
    pub fn region(&self) -> Region {
        let name_region = std::iter::once(&self.name.region);
        let var_regions = self.vars.iter().map(|loc_var| &loc_var.region);

        Region::across_all(name_region.chain(var_regions))
    }
}
#[derive(Debug, Clone, Copy, PartialEq)] #[derive(Debug, Clone, Copy, PartialEq)]
pub enum Def<'a> { pub enum Def<'a> {
// TODO in canonicalization, validate the pattern; only certain patterns // TODO in canonicalization, validate the pattern; only certain patterns

View file

@ -78,6 +78,11 @@ pub enum Problem {
InvalidInterpolation(Region), InvalidInterpolation(Region),
InvalidHexadecimal(Region), InvalidHexadecimal(Region),
InvalidUnicodeCodePt(Region), InvalidUnicodeCodePt(Region),
NestedDatatype {
alias: Symbol,
def_region: Region,
differing_recursion_region: Region,
},
} }
#[derive(Clone, Debug, PartialEq)] #[derive(Clone, Debug, PartialEq)]

View file

@ -85,7 +85,7 @@ impl SolvedType {
match typ { match typ {
EmptyRec => SolvedType::EmptyRecord, EmptyRec => SolvedType::EmptyRecord,
EmptyTagUnion => SolvedType::EmptyTagUnion, EmptyTagUnion => SolvedType::EmptyTagUnion,
Apply(symbol, types) => { Apply(symbol, types, _) => {
let mut solved_types = Vec::with_capacity(types.len()); let mut solved_types = Vec::with_capacity(types.len());
for typ in types { for typ in types {
@ -454,7 +454,7 @@ pub fn to_type(
new_args.push(to_type(arg, free_vars, var_store)); new_args.push(to_type(arg, free_vars, var_store));
} }
Type::Apply(*symbol, new_args) Type::Apply(*symbol, new_args, Region::zero())
} }
Rigid(lowercase) => { Rigid(lowercase) => {
if let Some(var) = free_vars.named_vars.get(lowercase) { if let Some(var) = free_vars.named_vars.get(lowercase) {

View file

@ -94,13 +94,18 @@ impl RecordField<Type> {
} }
} }
pub fn substitute_alias(&mut self, rep_symbol: Symbol, actual: &Type) { pub fn substitute_alias(
&mut self,
rep_symbol: Symbol,
rep_args: &[Type],
actual: &Type,
) -> Result<(), Region> {
use RecordField::*; use RecordField::*;
match self { match self {
Optional(typ) => typ.substitute_alias(rep_symbol, actual), Optional(typ) => typ.substitute_alias(rep_symbol, rep_args, actual),
Required(typ) => typ.substitute_alias(rep_symbol, actual), Required(typ) => typ.substitute_alias(rep_symbol, rep_args, actual),
Demanded(typ) => typ.substitute_alias(rep_symbol, actual), Demanded(typ) => typ.substitute_alias(rep_symbol, rep_args, actual),
} }
} }
@ -189,7 +194,7 @@ pub enum Type {
}, },
RecursiveTagUnion(Variable, Vec<(TagName, Vec<Type>)>, Box<Type>), RecursiveTagUnion(Variable, Vec<(TagName, Vec<Type>)>, Box<Type>),
/// Applying a type to some arguments (e.g. Dict.Dict String Int) /// Applying a type to some arguments (e.g. Dict.Dict String Int)
Apply(Symbol, Vec<Type>), Apply(Symbol, Vec<Type>, Region),
Variable(Variable), Variable(Variable),
/// A type error, which will code gen to a runtime error /// A type error, which will code gen to a runtime error
Erroneous(Problem), Erroneous(Problem),
@ -220,7 +225,7 @@ impl fmt::Debug for Type {
} }
Type::Variable(var) => write!(f, "<{:?}>", var), Type::Variable(var) => write!(f, "<{:?}>", var),
Type::Apply(symbol, args) => { Type::Apply(symbol, args, _) => {
write!(f, "({:?}", symbol)?; write!(f, "({:?}", symbol)?;
for arg in args { for arg in args {
@ -539,7 +544,7 @@ impl Type {
} }
actual_type.substitute(substitutions); actual_type.substitute(substitutions);
} }
Apply(_, args) => { Apply(_, args, _) => {
for arg in args { for arg in args {
arg.substitute(substitutions); arg.substitute(substitutions);
} }
@ -549,62 +554,69 @@ impl Type {
} }
} }
// swap Apply with Alias if their module and tag match /// Swap Apply(rep_symbol, rep_args) with `actual`. Returns `Err` if there is an
pub fn substitute_alias(&mut self, rep_symbol: Symbol, actual: &Type) { /// `Apply(rep_symbol, _)`, but the args don't match.
pub fn substitute_alias(
&mut self,
rep_symbol: Symbol,
rep_args: &[Type],
actual: &Type,
) -> Result<(), Region> {
use Type::*; use Type::*;
match self { match self {
Function(args, closure, ret) => { Function(args, closure, ret) => {
for arg in args { for arg in args {
arg.substitute_alias(rep_symbol, actual); arg.substitute_alias(rep_symbol, rep_args, actual)?;
} }
closure.substitute_alias(rep_symbol, actual); closure.substitute_alias(rep_symbol, rep_args, actual)?;
ret.substitute_alias(rep_symbol, actual); ret.substitute_alias(rep_symbol, rep_args, actual)
}
FunctionOrTagUnion(_, _, ext) => {
ext.substitute_alias(rep_symbol, actual);
} }
FunctionOrTagUnion(_, _, ext) => ext.substitute_alias(rep_symbol, rep_args, actual),
RecursiveTagUnion(_, tags, ext) | TagUnion(tags, ext) => { RecursiveTagUnion(_, tags, ext) | TagUnion(tags, ext) => {
for (_, args) in tags { for (_, args) in tags {
for x in args { for x in args {
x.substitute_alias(rep_symbol, actual); x.substitute_alias(rep_symbol, rep_args, actual)?;
} }
} }
ext.substitute_alias(rep_symbol, actual); ext.substitute_alias(rep_symbol, rep_args, actual)
} }
Record(fields, ext) => { Record(fields, ext) => {
for (_, x) in fields.iter_mut() { for (_, x) in fields.iter_mut() {
x.substitute_alias(rep_symbol, actual); x.substitute_alias(rep_symbol, rep_args, actual)?;
} }
ext.substitute_alias(rep_symbol, actual); ext.substitute_alias(rep_symbol, rep_args, actual)
} }
Alias { Alias {
actual: alias_actual, actual: alias_actual,
.. ..
} => { } => alias_actual.substitute_alias(rep_symbol, rep_args, actual),
alias_actual.substitute_alias(rep_symbol, actual);
}
HostExposedAlias { HostExposedAlias {
actual: actual_type, actual: actual_type,
.. ..
} => { } => actual_type.substitute_alias(rep_symbol, rep_args, actual),
actual_type.substitute_alias(rep_symbol, actual); Apply(symbol, args, region) if *symbol == rep_symbol => {
} if args.len() == rep_args.len()
Apply(symbol, _) if *symbol == rep_symbol => { && args.iter().zip(rep_args.iter()).all(|(t1, t2)| t1 == t2)
*self = actual.clone(); {
*self = actual.clone();
if let Apply(_, args) = self { if let Apply(_, args, _) = self {
for arg in args { for arg in args {
arg.substitute_alias(rep_symbol, actual); arg.substitute_alias(rep_symbol, rep_args, actual)?;
}
} }
return Ok(());
} }
Err(*region)
} }
Apply(_, args) => { Apply(_, args, _) => {
for arg in args { for arg in args {
arg.substitute_alias(rep_symbol, actual); arg.substitute_alias(rep_symbol, rep_args, actual)?;
} }
Ok(())
} }
EmptyRec | EmptyTagUnion | ClosureTag { .. } | Erroneous(_) | Variable(_) => {} EmptyRec | EmptyTagUnion | ClosureTag { .. } | Erroneous(_) | Variable(_) => Ok(()),
} }
} }
@ -639,8 +651,8 @@ impl Type {
HostExposedAlias { name, actual, .. } => { HostExposedAlias { name, actual, .. } => {
name == &rep_symbol || actual.contains_symbol(rep_symbol) name == &rep_symbol || actual.contains_symbol(rep_symbol)
} }
Apply(symbol, _) if *symbol == rep_symbol => true, Apply(symbol, _, _) if *symbol == rep_symbol => true,
Apply(_, args) => args.iter().any(|arg| arg.contains_symbol(rep_symbol)), Apply(_, args, _) => args.iter().any(|arg| arg.contains_symbol(rep_symbol)),
EmptyRec | EmptyTagUnion | ClosureTag { .. } | Erroneous(_) | Variable(_) => false, EmptyRec | EmptyTagUnion | ClosureTag { .. } | Erroneous(_) | Variable(_) => false,
} }
} }
@ -676,7 +688,7 @@ impl Type {
.. ..
} => actual_type.contains_variable(rep_variable), } => actual_type.contains_variable(rep_variable),
HostExposedAlias { actual, .. } => actual.contains_variable(rep_variable), HostExposedAlias { actual, .. } => actual.contains_variable(rep_variable),
Apply(_, args) => args.iter().any(|arg| arg.contains_variable(rep_variable)), Apply(_, args, _) => args.iter().any(|arg| arg.contains_variable(rep_variable)),
EmptyRec | EmptyTagUnion | Erroneous(_) => false, EmptyRec | EmptyTagUnion | Erroneous(_) => false,
} }
} }
@ -753,7 +765,7 @@ impl Type {
actual_type.instantiate_aliases(region, aliases, var_store, introduced); actual_type.instantiate_aliases(region, aliases, var_store, introduced);
} }
Apply(symbol, args) => { Apply(symbol, args, _) => {
if let Some(alias) = aliases.get(symbol) { if let Some(alias) = aliases.get(symbol) {
if args.len() != alias.type_variables.len() { if args.len() != alias.type_variables.len() {
*self = Type::Erroneous(Problem::BadTypeArguments { *self = Type::Erroneous(Problem::BadTypeArguments {
@ -882,7 +894,7 @@ fn symbols_help(tipe: &Type, accum: &mut ImSet<Symbol>) {
accum.insert(*name); accum.insert(*name);
symbols_help(actual, accum); symbols_help(actual, accum);
} }
Apply(symbol, args) => { Apply(symbol, args, _) => {
accum.insert(*symbol); accum.insert(*symbol);
args.iter().for_each(|arg| symbols_help(arg, accum)); args.iter().for_each(|arg| symbols_help(arg, accum));
} }
@ -967,7 +979,7 @@ fn variables_help(tipe: &Type, accum: &mut ImSet<Variable>) {
} }
variables_help(actual, accum); variables_help(actual, accum);
} }
Apply(_, args) => { Apply(_, args, _) => {
for x in args { for x in args {
variables_help(x, accum); variables_help(x, accum);
} }
@ -1071,7 +1083,7 @@ fn variables_help_detailed(tipe: &Type, accum: &mut VariableDetail) {
} }
variables_help_detailed(actual, accum); variables_help_detailed(actual, accum);
} }
Apply(_, args) => { Apply(_, args, _) => {
for x in args { for x in args {
variables_help_detailed(x, accum); variables_help_detailed(x, accum);
} }
@ -1241,6 +1253,16 @@ pub struct Alias {
pub typ: Type, pub typ: Type,
} }
impl Alias {
    /// Returns the region produced by `Region::across_all` over this alias's
    /// own `region` followed by the regions of each of its type variables.
    pub fn header_region(&self) -> Region {
        let own_region = std::iter::once(&self.region);
        let type_var_regions = self.type_variables.iter().map(|loc_var| &loc_var.region);

        Region::across_all(own_region.chain(type_var_regions))
    }
}
#[derive(PartialEq, Eq, Debug, Clone, Hash)] #[derive(PartialEq, Eq, Debug, Clone, Hash)]
pub enum Problem { pub enum Problem {
CanonicalizationProblem, CanonicalizationProblem,

View file

@ -24,6 +24,7 @@ const CIRCULAR_DEF: &str = "CIRCULAR DEFINITION";
const DUPLICATE_NAME: &str = "DUPLICATE NAME"; const DUPLICATE_NAME: &str = "DUPLICATE NAME";
const VALUE_NOT_EXPOSED: &str = "NOT EXPOSED"; const VALUE_NOT_EXPOSED: &str = "NOT EXPOSED";
const MODULE_NOT_IMPORTED: &str = "MODULE NOT IMPORTED"; const MODULE_NOT_IMPORTED: &str = "MODULE NOT IMPORTED";
const NESTED_DATATYPE: &str = "NESTED DATATYPE";
pub fn can_problem<'b>( pub fn can_problem<'b>(
alloc: &'b RocDocAllocator<'b>, alloc: &'b RocDocAllocator<'b>,
@ -437,6 +438,34 @@ pub fn can_problem<'b>(
title = answer.1.to_string(); title = answer.1.to_string();
severity = Severity::RuntimeError; severity = Severity::RuntimeError;
} }
Problem::NestedDatatype {
alias,
def_region,
differing_recursion_region,
} => {
doc = alloc.stack(vec![
alloc.concat(vec![
alloc.symbol_unqualified(alias),
alloc.reflow(" is a nested datatype. Here is one recursive usage of it:"),
]),
alloc.region(lines.convert_region(differing_recursion_region)),
alloc.concat(vec![
alloc.reflow("But recursive usages of "),
alloc.symbol_unqualified(alias),
alloc.reflow(" must match its definition:"),
]),
alloc.region(lines.convert_region(def_region)),
alloc.reflow("Nested datatypes are not supported in Roc."),
alloc.concat(vec![
alloc.hint("Consider rewriting the definition of "),
alloc.symbol_unqualified(alias),
alloc.text(" to use the recursive type with the same arguments."),
]),
]);
title = NESTED_DATATYPE.to_string();
severity = Severity::RuntimeError;
}
}; };
Report { Report {

View file

@ -7238,4 +7238,70 @@ I need all branches in an `if` to have the same type!
), ),
) )
} }
// A type alias whose recursive use applies different type arguments than its
// header (`Nested (List a)` inside the definition of `Nested a`) must be
// reported as a NESTED DATATYPE problem that points at both the differing
// recursive usage and the alias header.
#[test]
fn nested_datatype() {
report_problem_as(
indoc!(
r#"
Nested a : [ Chain a (Nested (List a)), Term ]
s : Nested Str
s
"#
),
indoc!(
r#"
NESTED DATATYPE
`Nested` is a nested datatype. Here is one recursive usage of it:
1 Nested a : [ Chain a (Nested (List a)), Term ]
^^^^^^^^^^^^^^^
But recursive usages of `Nested` must match its definition:
1 Nested a : [ Chain a (Nested (List a)), Term ]
^^^^^^^^
Nested datatypes are not supported in Roc.
Hint: Consider rewriting the definition of `Nested` to use the recursive type with the same arguments.
"#
),
)
}
// An alias introduced inline with `as` inside a type annotation
// (`[ ... ] as Nested a`) that recurses with different type arguments
// (`Nested (List a)`) must be reported as a NESTED DATATYPE problem that
// points at both the differing recursive usage and the inline alias header.
#[test]
fn nested_datatype_inline() {
report_problem_as(
indoc!(
r#"
f : {} -> [ Chain a (Nested (List a)), Term ] as Nested a
f
"#
),
indoc!(
r#"
NESTED DATATYPE
`Nested` is a nested datatype. Here is one recursive usage of it:
1 f : {} -> [ Chain a (Nested (List a)), Term ] as Nested a
^^^^^^^^^^^^^^^
But recursive usages of `Nested` must match its definition:
1 f : {} -> [ Chain a (Nested (List a)), Term ] as Nested a
^^^^^^^^
Nested datatypes are not supported in Roc.
Hint: Consider rewriting the definition of `Nested` to use the recursive type with the same arguments.
"#
),
)
}
} }