Allow files >4GB on 32-bit platforms (#847)

Changes `File::size` from a `usize` to a `u64`.

The motivations are twofold: tensorflow wheels are already 475 MB
(https://pypi.org/project/tensorflow/2.15.0.post1/#files), only one order of
magnitude below the 4 GB limit of a 32-bit `usize`, and a fixed-width `u64`
avoids target-dependent failures.
This commit is contained in:
konsti 2024-01-09 17:31:49 +01:00 committed by GitHub
parent ee3a6431c7
commit 5b0b072e3c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 26 additions and 20 deletions

View file

@ -11,7 +11,7 @@ pub struct File {
pub filename: String,
pub hashes: Hashes,
pub requires_python: Option<VersionSpecifiers>,
pub size: Option<usize>,
pub size: Option<u64>,
pub upload_time: Option<DateTime<Utc>>,
pub url: String,
pub yanked: Option<Yanked>,

View file

@ -506,7 +506,7 @@ impl RemoteSource for File {
Ok(&self.filename)
}
fn size(&self) -> Option<usize> {
fn size(&self) -> Option<u64> {
self.size
}
}
@ -518,7 +518,7 @@ impl RemoteSource for Url {
.ok_or_else(|| Error::UrlFilename(self.clone()))
}
fn size(&self) -> Option<usize> {
fn size(&self) -> Option<u64> {
None
}
}
@ -528,7 +528,7 @@ impl RemoteSource for RegistryBuiltDist {
self.file.filename()
}
fn size(&self) -> Option<usize> {
fn size(&self) -> Option<u64> {
self.file.size()
}
}
@ -538,7 +538,7 @@ impl RemoteSource for RegistrySourceDist {
self.file.filename()
}
fn size(&self) -> Option<usize> {
fn size(&self) -> Option<u64> {
self.file.size()
}
}
@ -548,7 +548,7 @@ impl RemoteSource for DirectUrlBuiltDist {
self.url.filename()
}
fn size(&self) -> Option<usize> {
fn size(&self) -> Option<u64> {
self.url.size()
}
}
@ -558,7 +558,7 @@ impl RemoteSource for DirectUrlSourceDist {
self.url.filename()
}
fn size(&self) -> Option<usize> {
fn size(&self) -> Option<u64> {
self.url.size()
}
}
@ -572,7 +572,7 @@ impl RemoteSource for GitSourceDist {
})
}
fn size(&self) -> Option<usize> {
fn size(&self) -> Option<u64> {
self.url.size()
}
}
@ -582,7 +582,7 @@ impl RemoteSource for PathBuiltDist {
self.url.filename()
}
fn size(&self) -> Option<usize> {
fn size(&self) -> Option<u64> {
self.url.size()
}
}
@ -592,7 +592,7 @@ impl RemoteSource for PathSourceDist {
self.url.filename()
}
fn size(&self) -> Option<usize> {
fn size(&self) -> Option<u64> {
self.url.size()
}
}
@ -607,7 +607,7 @@ impl RemoteSource for SourceDist {
}
}
fn size(&self) -> Option<usize> {
fn size(&self) -> Option<u64> {
match self {
Self::Registry(dist) => dist.size(),
Self::DirectUrl(dist) => dist.size(),
@ -626,7 +626,7 @@ impl RemoteSource for BuiltDist {
}
}
fn size(&self) -> Option<usize> {
fn size(&self) -> Option<u64> {
match self {
Self::Registry(dist) => dist.size(),
Self::DirectUrl(dist) => dist.size(),
@ -643,7 +643,7 @@ impl RemoteSource for Dist {
}
}
fn size(&self) -> Option<usize> {
fn size(&self) -> Option<u64> {
match self {
Self::Built(dist) => dist.size(),
Self::Source(dist) => dist.size(),

View file

@ -53,7 +53,7 @@ pub trait RemoteSource {
fn filename(&self) -> Result<&str, Error>;
/// Return the size of the distribution, if known.
fn size(&self) -> Option<usize>;
fn size(&self) -> Option<u64>;
}
pub trait Identifier {

View file

@ -119,7 +119,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
let reader = self.client.stream_external(&url).await?;
// If the file is greater than 5MB, write it to disk; otherwise, keep it in memory.
let byte_size = wheel.file.size.map(|size| ByteSize::b(size as u64));
let byte_size = wheel.file.size.map(ByteSize::b);
let local_wheel = if let Some(byte_size) =
byte_size.filter(|byte_size| *byte_size < ByteSize::mb(5))
{
@ -133,7 +133,14 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context>
);
// Read into a buffer.
let mut buffer = Vec::with_capacity(wheel.file.size.unwrap_or(0));
let mut buffer = Vec::with_capacity(
wheel
.file
.size
.unwrap_or(0)
.try_into()
.expect("5MB shouldn't be bigger usize::MAX"),
);
let mut reader = tokio::io::BufReader::new(reader.compat());
tokio::io::copy(&mut reader, &mut buffer).await?;

View file

@ -87,9 +87,8 @@ impl<'a, Context: BuildContext + Send + Sync> Downloader<'a, Context> {
in_flight: &OnceMap<PathBuf, Result<CachedDist, String>>,
) -> Result<Vec<CachedDist>, Error> {
// Sort the distributions by size.
distributions.sort_unstable_by_key(|distribution| {
Reverse(distribution.size().unwrap_or(usize::MAX))
});
distributions
.sort_unstable_by_key(|distribution| Reverse(distribution.size().unwrap_or(u64::MAX)));
let wheels = self
.download_stream(distributions, in_flight)

View file

@ -28,7 +28,7 @@ pub struct File {
/// still fails, we skip the file when creating a version map.
#[serde(default, deserialize_with = "deserialize_version_specifiers_lenient")]
pub requires_python: Option<Result<VersionSpecifiers, VersionSpecifiersParseError>>,
pub size: Option<usize>,
pub size: Option<u64>,
pub upload_time: Option<DateTime<Utc>>,
pub url: String,
pub yanked: Option<Yanked>,