Add support for .tar.bz2 source distributions (#3069)

## Summary

Source distributions in the .tar.bz2 format are still relatively common
in existing codebases; the most prominent examples are the Twisted
source distributions up to version 20.3.0. Because upgrading Twisted to
a more recent version is often not an option for a given project, we add
support for .tar.bz2 here so that `uv` can still be a drop-in
replacement for `pip` in these projects.

## Test Plan

The feature was tested both by adding the corresponding test coverage,
and by directly installing a package of interest under a Python version
that doesn't have the corresponding wheel:

```sh
cargo run venv -p python3.8
cargo run pip install Twisted==20.3.0 --no-cache
```

The `--no-cache` argument in the example above ensures that stale cached
information about the unsatisfiability of the requirements is not
reused, as it may have been cached during a previous attempt to install
this package with a `uv` version that did not yet implement this
feature.
This commit is contained in:
Sergey Kolosov 2024-04-16 21:34:55 +03:00 committed by GitHub
parent e78bbb8f6a
commit d2551bb2bd
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 115 additions and 4 deletions

22
Cargo.lock generated
View file

@ -205,6 +205,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07dbbf24db18d609b1462965249abdf49129ccad073ec257da372adc83259c60"
dependencies = [
"brotli",
"bzip2",
"flate2",
"futures-core",
"futures-io",
@ -506,6 +507,27 @@ version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9"
[[package]]
name = "bzip2"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8"
dependencies = [
"bzip2-sys",
"libc",
]
[[package]]
name = "bzip2-sys"
version = "0.1.11+1.0.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc"
dependencies = [
"cc",
"libc",
"pkg-config",
]
[[package]]
name = "cache-key"
version = "0.0.1"

View file

@ -19,6 +19,7 @@ use uv_normalize::{InvalidNameError, PackageName};
// Archive extensions accepted for source distribution filenames. The string
// form of each variant is the extension without its leading dot.
pub enum SourceDistExtension {
/// A `.zip` archive; parsed from and displayed as `zip`.
Zip,
/// A `.tar.gz` archive; parsed from and displayed as `tar.gz`.
TarGz,
/// A `.tar.bz2` archive; parsed from and displayed as `tar.bz2`.
TarBz2,
}
impl FromStr for SourceDistExtension {
@ -28,6 +29,7 @@ impl FromStr for SourceDistExtension {
Ok(match s {
"zip" => Self::Zip,
"tar.gz" => Self::TarGz,
"tar.bz2" => Self::TarBz2,
other => return Err(other.to_string()),
})
}
@ -38,6 +40,7 @@ impl Display for SourceDistExtension {
match self {
Self::Zip => f.write_str("zip"),
Self::TarGz => f.write_str("tar.gz"),
Self::TarBz2 => f.write_str("tar.bz2"),
}
}
}
@ -50,6 +53,9 @@ impl SourceDistExtension {
if let Some(stem) = filename.strip_suffix(".tar.gz") {
return Some((stem, Self::TarGz));
}
if let Some(stem) = filename.strip_suffix(".tar.bz2") {
return Some((stem, Self::TarBz2));
}
None
}
}
@ -182,7 +188,7 @@ impl Display for SourceDistFilenameError {
enum SourceDistFilenameErrorKind {
#[error("Name doesn't start with package name {0}")]
Filename(PackageName),
#[error("Source distributions filenames must end with .zip or .tar.gz")]
#[error("Source distributions filenames must end with .zip, .tar.gz, or .tar.bz2")]
Extension,
#[error("Version section is invalid")]
Version(#[from] VersionParseError),
@ -207,6 +213,7 @@ mod tests {
"foo-lib-1.2.3.zip",
"foo-lib-1.2.3a3.zip",
"foo-lib-1.2.3.tar.gz",
"foo-lib-1.2.3.tar.bz2",
] {
assert_eq!(
SourceDistFilename::parse(normalized, &PackageName::from_str("foo_lib").unwrap())

View file

@ -15,7 +15,7 @@ workspace = true
[dependencies]
pypi-types = { workspace = true }
async-compression = { workspace = true, features = ["gzip", "zstd"] }
async-compression = { workspace = true, features = ["bzip2", "gzip", "zstd"] }
async_zip = { workspace = true, features = ["tokio"] }
fs-err = { workspace = true, features = ["tokio"] }
futures = { workspace = true }

View file

@ -80,7 +80,7 @@ pub async fn unzip<R: tokio::io::AsyncRead + tokio::io::AsyncSeek + Unpin>(
Ok(())
}
/// Unzip a `.zip` or `.tar.gz` archive into the target directory, requiring `Seek`.
/// Unzip a `.zip`, `.tar.gz`, or `.tar.bz2` archive into the target directory, requiring `Seek`.
pub async fn archive<R: tokio::io::AsyncRead + tokio::io::AsyncSeek + Unpin>(
reader: R,
source: impl AsRef<Path>,
@ -111,6 +111,21 @@ pub async fn archive<R: tokio::io::AsyncRead + tokio::io::AsyncSeek + Unpin>(
return Ok(());
}
// `.tar.bz2`
if source
.as_ref()
.extension()
.is_some_and(|ext| ext.eq_ignore_ascii_case("bz2"))
&& source.as_ref().file_stem().is_some_and(|stem| {
Path::new(stem)
.extension()
.is_some_and(|ext| ext.eq_ignore_ascii_case("tar"))
})
{
crate::stream::untar_bz2(reader, target).await?;
return Ok(());
}
// `.tar.zst`
if source
.as_ref()

View file

@ -165,6 +165,23 @@ pub async fn untar_gz<R: tokio::io::AsyncRead + Unpin>(
Ok(())
}
/// Extract a `.tar.bz2` archive from a streaming reader into `target`.
///
/// Only `AsyncRead` is required (no `Seek`), so the archive can be unpacked
/// while it is still being downloaded.
///
/// # Errors
///
/// Propagates any error returned by `untar_in`, converted into [`Error`].
pub async fn untar_bz2<R: tokio::io::AsyncRead + Unpin>(
    reader: R,
    target: impl AsRef<Path>,
) -> Result<(), Error> {
    // The `bufread` decoder is constructed over a buffered reader, so wrap the raw stream first.
    let bz_stream =
        async_compression::tokio::bufread::BzDecoder::new(tokio::io::BufReader::new(reader));

    // Build the tar reader with mtime preservation switched off.
    let builder = tokio_tar::ArchiveBuilder::new(bz_stream).set_preserve_mtime(false);
    let mut tarball = builder.build();

    untar_in(&mut tarball, target.as_ref()).await?;
    Ok(())
}
/// Unzip a `.tar.zst` archive into the target directory, without requiring `Seek`.
///
/// This is useful for unpacking files as they're being downloaded.
@ -181,7 +198,7 @@ pub async fn untar_zst<R: tokio::io::AsyncRead + Unpin>(
Ok(untar_in(&mut archive, target.as_ref()).await?)
}
/// Unzip a `.zip` or `.tar.gz` archive into the target directory, without requiring `Seek`.
/// Unzip a `.zip`, `.tar.gz`, or `.tar.bz2` archive into the target directory, without requiring `Seek`.
pub async fn archive<R: tokio::io::AsyncRead + Unpin>(
reader: R,
source: impl AsRef<Path>,
@ -212,6 +229,20 @@ pub async fn archive<R: tokio::io::AsyncRead + Unpin>(
return Ok(());
}
// `.tar.bz2`
if source
.as_ref()
.extension()
.is_some_and(|ext| ext.eq_ignore_ascii_case("bz2"))
&& source.as_ref().file_stem().is_some_and(|stem| {
Path::new(stem)
.extension()
.is_some_and(|ext| ext.eq_ignore_ascii_case("tar"))
})
{
untar_bz2(reader, target).await?;
return Ok(());
}
// `.tar.zst`
if source
.as_ref()

View file

@ -666,6 +666,42 @@ fn install_sdist_url() -> Result<()> {
Ok(())
}
/// Install a package with source archive format `.tar.bz2`.
///
/// Writes a `requirements.txt` pinning `Twisted==20.3.0`, runs the install
/// command against it, and snapshot-checks the command's exit status and output.
#[test]
fn install_sdist_archive_type_bz2() -> Result<()> {
// NOTE(review): "3.8" is presumably the Python version for the test environment — confirm.
let context = TestContext::new("3.8");
// Install a version of Twisted that uses `.tar.bz2`.
let requirements_txt = context.temp_dir.child("requirements.txt");
requirements_txt.write_str("Twisted==20.3.0")?;
// `--no-binary :all:` is passed, presumably so that the `.tar.bz2` sdist is used
// instead of a wheel — confirm. The expected output records exactly one installed
// package plus a warning for each of Twisted's requirements that is not installed;
// NOTE(review): those warnings are presumably produced because of `--strict`.
uv_snapshot!(command(&context)
.arg("requirements.txt")
.arg("--no-binary")
.arg(":all:")
.arg("--strict"), @r###"
success: true
exit_code: 0
----- stdout -----
----- stderr -----
Resolved 1 package in [TIME]
Downloaded 1 package in [TIME]
Installed 1 package in [TIME]
+ twisted==20.3.0
warning: The package `twisted` requires `zope-interface>=4.4.2`, but it's not installed.
warning: The package `twisted` requires `constantly>=15.1`, but it's not installed.
warning: The package `twisted` requires `incremental>=16.10.1`, but it's not installed.
warning: The package `twisted` requires `automat>=0.3.0`, but it's not installed.
warning: The package `twisted` requires `hyperlink>=17.1.1`, but it's not installed.
warning: The package `twisted` requires `pyhamcrest!=1.10.0,>=1.9.0`, but it's not installed.
warning: The package `twisted` requires `attrs>=19.2.0`, but it's not installed.
"###
);
Ok(())
}
/// Attempt to re-install a package into a virtual environment from a URL. The second install
/// should be a no-op.
#[test]