From d2551bb2bdc19417bd6611283dd1da70e5894e37 Mon Sep 17 00:00:00 2001 From: Sergey Kolosov Date: Tue, 16 Apr 2024 21:34:55 +0300 Subject: [PATCH] Add support for .tar.bz2 source distributions (#3069) ## Summary Source distributions in the .tar.bz2 format are still relatively common in existing codebases; the most common examples are the Twisted source distributions up to version 20.3.0. Since upgrading Twisted to a more recent version is often not an option for a given project, we add support for .tar.bz2 here so that `uv` can still be a drop-in replacement for `pip` in these projects. ## Test Plan The feature was tested both by adding the corresponding test coverage and by directly installing a package of interest under a Python version that doesn't have a corresponding wheel: ```sh cargo run venv -p python3.8 cargo run pip install Twisted==20.3.0 --no-cache ``` The `--no-cache` argument in the example above ensures that any cached information about the requirements being unsatisfiable is ignored, as it may have been cached during a previous attempt to install this package with a `uv` version that didn't implement this feature yet. 
--- Cargo.lock | 22 ++++++++++++ .../distribution-filename/src/source_dist.rs | 9 ++++- crates/uv-extract/Cargo.toml | 2 +- crates/uv-extract/src/seek.rs | 17 ++++++++- crates/uv-extract/src/stream.rs | 33 ++++++++++++++++- crates/uv/tests/pip_sync.rs | 36 +++++++++++++++++++ 6 files changed, 115 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f63f49f1b..2dfd2beab 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -205,6 +205,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "07dbbf24db18d609b1462965249abdf49129ccad073ec257da372adc83259c60" dependencies = [ "brotli", + "bzip2", "flate2", "futures-core", "futures-io", @@ -506,6 +507,27 @@ version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" +[[package]] +name = "bzip2" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" +dependencies = [ + "bzip2-sys", + "libc", +] + +[[package]] +name = "bzip2-sys" +version = "0.1.11+1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + [[package]] name = "cache-key" version = "0.0.1" diff --git a/crates/distribution-filename/src/source_dist.rs b/crates/distribution-filename/src/source_dist.rs index b9447272d..80c8b70ad 100644 --- a/crates/distribution-filename/src/source_dist.rs +++ b/crates/distribution-filename/src/source_dist.rs @@ -19,6 +19,7 @@ use uv_normalize::{InvalidNameError, PackageName}; pub enum SourceDistExtension { Zip, TarGz, + TarBz2, } impl FromStr for SourceDistExtension { @@ -28,6 +29,7 @@ impl FromStr for SourceDistExtension { Ok(match s { "zip" => Self::Zip, "tar.gz" => Self::TarGz, + "tar.bz2" => Self::TarBz2, other => return 
Err(other.to_string()), }) } @@ -38,6 +40,7 @@ impl Display for SourceDistExtension { match self { Self::Zip => f.write_str("zip"), Self::TarGz => f.write_str("tar.gz"), + Self::TarBz2 => f.write_str("tar.bz2"), } } } @@ -50,6 +53,9 @@ impl SourceDistExtension { if let Some(stem) = filename.strip_suffix(".tar.gz") { return Some((stem, Self::TarGz)); } + if let Some(stem) = filename.strip_suffix(".tar.bz2") { + return Some((stem, Self::TarBz2)); + } None } } @@ -182,7 +188,7 @@ impl Display for SourceDistFilenameError { enum SourceDistFilenameErrorKind { #[error("Name doesn't start with package name {0}")] Filename(PackageName), - #[error("Source distributions filenames must end with .zip or .tar.gz")] + #[error("Source distributions filenames must end with .zip, .tar.gz, or .tar.bz2")] Extension, #[error("Version section is invalid")] Version(#[from] VersionParseError), @@ -207,6 +213,7 @@ mod tests { "foo-lib-1.2.3.zip", "foo-lib-1.2.3a3.zip", "foo-lib-1.2.3.tar.gz", + "foo-lib-1.2.3.tar.bz2", ] { assert_eq!( SourceDistFilename::parse(normalized, &PackageName::from_str("foo_lib").unwrap()) diff --git a/crates/uv-extract/Cargo.toml b/crates/uv-extract/Cargo.toml index 0c3dbb8fb..61b8a7949 100644 --- a/crates/uv-extract/Cargo.toml +++ b/crates/uv-extract/Cargo.toml @@ -15,7 +15,7 @@ workspace = true [dependencies] pypi-types = { workspace = true } -async-compression = { workspace = true, features = ["gzip", "zstd"] } +async-compression = { workspace = true, features = ["bzip2", "gzip", "zstd"] } async_zip = { workspace = true, features = ["tokio"] } fs-err = { workspace = true, features = ["tokio"] } futures = { workspace = true } diff --git a/crates/uv-extract/src/seek.rs b/crates/uv-extract/src/seek.rs index 57499fc86..5c5cebd5e 100644 --- a/crates/uv-extract/src/seek.rs +++ b/crates/uv-extract/src/seek.rs @@ -80,7 +80,7 @@ pub async fn unzip( Ok(()) } -/// Unzip a `.zip` or `.tar.gz` archive into the target directory, requiring `Seek`. 
+/// Unzip a `.zip`, `.tar.gz`, or `.tar.bz2` archive into the target directory, requiring `Seek`. pub async fn archive( reader: R, source: impl AsRef, @@ -111,6 +111,21 @@ pub async fn archive( return Ok(()); } + // `.tar.bz2` + if source + .as_ref() + .extension() + .is_some_and(|ext| ext.eq_ignore_ascii_case("bz2")) + && source.as_ref().file_stem().is_some_and(|stem| { + Path::new(stem) + .extension() + .is_some_and(|ext| ext.eq_ignore_ascii_case("tar")) + }) + { + crate::stream::untar_bz2(reader, target).await?; + return Ok(()); + } + // `.tar.zst` if source .as_ref() diff --git a/crates/uv-extract/src/stream.rs b/crates/uv-extract/src/stream.rs index e73db2ae4..f73839a29 100644 --- a/crates/uv-extract/src/stream.rs +++ b/crates/uv-extract/src/stream.rs @@ -165,6 +165,23 @@ pub async fn untar_gz( Ok(()) } +/// Unzip a `.tar.bz2` archive into the target directory, without requiring `Seek`. +/// +/// This is useful for unpacking files as they're being downloaded. +pub async fn untar_bz2( + reader: R, + target: impl AsRef, +) -> Result<(), Error> { + let reader = tokio::io::BufReader::new(reader); + let decompressed_bytes = async_compression::tokio::bufread::BzDecoder::new(reader); + + let mut archive = tokio_tar::ArchiveBuilder::new(decompressed_bytes) + .set_preserve_mtime(false) + .build(); + untar_in(&mut archive, target.as_ref()).await?; + Ok(()) +} + /// Unzip a `.tar.zst` archive into the target directory, without requiring `Seek`. /// /// This is useful for unpacking files as they're being downloaded. @@ -181,7 +198,7 @@ pub async fn untar_zst( Ok(untar_in(&mut archive, target.as_ref()).await?) } -/// Unzip a `.zip` or `.tar.gz` archive into the target directory, without requiring `Seek`. +/// Unzip a `.zip`, `.tar.gz`, or `.tar.bz2` archive into the target directory, without requiring `Seek`. 
pub async fn archive( reader: R, source: impl AsRef, @@ -212,6 +229,20 @@ pub async fn archive( return Ok(()); } + // `.tar.bz2` + if source + .as_ref() + .extension() + .is_some_and(|ext| ext.eq_ignore_ascii_case("bz2")) + && source.as_ref().file_stem().is_some_and(|stem| { + Path::new(stem) + .extension() + .is_some_and(|ext| ext.eq_ignore_ascii_case("tar")) + }) + { + untar_bz2(reader, target).await?; + return Ok(()); + } // `.tar.zst` if source .as_ref() diff --git a/crates/uv/tests/pip_sync.rs b/crates/uv/tests/pip_sync.rs index 7cecbba56..fe331d1a1 100644 --- a/crates/uv/tests/pip_sync.rs +++ b/crates/uv/tests/pip_sync.rs @@ -666,6 +666,42 @@ fn install_sdist_url() -> Result<()> { Ok(()) } +/// Install a package with source archive format `.tar.bz2`. +#[test] +fn install_sdist_archive_type_bz2() -> Result<()> { + let context = TestContext::new("3.8"); + + // Install a version of Twisted that uses `.tar.bz2`. + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt.write_str("Twisted==20.3.0")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--no-binary") + .arg(":all:") + .arg("--strict"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Resolved 1 package in [TIME] + Downloaded 1 package in [TIME] + Installed 1 package in [TIME] + + twisted==20.3.0 + warning: The package `twisted` requires `zope-interface>=4.4.2`, but it's not installed. + warning: The package `twisted` requires `constantly>=15.1`, but it's not installed. + warning: The package `twisted` requires `incremental>=16.10.1`, but it's not installed. + warning: The package `twisted` requires `automat>=0.3.0`, but it's not installed. + warning: The package `twisted` requires `hyperlink>=17.1.1`, but it's not installed. + warning: The package `twisted` requires `pyhamcrest!=1.10.0,>=1.9.0`, but it's not installed. + warning: The package `twisted` requires `attrs>=19.2.0`, but it's not installed. 
+ "### + ); + + Ok(()) +} + /// Attempt to re-install a package into a virtual environment from a URL. The second install /// should be a no-op. #[test]