diff --git a/Cargo.lock b/Cargo.lock
index bad56abcd..9a1631f88 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3069,9 +3069,9 @@ dependencies = [
 
 [[package]]
 name = "reqwest-middleware"
-version = "0.2.4"
+version = "0.2.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "88a3e86aa6053e59030e7ce2d2a3b258dd08fc2d337d52f73f6cb480f5858690"
+checksum = "5a735987236a8e238bf0296c7e351b999c188ccc11477f311b82b55c93984216"
 dependencies = [
  "anyhow",
  "async-trait",
@@ -4363,6 +4363,7 @@ dependencies = [
  "filetime",
  "flate2",
  "fs-err",
+ "futures",
  "indexmap 2.2.5",
  "indicatif",
  "indoc",
@@ -4400,6 +4401,7 @@ dependencies = [
  "uv-cache",
  "uv-client",
  "uv-dispatch",
+ "uv-distribution",
  "uv-fs",
  "uv-installer",
  "uv-interpreter",
@@ -4613,6 +4615,7 @@ dependencies = [
  "platform-tags",
  "pypi-types",
  "reqwest",
+ "reqwest-middleware",
  "rmp-serde",
  "rustc-hash",
  "serde",
diff --git a/crates/uv-client/src/error.rs b/crates/uv-client/src/error.rs
index 542001d73..6d51d08f9 100644
--- a/crates/uv-client/src/error.rs
+++ b/crates/uv-client/src/error.rs
@@ -225,6 +225,7 @@ impl From<reqwest::Error> for ErrorKind {
         Self::ReqwestError(BetterReqwestError::from(error))
     }
 }
+
 impl From<reqwest_middleware::Error> for ErrorKind {
     fn from(error: reqwest_middleware::Error) -> Self {
         if let reqwest_middleware::Error::Middleware(ref underlying) = error {
diff --git a/crates/uv-distribution/Cargo.toml b/crates/uv-distribution/Cargo.toml
index 28d757340..c3578de3b 100644
--- a/crates/uv-distribution/Cargo.toml
+++ b/crates/uv-distribution/Cargo.toml
@@ -34,6 +34,7 @@ fs-err = { workspace = true }
 futures = { workspace = true }
 nanoid = { workspace = true }
 reqwest = { workspace = true }
+reqwest-middleware = { workspace = true }
 rmp-serde = { workspace = true }
 rustc-hash = { workspace = true }
 serde = { workspace = true, features = ["derive"] }
#[error("The task executor is broken, did some other task panic?")] @@ -74,3 +81,14 @@ impl From for Error { Self::Reqwest(BetterReqwestError::from(error)) } } + +impl From for Error { + fn from(error: reqwest_middleware::Error) -> Self { + match error { + reqwest_middleware::Error::Middleware(error) => Self::ReqwestMiddlewareError(error), + reqwest_middleware::Error::Reqwest(error) => { + Self::Reqwest(BetterReqwestError::from(error)) + } + } + } +} diff --git a/crates/uv-distribution/src/lib.rs b/crates/uv-distribution/src/lib.rs index 94283796d..43d4c7054 100644 --- a/crates/uv-distribution/src/lib.rs +++ b/crates/uv-distribution/src/lib.rs @@ -3,7 +3,7 @@ pub use download::{BuiltWheel, DiskWheel, LocalWheel}; pub use error::Error; pub use index::{BuiltWheelIndex, RegistryWheelIndex}; pub use reporter::Reporter; -pub use source::SourceDistCachedBuilder; +pub use source::{download_and_extract_archive, SourceDistCachedBuilder}; pub use unzip::Unzip; mod distribution_database; diff --git a/crates/uv-distribution/src/source/mod.rs b/crates/uv-distribution/src/source/mod.rs index 8e9e9d665..da010fd35 100644 --- a/crates/uv-distribution/src/source/mod.rs +++ b/crates/uv-distribution/src/source/mod.rs @@ -20,11 +20,12 @@ use distribution_types::{ PathSourceDist, RemoteSource, SourceDist, }; use install_wheel_rs::metadata::read_archive_metadata; -use pep508_rs::VerbatimUrl; +use pep508_rs::{Scheme, VerbatimUrl}; use platform_tags::Tags; use pypi_types::Metadata23; use uv_cache::{ - ArchiveTimestamp, CacheBucket, CacheEntry, CacheShard, CachedByTimestamp, Freshness, WheelCache, + ArchiveTimestamp, Cache, CacheBucket, CacheEntry, CacheShard, CachedByTimestamp, Freshness, + WheelCache, }; use uv_client::{ CacheControl, CachedClientError, Connectivity, DataWithCachePolicy, RegistryClient, @@ -125,7 +126,9 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> { }; // If necessary, extract the archive. - let extracted = self.extract_archive(&path_source_dist).await?; + let extracted = + extract_archive(&path_source_dist.path, self.build_context.cache()) + .await?; return self .path(source_dist, &path_source_dist, extracted.path()) @@ -157,7 +160,8 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> { } SourceDist::Path(path_source_dist) => { // If necessary, extract the archive. - let extracted = self.extract_archive(path_source_dist).await?; + let extracted = + extract_archive(&path_source_dist.path, self.build_context.cache()).await?; self.path(source_dist, path_source_dist, extracted.path()) .boxed() @@ -219,7 +223,9 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> { }; // If necessary, extract the archive. - let extracted = self.extract_archive(&path_source_dist).await?; + let extracted = + extract_archive(&path_source_dist.path, self.build_context.cache()) + .await?; return self .path_metadata(source_dist, &path_source_dist, extracted.path()) @@ -253,7 +259,8 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> { } SourceDist::Path(path_source_dist) => { // If necessary, extract the archive. 
diff --git a/crates/uv-distribution/src/lib.rs b/crates/uv-distribution/src/lib.rs
index 94283796d..43d4c7054 100644
--- a/crates/uv-distribution/src/lib.rs
+++ b/crates/uv-distribution/src/lib.rs
@@ -3,7 +3,7 @@ pub use download::{BuiltWheel, DiskWheel, LocalWheel};
 pub use error::Error;
 pub use index::{BuiltWheelIndex, RegistryWheelIndex};
 pub use reporter::Reporter;
-pub use source::SourceDistCachedBuilder;
+pub use source::{download_and_extract_archive, SourceDistCachedBuilder};
 pub use unzip::Unzip;
 
 mod distribution_database;
diff --git a/crates/uv-distribution/src/source/mod.rs b/crates/uv-distribution/src/source/mod.rs
index 8e9e9d665..da010fd35 100644
--- a/crates/uv-distribution/src/source/mod.rs
+++ b/crates/uv-distribution/src/source/mod.rs
@@ -20,11 +20,12 @@ use distribution_types::{
     PathSourceDist, RemoteSource, SourceDist,
 };
 use install_wheel_rs::metadata::read_archive_metadata;
-use pep508_rs::VerbatimUrl;
+use pep508_rs::{Scheme, VerbatimUrl};
 use platform_tags::Tags;
 use pypi_types::Metadata23;
 use uv_cache::{
-    ArchiveTimestamp, CacheBucket, CacheEntry, CacheShard, CachedByTimestamp, Freshness, WheelCache,
+    ArchiveTimestamp, Cache, CacheBucket, CacheEntry, CacheShard, CachedByTimestamp, Freshness,
+    WheelCache,
 };
 use uv_client::{
     CacheControl, CachedClientError, Connectivity, DataWithCachePolicy, RegistryClient,
@@ -125,7 +126,9 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
             };
 
             // If necessary, extract the archive.
-            let extracted = self.extract_archive(&path_source_dist).await?;
+            let extracted =
+                extract_archive(&path_source_dist.path, self.build_context.cache())
+                    .await?;
 
             return self
                 .path(source_dist, &path_source_dist, extracted.path())
@@ -157,7 +160,8 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
             }
             SourceDist::Path(path_source_dist) => {
                 // If necessary, extract the archive.
-                let extracted = self.extract_archive(path_source_dist).await?;
+                let extracted =
+                    extract_archive(&path_source_dist.path, self.build_context.cache()).await?;
 
                 self.path(source_dist, path_source_dist, extracted.path())
                     .boxed()
@@ -219,7 +223,9 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
             };
 
             // If necessary, extract the archive.
-            let extracted = self.extract_archive(&path_source_dist).await?;
+            let extracted =
+                extract_archive(&path_source_dist.path, self.build_context.cache())
+                    .await?;
 
             return self
                 .path_metadata(source_dist, &path_source_dist, extracted.path())
@@ -253,7 +259,8 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
             }
             SourceDist::Path(path_source_dist) => {
                 // If necessary, extract the archive.
-                let extracted = self.extract_archive(path_source_dist).await?;
+                let extracted =
+                    extract_archive(&path_source_dist.path, self.build_context.cache()).await?;
 
                 self.path_metadata(source_dist, path_source_dist, extracted.path())
                     .boxed()
@@ -669,7 +676,12 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
         source_dist: &SourceDist,
         git_source_dist: &GitSourceDist,
     ) -> Result<BuiltWheelMetadata, Error> {
-        let (fetch, subdirectory) = self.download_source_dist_git(&git_source_dist.url).await?;
+        let (fetch, subdirectory) = fetch_git_archive(
+            &git_source_dist.url,
+            self.build_context.cache(),
+            self.reporter.as_ref(),
+        )
+        .await?;
 
         let git_sha = fetch.git().precise().expect("Exact commit after checkout");
         let cache_shard = self.build_context.cache().shard(
@@ -725,7 +737,12 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
         source_dist: &SourceDist,
         git_source_dist: &GitSourceDist,
     ) -> Result<Metadata23, Error> {
-        let (fetch, subdirectory) = self.download_source_dist_git(&git_source_dist.url).await?;
+        let (fetch, subdirectory) = fetch_git_archive(
+            &git_source_dist.url,
+            self.build_context.cache(),
+            self.reporter.as_ref(),
+        )
+        .await?;
 
         let git_sha = fetch.git().precise().expect("Exact commit after checkout");
         let cache_shard = self.build_context.cache().shard(
@@ -841,82 +858,6 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
         Ok(cache_path)
     }
 
-    /// Download a source distribution from a Git repository.
-    async fn download_source_dist_git(&self, url: &Url) -> Result<(Fetch, Option<PathBuf>), Error> {
-        debug!("Fetching source distribution from Git: {url}");
-        let git_dir = self.build_context.cache().bucket(CacheBucket::Git);
-
-        // Avoid races between different processes, too.
-        let lock_dir = git_dir.join("locks");
-        fs::create_dir_all(&lock_dir)
-            .await
-            .map_err(Error::CacheWrite)?;
-        let canonical_url = cache_key::CanonicalUrl::new(url);
-        let _lock = LockedFile::acquire(
-            lock_dir.join(cache_key::digest(&canonical_url)),
-            &canonical_url,
-        )
-        .map_err(Error::CacheWrite)?;
-
-        let DirectGitUrl { url, subdirectory } = DirectGitUrl::try_from(url).map_err(Error::Git)?;
-
-        let source = if let Some(reporter) = &self.reporter {
-            GitSource::new(url, git_dir).with_reporter(Facade::from(reporter.clone()))
-        } else {
-            GitSource::new(url, git_dir)
-        };
-        let fetch = tokio::task::spawn_blocking(move || source.fetch())
-            .await?
-            .map_err(Error::Git)?;
-        Ok((fetch, subdirectory))
-    }
-
-    /// Extract a local source distribution, if it's stored as a `.tar.gz` or `.zip` archive.
-    ///
-    /// TODO(charlie): Consider storing the extracted source in the cache, to avoid re-extracting
-    /// on every invocation.
-    async fn extract_archive(
-        &self,
-        source_dist: &'a PathSourceDist,
-    ) -> Result<ExtractedSource<'a>, Error> {
-        // If necessary, unzip the source distribution.
-        let path = source_dist.path.as_path();
-
-        let metadata = match fs::metadata(&path).await {
-            Ok(metadata) => metadata,
-            Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
-                return Err(Error::NotFound(path.to_path_buf()));
-            }
-            Err(err) => return Err(Error::CacheRead(err)),
-        };
-
-        if metadata.is_dir() {
-            Ok(ExtractedSource::Directory(path))
-        } else {
-            debug!("Unpacking for build: {source_dist}");
-
-            let temp_dir =
-                tempfile::tempdir_in(self.build_context.cache().bucket(CacheBucket::BuiltWheels))
-                    .map_err(Error::CacheWrite)?;
-
-            // Unzip the archive into the temporary directory.
-            let reader = fs_err::tokio::File::open(&path)
-                .await
-                .map_err(Error::CacheRead)?;
-            uv_extract::seek::archive(tokio::io::BufReader::new(reader), path, &temp_dir.path())
-                .await?;
-
-            // Extract the top-level directory from the archive.
-            let extracted = match uv_extract::strip_component(temp_dir.path()) {
-                Ok(top_level) => top_level,
-                Err(uv_extract::Error::NonSingularArchive(_)) => temp_dir.path().to_path_buf(),
-                Err(err) => return Err(err.into()),
-            };
-
-            Ok(ExtractedSource::Archive(extracted, temp_dir))
-        }
-    }
-
     /// Build a source distribution, storing the built wheel in the cache.
     ///
     /// Returns the un-normalized disk filename, the parsed, normalized filename and the metadata
@@ -1090,18 +1031,21 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> {
 }
 
 #[derive(Debug)]
-enum ExtractedSource<'a> {
+pub enum ExtractedSource {
     /// The source distribution was passed in as a directory, and so doesn't need to be extracted.
-    Directory(&'a Path),
+    Directory(PathBuf),
     /// The source distribution was passed in as an archive, and was extracted into a temporary
     /// directory.
+    ///
+    /// The extracted archive and temporary directory will be deleted when the `ExtractedSource` is
+    /// dropped.
     #[allow(dead_code)]
     Archive(PathBuf, TempDir),
 }
 
-impl ExtractedSource<'_> {
+impl ExtractedSource {
     /// Return the [`Path`] to the extracted source root.
-    fn path(&self) -> &Path {
+    pub fn path(&self) -> &Path {
         match self {
             ExtractedSource::Directory(path) => path,
             ExtractedSource::Archive(path, _) => path,
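The new doc comment on `Archive` is load-bearing: the variant owns its `TempDir`, so the extracted tree is deleted the moment the `ExtractedSource` is dropped, and callers must finish reading metadata out of `path()` before letting the value go. A small demonstration of that `tempfile` behavior (the file name is illustrative):

```rust
use std::path::PathBuf;

fn main() -> std::io::Result<()> {
    let temp_dir = tempfile::tempdir()?;
    let root: PathBuf = temp_dir.path().to_path_buf();

    // Stand-in for an extracted source tree.
    std::fs::write(root.join("PKG-INFO"), "Name: example\n")?;
    assert!(root.join("PKG-INFO").exists());

    // Dropping the `TempDir` removes the directory and everything in it,
    // which is why `ExtractedSource::Archive` keeps the `TempDir` alongside
    // the extracted path.
    drop(temp_dir);
    assert!(!root.exists());
    Ok(())
}
```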
@@ -1215,3 +1159,162 @@ fn read_wheel_metadata(
     let dist_info = read_archive_metadata(filename, &mut archive)?;
     Ok(Metadata23::parse_metadata(&dist_info)?)
 }
+
+/// Extract a local source distribution, if it's stored as a `.tar.gz` or `.zip` archive.
+///
+/// TODO(charlie): Consider storing the extracted source in the cache, to avoid re-extracting
+/// on every invocation.
+async fn extract_archive(path: &Path, cache: &Cache) -> Result<ExtractedSource, Error> {
+    let metadata = match fs::metadata(&path).await {
+        Ok(metadata) => metadata,
+        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
+            return Err(Error::NotFound(path.to_path_buf()));
+        }
+        Err(err) => return Err(Error::CacheRead(err)),
+    };
+
+    if metadata.is_dir() {
+        Ok(ExtractedSource::Directory(path.to_path_buf()))
+    } else {
+        debug!("Unpacking for build: {}", path.display());
+
+        let temp_dir = tempfile::tempdir_in(cache.bucket(CacheBucket::BuiltWheels))
+            .map_err(Error::CacheWrite)?;
+
+        // Unzip the archive into the temporary directory.
+        let reader = fs_err::tokio::File::open(&path)
+            .await
+            .map_err(Error::CacheRead)?;
+        uv_extract::seek::archive(tokio::io::BufReader::new(reader), path, &temp_dir.path())
+            .await?;
+
+        // Extract the top-level directory from the archive.
+        let extracted = match uv_extract::strip_component(temp_dir.path()) {
+            Ok(top_level) => top_level,
+            Err(uv_extract::Error::NonSingularArchive(_)) => temp_dir.path().to_path_buf(),
+            Err(err) => return Err(err.into()),
+        };
+
+        Ok(ExtractedSource::Archive(extracted, temp_dir))
+    }
+}
+
+/// Download a source distribution from a Git repository.
+async fn fetch_git_archive(
+    url: &Url,
+    cache: &Cache,
+    reporter: Option<&Arc<dyn Reporter>>,
+) -> Result<(Fetch, Option<PathBuf>), Error> {
+    debug!("Fetching source distribution from Git: {url}");
+    let git_dir = cache.bucket(CacheBucket::Git);
+
+    // Avoid races between different processes, too.
+    let lock_dir = git_dir.join("locks");
+    fs::create_dir_all(&lock_dir)
+        .await
+        .map_err(Error::CacheWrite)?;
+    let canonical_url = cache_key::CanonicalUrl::new(url);
+    let _lock = LockedFile::acquire(
+        lock_dir.join(cache_key::digest(&canonical_url)),
+        &canonical_url,
+    )
+    .map_err(Error::CacheWrite)?;
+
+    let DirectGitUrl { url, subdirectory } = DirectGitUrl::try_from(url).map_err(Error::Git)?;
+
+    let source = if let Some(reporter) = reporter {
+        GitSource::new(url, git_dir).with_reporter(Facade::from(reporter.clone()))
+    } else {
+        GitSource::new(url, git_dir)
+    };
+    let fetch = tokio::task::spawn_blocking(move || source.fetch())
+        .await?
+        .map_err(Error::Git)?;
+    Ok((fetch, subdirectory))
+}
+
+/// Download and extract a source distribution from a URL.
+///
+/// This function will download the source distribution from the given URL, and extract it into a
+/// directory.
+///
+/// For VCS distributions, this method will checkout the URL into the shared Git cache.
+///
+/// For local archives, this method will extract the archive into a temporary directory.
+///
+/// For HTTP distributions, this method will download the archive and extract it into a temporary
+/// directory.
+pub async fn download_and_extract_archive(
+    url: &Url,
+    cache: &Cache,
+    client: &RegistryClient,
+) -> Result<ExtractedSource, Error> {
+    match Scheme::parse(url.scheme()) {
+        // Ex) `file:///home/ferris/project/scripts/...` or `file:../editable/`.
+        Some(Scheme::File) => {
+            let path = url.to_file_path().expect("URL to be a file path");
+            extract_archive(&path, cache).await
+        }
+        // Ex) `git+https://github.com/pallets/flask`
+        Some(Scheme::GitSsh | Scheme::GitHttps) => {
+            // Download the source distribution from the Git repository.
+            let (fetch, subdirectory) = fetch_git_archive(url, cache, None).await?;
+            let path = if let Some(subdirectory) = subdirectory {
+                fetch.path().join(subdirectory)
+            } else {
+                fetch.path().to_path_buf()
+            };
+            Ok(ExtractedSource::Directory(path))
+        }
+        // Ex) `https://download.pytorch.org/whl/torch_stable.html`
+        Some(Scheme::Http | Scheme::Https) => {
+            let filename = url.filename().expect("Distribution must have a filename");
+
+            // Build a request to download the source distribution.
+            let req = client
+                .uncached_client()
+                .get(url.clone())
+                .header(
+                    // `reqwest` defaults to accepting compressed responses.
+                    // Specify identity encoding to get consistent .whl downloading
+                    // behavior from servers. ref: https://github.com/pypa/pip/pull/1688
+                    "accept-encoding",
+                    reqwest::header::HeaderValue::from_static("identity"),
+                )
+                .build()?;
+
+            // Execute the request over the network.
+            let response = client
+                .uncached_client()
+                .execute(req)
+                .await?
+                .error_for_status()?;
+
+            // Download and unzip the source distribution into a temporary directory.
+            let temp_dir = tempfile::tempdir_in(cache.bucket(CacheBucket::BuiltWheels))
+                .map_err(Error::CacheWrite)?;
+            let reader = response
+                .bytes_stream()
+                .map_err(|err| std::io::Error::new(std::io::ErrorKind::Other, err))
+                .into_async_read();
+            uv_extract::stream::archive(reader.compat(), filename.as_ref(), temp_dir.path())
+                .await?;
+
+            // Extract the top-level directory.
+            let extracted = match uv_extract::strip_component(temp_dir.path()) {
+                Ok(top_level) => top_level,
+                Err(uv_extract::Error::NonSingularArchive(_)) => temp_dir.path().to_path_buf(),
+                Err(err) => return Err(err.into()),
+            };
+
+            Ok(ExtractedSource::Archive(extracted, temp_dir))
+        }
+        // Ex) `../editable/`
+        None => {
+            let path = url.to_file_path().expect("URL to be a file path");
+            extract_archive(&path, cache).await
+        }
+        // Ex) `bzr+https://launchpad.net/bzr/+download/...`
+        Some(scheme) => Err(Error::UnsupportedScheme(scheme.to_string())),
+    }
+}
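The `download_and_extract_archive` helper added above dispatches purely on the URL scheme: `file` extracts locally, `git+ssh`/`git+https` go through the shared Git cache, `http(s)` streams the download into a temporary directory, and anything else is rejected as `UnsupportedScheme`. A sketch of what each branch sees, using the `url` crate that backs `Url` here (the arrow comments are illustrative):

```rust
use url::Url;

fn main() {
    let examples = [
        ("file:///home/ferris/project/scripts/pkg.tar.gz", "file -> extract_archive"),
        ("git+https://github.com/pallets/flask", "git+https -> fetch_git_archive"),
        ("https://example.com/pkg-1.0.tar.gz", "http(s) -> download, then extract"),
        ("bzr+https://launchpad.net/bzr", "other -> Error::UnsupportedScheme"),
    ];
    for (raw, branch) in examples {
        // `Scheme::parse` (from `pep508_rs`) receives exactly this scheme string.
        let url = Url::parse(raw).expect("valid URL");
        println!("{:<50} scheme={:<10} {}", raw, url.scheme(), branch);
    }
}
```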
diff --git a/crates/uv/Cargo.toml b/crates/uv/Cargo.toml
index 87a7066e1..51af69e52 100644
--- a/crates/uv/Cargo.toml
+++ b/crates/uv/Cargo.toml
@@ -25,6 +25,7 @@ uv-auth = { workspace = true, features = ["clap"] }
 uv-cache = { workspace = true, features = ["clap"] }
 uv-client = { workspace = true }
 uv-dispatch = { workspace = true }
+uv-distribution = { workspace = true }
 uv-fs = { workspace = true }
 uv-installer = { workspace = true }
 uv-interpreter = { workspace = true }
@@ -46,6 +47,7 @@ console = { workspace = true }
 ctrlc = { workspace = true }
 flate2 = { workspace = true, default-features = false }
 fs-err = { workspace = true, features = ["tokio"] }
+futures = { workspace = true }
 indexmap = { workspace = true }
 indicatif = { workspace = true }
 itertools = { workspace = true }
diff --git a/crates/uv/src/commands/pip_compile.rs b/crates/uv/src/commands/pip_compile.rs
index 9711bbeed..b7118a92c 100644
--- a/crates/uv/src/commands/pip_compile.rs
+++ b/crates/uv/src/commands/pip_compile.rs
@@ -90,7 +90,18 @@ pub(crate) async fn pip_compile(
     }
 
     // Read all requirements from the provided sources.
-    let spec = RequirementsSpecification::from_sources(
+    let RequirementsSpecification {
+        project,
+        requirements,
+        constraints,
+        overrides,
+        editables,
+        extras: used_extras,
+        index_url,
+        extra_index_urls,
+        no_index,
+        find_links,
+    } = RequirementsSpecification::from_sources(
         requirements,
         constraints,
         overrides,
@@ -103,7 +114,7 @@ pub(crate) async fn pip_compile(
     if let ExtrasSpecification::Some(extras) = extras {
         let mut unused_extras = extras
             .iter()
-            .filter(|extra| !spec.extras.contains(extra))
+            .filter(|extra| !used_extras.contains(extra))
             .collect::<Vec<_>>();
         if !unused_extras.is_empty() {
             unused_extras.sort_unstable();
@@ -116,22 +127,6 @@ pub(crate) async fn pip_compile(
         }
     }
 
-    // Convert from unnamed to named requirements.
-    let NamedRequirements {
-        project,
-        requirements,
-        constraints,
-        overrides,
-        editables,
-        index_url,
-        extra_index_urls,
-        no_index,
-        find_links,
-    } = NamedRequirements::from_spec(spec)?;
-
-    // Read the lockfile, if present.
-    let preferences = read_lockfile(output_file, upgrade).await?;
-
     // Find an interpreter to use for building distributions
     let interpreter = find_best_python(python_version.as_ref(), &cache)?;
     debug!(
@@ -207,6 +202,25 @@ pub(crate) async fn pip_compile(
         .platform(interpreter.platform())
         .build();
 
+    // Read the lockfile, if present.
+    let preferences = read_lockfile(output_file, upgrade).await?;
+
+    // Convert from unnamed to named requirements.
+    let NamedRequirements {
+        requirements,
+        constraints,
+        overrides,
+        editables,
+    } = NamedRequirements::from_spec(
+        requirements,
+        constraints,
+        overrides,
+        editables,
+        &cache,
+        &client,
+    )
+    .await?;
+
     // Resolve the flat indexes from `--find-links`.
     let flat_index = {
         let client = FlatIndexClient::new(&client, &cache);
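All three commands now share the same shape: destructure the `RequirementsSpecification` into locals as soon as it is read (renaming `extras` to `used_extras` here, discarding unused fields with `_` elsewhere), and only later hand the moved values to `NamedRequirements::from_spec`, once the `cache` and `client` it needs exist. A minimal sketch of that binding pattern, with `Spec` standing in for `RequirementsSpecification`:

```rust
struct Spec {
    project: Option<String>,
    requirements: Vec<String>,
    extras: Vec<String>,
}

fn main() {
    let spec = Spec {
        project: Some("example".to_string()),
        requirements: vec!["flask".to_string()],
        extras: vec![],
    };

    // Move the fields into locals at the binding site; later code takes them
    // by value instead of reaching through `spec.` accessors.
    let Spec {
        project: _,
        requirements,
        extras: used_extras,
    } = spec;

    assert_eq!(requirements, ["flask"]);
    assert!(used_extras.is_empty());
}
```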
diff --git a/crates/uv/src/commands/pip_install.rs b/crates/uv/src/commands/pip_install.rs
index 01d510b94..b5771985f 100644
--- a/crates/uv/src/commands/pip_install.rs
+++ b/crates/uv/src/commands/pip_install.rs
@@ -81,8 +81,18 @@ pub(crate) async fn pip_install(
     let start = Instant::now();
 
     // Read all requirements from the provided sources.
-    let spec =
-        read_requirements(requirements, constraints, overrides, extras, connectivity).await?;
+    let RequirementsSpecification {
+        project,
+        requirements,
+        constraints,
+        overrides,
+        editables,
+        index_url,
+        extra_index_urls,
+        no_index,
+        find_links,
+        extras: _,
+    } = read_requirements(requirements, constraints, overrides, extras, connectivity).await?;
 
     // Detect the current Python interpreter.
     let venv = if let Some(python) = python.as_ref() {
@@ -128,9 +138,9 @@ pub(crate) async fn pip_install(
     // magnitude faster to validate the environment than to resolve the requirements.
     if reinstall.is_none()
         && upgrade.is_none()
-        && site_packages.satisfies(&spec.requirements, &spec.editables, &spec.constraints)?
+        && site_packages.satisfies(&requirements, &editables, &constraints)?
     {
-        let num_requirements = spec.requirements.len() + spec.editables.len();
+        let num_requirements = requirements.len() + editables.len();
         let s = if num_requirements == 1 { "" } else { "s" };
         writeln!(
             printer.stderr(),
@@ -148,19 +158,6 @@ pub(crate) async fn pip_install(
         return Ok(ExitStatus::Success);
     }
 
-    // Convert from unnamed to named requirements.
-    let NamedRequirements {
-        project,
-        requirements,
-        constraints,
-        overrides,
-        editables,
-        index_url,
-        extra_index_urls,
-        no_index,
-        find_links,
-    } = NamedRequirements::from_spec(spec)?;
-
     // Determine the tags, markers, and interpreter to use for resolution.
     let interpreter = venv.interpreter().clone();
     let tags = venv.interpreter().tags()?;
@@ -185,6 +182,22 @@ pub(crate) async fn pip_install(
         .platform(interpreter.platform())
         .build();
 
+    // Convert from unnamed to named requirements.
+    let NamedRequirements {
+        requirements,
+        constraints,
+        overrides,
+        editables,
+    } = NamedRequirements::from_spec(
+        requirements,
+        constraints,
+        overrides,
+        editables,
+        &cache,
+        &client,
+    )
+    .await?;
+
     // Resolve the flat indexes from `--find-links`.
     let flat_index = {
         let client = FlatIndexClient::new(&client, &cache);
diff --git a/crates/uv/src/commands/pip_sync.rs b/crates/uv/src/commands/pip_sync.rs
index dddf82d83..57db4cdc6 100644
--- a/crates/uv/src/commands/pip_sync.rs
+++ b/crates/uv/src/commands/pip_sync.rs
@@ -54,10 +54,21 @@ pub(crate) async fn pip_sync(
     let start = std::time::Instant::now();
 
     // Read all requirements from the provided sources.
-    let spec = RequirementsSpecification::from_simple_sources(sources, connectivity).await?;
+    let RequirementsSpecification {
+        project: _,
+        requirements,
+        constraints,
+        overrides,
+        editables,
+        extras: _,
+        index_url,
+        extra_index_urls,
+        no_index,
+        find_links,
+    } = RequirementsSpecification::from_simple_sources(sources, connectivity).await?;
 
     // Validate that the requirements are non-empty.
-    let num_requirements = spec.requirements.len() + spec.editables.len();
+    let num_requirements = requirements.len() + editables.len();
     if num_requirements == 0 {
         writeln!(printer.stderr(), "No requirements found")?;
         return Ok(ExitStatus::Success);
@@ -97,19 +108,6 @@ pub(crate) async fn pip_sync(
         }
     }
 
-    // Convert from unnamed to named requirements.
-    let NamedRequirements {
-        project: _project,
-        requirements,
-        constraints: _constraints,
-        overrides: _overrides,
-        editables,
-        index_url,
-        extra_index_urls,
-        no_index,
-        find_links,
-    } = NamedRequirements::from_spec(spec)?;
-
     let _lock = venv.lock()?;
 
     // Determine the current environment markers.
@@ -134,6 +132,22 @@ pub(crate) async fn pip_sync(
         .platform(venv.interpreter().platform())
         .build();
 
+    // Convert from unnamed to named requirements.
+    let NamedRequirements {
+        requirements,
+        constraints: _constraints,
+        overrides: _overrides,
+        editables,
+    } = NamedRequirements::from_spec(
+        requirements,
+        constraints,
+        overrides,
+        editables,
+        &cache,
+        &client,
+    )
+    .await?;
+
     // Resolve the flat indexes from `--find-links`.
     let flat_index = {
         let client = FlatIndexClient::new(&client, &cache);
diff --git a/crates/uv/src/requirements.rs b/crates/uv/src/requirements.rs
index 272b929b9..1c3c3474c 100644
--- a/crates/uv/src/requirements.rs
+++ b/crates/uv/src/requirements.rs
@@ -6,7 +6,7 @@ use std::str::FromStr;
 use anyhow::{Context, Result};
 use configparser::ini::Ini;
 use console::Term;
-use distribution_filename::{SourceDistFilename, WheelFilename};
+use futures::{StreamExt, TryStreamExt};
 use indexmap::IndexMap;
 use once_cell::sync::Lazy;
 use regex::Regex;
@@ -14,13 +14,14 @@ use rustc_hash::FxHashSet;
 use serde::Deserialize;
 use tracing::{debug, instrument, Level};
 
+use distribution_filename::{SourceDistFilename, WheelFilename};
 use distribution_types::{FlatIndexLocation, IndexUrl, RemoteSource};
-use pep508_rs::{
-    Requirement, RequirementsTxtRequirement, Scheme, UnnamedRequirement, VersionOrUrl,
-};
+use pep508_rs::{Requirement, RequirementsTxtRequirement, UnnamedRequirement, VersionOrUrl};
 use pypi_types::Metadata10;
 use requirements_txt::{EditableRequirement, FindLink, RequirementsTxt};
-use uv_client::Connectivity;
+use uv_cache::Cache;
+use uv_client::{Connectivity, RegistryClient};
+use uv_distribution::download_and_extract_archive;
 use uv_fs::Simplified;
 use uv_normalize::{ExtraName, PackageName};
 use uv_resolver::{Preference, PreferenceError};
@@ -502,8 +503,6 @@ pub(crate) async fn read_lockfile(
 /// Like [`RequirementsSpecification`], but with concrete names for all requirements.
 #[derive(Debug, Default)]
 pub(crate) struct NamedRequirements {
-    /// The name of the project specifying requirements.
-    pub(crate) project: Option<PackageName>,
     /// The requirements for the project.
     pub(crate) requirements: Vec<Requirement>,
     /// The constraints for the project.
@@ -512,43 +511,46 @@ pub(crate) struct NamedRequirements {
     pub(crate) overrides: Vec<Requirement>,
     /// Package to install as editable installs
     pub(crate) editables: Vec<EditableRequirement>,
-    /// The index URL to use for fetching packages.
-    pub(crate) index_url: Option<IndexUrl>,
-    /// The extra index URLs to use for fetching packages.
-    pub(crate) extra_index_urls: Vec<IndexUrl>,
-    /// Whether to disallow index usage.
-    pub(crate) no_index: bool,
-    /// The `--find-links` locations to use for fetching packages.
-    pub(crate) find_links: Vec<FlatIndexLocation>,
 }
 
 impl NamedRequirements {
     /// Convert a [`RequirementsSpecification`] into a [`NamedRequirements`].
-    pub(crate) fn from_spec(spec: RequirementsSpecification) -> Result<Self> {
-        Ok(Self {
-            project: spec.project,
-            requirements: spec
-                .requirements
-                .into_iter()
-                .map(|requirement| match requirement {
+    pub(crate) async fn from_spec(
+        requirements: Vec<RequirementsTxtRequirement>,
+        constraints: Vec<Requirement>,
+        overrides: Vec<Requirement>,
+        editables: Vec<EditableRequirement>,
+        cache: &Cache,
+        client: &RegistryClient,
+    ) -> Result<Self> {
+        // Resolve all unnamed references.
+        let requirements = futures::stream::iter(requirements)
+            .map(|requirement| async {
+                match requirement {
                     RequirementsTxtRequirement::Pep508(requirement) => Ok(requirement),
                     RequirementsTxtRequirement::Unnamed(requirement) => {
-                        Self::name_requirement(requirement)
+                        Self::name_requirement(requirement, cache, client).await
                     }
-                })
-                .collect::<Result<_>>()?,
-            constraints: spec.constraints,
-            overrides: spec.overrides,
-            editables: spec.editables,
-            index_url: spec.index_url,
-            extra_index_urls: spec.extra_index_urls,
-            no_index: spec.no_index,
-            find_links: spec.find_links,
+                }
+            })
+            .buffered(50)
+            .try_collect()
+            .await?;
+
+        Ok(Self {
+            requirements,
+            constraints,
+            overrides,
+            editables,
         })
     }
 
     /// Infer the package name for a given "unnamed" requirement.
-    fn name_requirement(requirement: UnnamedRequirement) -> Result<Requirement> {
+    async fn name_requirement(
+        requirement: UnnamedRequirement,
+        cache: &Cache,
+        client: &RegistryClient,
+    ) -> Result<Requirement> {
         // If the requirement is a wheel, extract the package name from the wheel filename.
         //
         // Ex) `anyio-4.3.0-py3-none-any.whl`
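`from_spec` is now async because naming an unnamed requirement can involve network and disk work, and it resolves entries concurrently: each requirement is mapped to a future, `buffered(50)` caps how many run at once while preserving input order, and `try_collect` short-circuits on the first error. The same pattern in isolation, with `resolve` standing in for `name_requirement` (assumes tokio with the `rt` and `macros` features):

```rust
use futures::{stream, StreamExt, TryStreamExt};

async fn resolve(input: &str) -> Result<String, String> {
    // Stand-in for the real async, fallible lookup.
    Ok(input.to_uppercase())
}

#[tokio::main]
async fn main() -> Result<(), String> {
    let inputs = vec!["flask", "anyio", "httpx"];
    let resolved: Vec<String> = stream::iter(inputs)
        .map(resolve)
        .buffered(50) // at most 50 futures in flight; output order matches input
        .try_collect()
        .await?;
    assert_eq!(resolved, ["FLASK", "ANYIO", "HTTPX"]);
    Ok(())
}
```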
@@ -583,118 +585,60 @@ impl NamedRequirements {
             });
         }
 
-        // Otherwise, download and/or extract the source archive.
-        if Scheme::parse(requirement.url.scheme()) == Some(Scheme::File) {
-            let path = requirement.url.to_file_path().map_err(|()| {
-                anyhow::anyhow!("Unable to convert file URL to path: {requirement}")
+        // Download the archive and attempt to infer the package name from the archive contents.
+        let source = download_and_extract_archive(&requirement.url, cache, client)
+            .await
+            .with_context(|| {
+                format!("Unable to infer package name for the unnamed requirement: {requirement}")
             })?;
 
-            if !path.exists() {
-                return Err(anyhow::anyhow!(
-                    "Unnamed requirement at {path} not found",
-                    path = path.simplified_display()
-                ));
-            }
+        // Extract the path to the root of the distribution.
+        let path = source.path();
 
-            // Attempt to read a `PKG-INFO` from the directory.
-            if let Some(metadata) = fs_err::read(path.join("PKG-INFO"))
-                .ok()
-                .and_then(|contents| Metadata10::parse_pkg_info(&contents).ok())
-            {
+        // Attempt to read a `PKG-INFO` from the directory.
+        if let Some(metadata) = fs_err::read(path.join("PKG-INFO"))
+            .ok()
+            .and_then(|contents| Metadata10::parse_pkg_info(&contents).ok())
+        {
+            debug!(
+                "Found PKG-INFO metadata for {path} ({name})",
+                path = path.display(),
+                name = metadata.name
+            );
+            return Ok(Requirement {
+                name: metadata.name,
+                extras: requirement.extras,
+                version_or_url: Some(VersionOrUrl::Url(requirement.url)),
+                marker: requirement.marker,
+            });
+        }
+
+        // Attempt to read a `pyproject.toml` file.
+        if let Some(pyproject) = fs_err::read_to_string(path.join("pyproject.toml"))
+            .ok()
+            .and_then(|contents| toml::from_str::<PyProjectToml>(&contents).ok())
+        {
+            // Read PEP 621 metadata from the `pyproject.toml`.
+            if let Some(project) = pyproject.project {
                 debug!(
-                    "Found PKG-INFO metadata for {path} ({name})",
+                    "Found PEP 621 metadata for {path} in `pyproject.toml` ({name})",
                     path = path.display(),
-                    name = metadata.name
+                    name = project.name
                 );
                 return Ok(Requirement {
-                    name: metadata.name,
+                    name: project.name,
                     extras: requirement.extras,
                     version_or_url: Some(VersionOrUrl::Url(requirement.url)),
                     marker: requirement.marker,
                 });
             }
 
-            // Attempt to read a `pyproject.toml` file.
-            if let Some(pyproject) = fs_err::read_to_string(path.join("pyproject.toml"))
-                .ok()
-                .and_then(|contents| toml::from_str::<PyProjectToml>(&contents).ok())
-            {
-                // Read PEP 621 metadata from the `pyproject.toml`.
-                if let Some(project) = pyproject.project {
-                    debug!(
-                        "Found PEP 621 metadata for {path} in `pyproject.toml` ({name})",
-                        path = path.display(),
-                        name = project.name
-                    );
-                    return Ok(Requirement {
-                        name: project.name,
-                        extras: requirement.extras,
-                        version_or_url: Some(VersionOrUrl::Url(requirement.url)),
-                        marker: requirement.marker,
-                    });
-                }
-
-                // Read Poetry-specific metadata from the `pyproject.toml`.
-                if let Some(tool) = pyproject.tool {
-                    if let Some(poetry) = tool.poetry {
-                        if let Some(name) = poetry.name {
-                            debug!(
-                                "Found Poetry metadata for {path} in `pyproject.toml` ({name})",
-                                path = path.display(),
-                                name = name
-                            );
-                            return Ok(Requirement {
-                                name,
-                                extras: requirement.extras,
-                                version_or_url: Some(VersionOrUrl::Url(requirement.url)),
-                                marker: requirement.marker,
-                            });
-                        }
-                    }
-                }
-            }
-
-            // Attempt to read a `setup.cfg` from the directory.
-            if let Some(setup_cfg) = fs_err::read_to_string(path.join("setup.cfg"))
-                .ok()
-                .and_then(|contents| {
-                    let mut ini = Ini::new_cs();
-                    ini.set_multiline(true);
-                    ini.read(contents).ok()
-                })
-            {
-                if let Some(section) = setup_cfg.get("metadata") {
-                    if let Some(Some(name)) = section.get("name") {
-                        if let Ok(name) = PackageName::from_str(name) {
-                            debug!(
-                                "Found setuptools metadata for {path} in `setup.cfg` ({name})",
-                                path = path.display(),
-                                name = name
-                            );
-                            return Ok(Requirement {
-                                name,
-                                extras: requirement.extras,
-                                version_or_url: Some(VersionOrUrl::Url(requirement.url)),
-                                marker: requirement.marker,
-                            });
-                        }
-                    }
-                }
-            }
-
-            // Attempt to read a `setup.py` from the directory.
-            if let Ok(setup_py) = fs_err::read_to_string(path.join("setup.py")) {
-                static SETUP_PY_NAME: Lazy<Regex> =
-                    Lazy::new(|| Regex::new(r#"name\s*[=:]\s*['"](?P<name>[^'"]+)['"]"#).unwrap());
-
-                if let Some(name) = SETUP_PY_NAME
-                    .captures(&setup_py)
-                    .and_then(|captures| captures.name("name"))
-                    .map(|name| name.as_str())
-                {
-                    if let Ok(name) = PackageName::from_str(name) {
+            // Read Poetry-specific metadata from the `pyproject.toml`.
+            if let Some(tool) = pyproject.tool {
+                if let Some(poetry) = tool.poetry {
+                    if let Some(name) = poetry.name {
                         debug!(
-                            "Found setuptools metadata for {path} in `setup.py` ({name})",
+                            "Found Poetry metadata for {path} in `pyproject.toml` ({name})",
                             path = path.display(),
                             name = name
                         );
@@ -709,6 +653,61 @@ impl NamedRequirements {
             }
         }
 
+        // Attempt to read a `setup.cfg` from the directory.
+        if let Some(setup_cfg) = fs_err::read_to_string(path.join("setup.cfg"))
+            .ok()
+            .and_then(|contents| {
+                let mut ini = Ini::new_cs();
+                ini.set_multiline(true);
+                ini.read(contents).ok()
+            })
+        {
+            if let Some(section) = setup_cfg.get("metadata") {
+                if let Some(Some(name)) = section.get("name") {
+                    if let Ok(name) = PackageName::from_str(name) {
+                        debug!(
+                            "Found setuptools metadata for {path} in `setup.cfg` ({name})",
+                            path = path.display(),
+                            name = name
+                        );
+                        return Ok(Requirement {
+                            name,
+                            extras: requirement.extras,
+                            version_or_url: Some(VersionOrUrl::Url(requirement.url)),
+                            marker: requirement.marker,
+                        });
+                    }
+                }
+            }
+        }
+
+        // Attempt to read a `setup.py` from the directory.
+        if let Ok(setup_py) = fs_err::read_to_string(path.join("setup.py")) {
+            static SETUP_PY_NAME: Lazy<Regex> =
+                Lazy::new(|| Regex::new(r#"name\s*[=:]\s*['"](?P<name>[^'"]+)['"]"#).unwrap());
+
+            if let Some(name) = SETUP_PY_NAME
+                .captures(&setup_py)
+                .and_then(|captures| captures.name("name"))
+                .map(|name| name.as_str())
+            {
+                if let Ok(name) = PackageName::from_str(name) {
+                    debug!(
+                        "Found setuptools metadata for {path} in `setup.py` ({name})",
+                        path = path.display(),
+                        name = name
+                    );
+                    return Ok(Requirement {
+                        name,
+                        extras: requirement.extras,
+                        version_or_url: Some(VersionOrUrl::Url(requirement.url)),
+                        marker: requirement.marker,
+                    });
+                }
+            }
+        }
+
+        // TODO(charlie): If this is common, consider running the PEP 517 build hooks.
         Err(anyhow::anyhow!(
             "Unable to infer package name for the unnamed requirement: {requirement}"
        ))
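The `setup.py` fallback re-added above is a heuristic, not a parse: a regex scan for a quoted `name` argument. A standalone check of that exact pattern against a made-up `setup.py` body (requires the `regex` and `once_cell` crates, as in this file):

```rust
use once_cell::sync::Lazy;
use regex::Regex;

// Same pattern as `SETUP_PY_NAME` in the diff.
static SETUP_PY_NAME: Lazy<Regex> =
    Lazy::new(|| Regex::new(r#"name\s*[=:]\s*['"](?P<name>[^'"]+)['"]"#).unwrap());

fn main() {
    let setup_py = r#"
from setuptools import setup

setup(
    name="flask",
    version="3.0.0",
)
"#;
    let name = SETUP_PY_NAME
        .captures(setup_py)
        .and_then(|captures| captures.name("name"))
        .map(|matched| matched.as_str());
    assert_eq!(name, Some("flask"));
}
```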
diff --git a/crates/uv/tests/pip_compile.rs b/crates/uv/tests/pip_compile.rs
index 58d5d2325..4b9e25854 100644
--- a/crates/uv/tests/pip_compile.rs
+++ b/crates/uv/tests/pip_compile.rs
@@ -3516,6 +3516,7 @@ fn unnamed_requirement_ambiguous() -> Result<()> {
 
     ----- stderr -----
     error: Unable to infer package name for the unnamed requirement: https://files.pythonhosted.org/packages/36/42/015c23096649b908c809c69388a805a571a3bea44362fe87e33fc3afa01f/flask-3.0.0
+      Caused by: HTTP status client error (404 Not Found) for url (https://files.pythonhosted.org/packages/36/42/015c23096649b908c809c69388a805a571a3bea44362fe87e33fc3afa01f/flask-3.0.0)
     "###
     );
 
@@ -5749,7 +5750,7 @@ fn preserve_hashes_newer_version() -> Result<()> {
 
 /// Detect the package name from metadata sources from local directories.
 #[test]
-fn detect_package_name() -> Result<()> {
+fn unnamed_path_requirement() -> Result<()> {
     let context = TestContext::new("3.12");
     let requirements_in = context.temp_dir.child("requirements.in");
     requirements_in.write_str(indoc! {r"
@@ -5815,3 +5816,78 @@ fn detect_package_name() -> Result<()> {
 
     Ok(())
 }
+
+/// Detect the package name from an unnamed Git requirement.
+#[test]
+fn unnamed_git_requirement() -> Result<()> {
+    let context = TestContext::new("3.12");
+    let requirements_in = context.temp_dir.child("requirements.in");
+    requirements_in.write_str("git+https://github.com/pallets/flask.git")?;
+
+    uv_snapshot!(context.compile()
+        .arg("requirements.in"), @r###"
+    success: true
+    exit_code: 0
+    ----- stdout -----
+    # This file was autogenerated by uv via the following command:
+    #    uv pip compile --cache-dir [CACHE_DIR] --exclude-newer 2023-11-18T12:00:00Z requirements.in
+    blinker==1.7.0
+        # via flask
+    click==8.1.7
+        # via flask
+    flask @ git+https://github.com/pallets/flask.git@b90a4f1f4a370e92054b9cc9db0efcb864f87ebe
+    itsdangerous==2.1.2
+        # via flask
+    jinja2==3.1.2
+        # via flask
+    markupsafe==2.1.3
+        # via
+        #   jinja2
+        #   werkzeug
+    werkzeug==3.0.1
+        # via flask
+
+    ----- stderr -----
+    Resolved 7 packages in [TIME]
+    "###);
+
+    Ok(())
+}
+
+/// Detect the package name from an unnamed HTTPS requirement.
+#[test]
+fn unnamed_https_requirement() -> Result<()> {
+    // Given the filename `3.0.2.tar.gz`, we need to download the file to determine the package name.
+    let context = TestContext::new("3.12");
+    let requirements_in = context.temp_dir.child("requirements.in");
+    requirements_in.write_str("https://github.com/pallets/flask/archive/refs/tags/3.0.2.tar.gz")?;
+
+    uv_snapshot!(context.compile()
+        .arg("requirements.in"), @r###"
+    success: true
+    exit_code: 0
+    ----- stdout -----
+    # This file was autogenerated by uv via the following command:
+    #    uv pip compile --cache-dir [CACHE_DIR] --exclude-newer 2023-11-18T12:00:00Z requirements.in
+    blinker==1.7.0
+        # via flask
+    click==8.1.7
+        # via flask
+    flask @ https://github.com/pallets/flask/archive/refs/tags/3.0.2.tar.gz
+    itsdangerous==2.1.2
+        # via flask
+    jinja2==3.1.2
+        # via flask
+    markupsafe==2.1.3
+        # via
+        #   jinja2
+        #   werkzeug
+    werkzeug==3.0.1
+        # via flask
+
+    ----- stderr -----
+    Resolved 7 packages in [TIME]
+    "###);
+
+    Ok(())
+}
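Why downloading `3.0.2.tar.gz` is enough to resolve the name in the test above: the sdist carries a `PKG-INFO` file, and its `Name:` field is the first thing `name_requirement` probes (via `Metadata10::parse_pkg_info`). A hand-rolled sketch of that lookup over a made-up metadata body:

```rust
fn main() {
    let pkg_info = "Metadata-Version: 2.1\nName: flask\nVersion: 3.0.2\n";
    let name = pkg_info
        .lines()
        .find_map(|line| line.strip_prefix("Name: "));
    assert_eq!(name, Some("flask"));
}
```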