diff --git a/crates/puffin-cli/Cargo.toml b/crates/puffin-cli/Cargo.toml index e4df72004..bc0509b8e 100644 --- a/crates/puffin-cli/Cargo.toml +++ b/crates/puffin-cli/Cargo.toml @@ -20,3 +20,7 @@ async-std = { version = "1.12.0", features = [ ] } futures = "0.3.28" pep508_rs = "0.2.3" +pep440_rs = "0.3.12" +tracing = "0.1.37" +tracing-tree = "0.2.5" +tracing-subscriber = { version = "0.3.17", features = ["env-filter"] } diff --git a/crates/puffin-cli/src/commands/install.rs b/crates/puffin-cli/src/commands/install.rs index 897279569..ab16703cf 100644 --- a/crates/puffin-cli/src/commands/install.rs +++ b/crates/puffin-cli/src/commands/install.rs @@ -1,17 +1,35 @@ +use std::collections::{HashMap, HashSet}; use std::path::Path; use std::str::FromStr; use anyhow::Result; use futures::{StreamExt, TryFutureExt}; -use pep508_rs::VersionOrUrl; +use pep440_rs::Version; +use pep508_rs::{MarkerEnvironment, Requirement, StringVersion, VersionOrUrl}; +use tracing::trace; use puffin_client::{PypiClientBuilder, SimpleJson}; +use puffin_requirements::package_name::PackageName; use puffin_requirements::wheel::WheelName; -use puffin_requirements::Requirement; use crate::commands::ExitStatus; pub(crate) async fn install(src: &Path) -> Result { + // TODO(charlie): Fetch from the environment. + let env = MarkerEnvironment { + implementation_name: String::new(), + implementation_version: StringVersion::from_str("3.10.0").unwrap(), + os_name: String::new(), + platform_machine: String::new(), + platform_python_implementation: String::new(), + platform_release: String::new(), + platform_system: String::new(), + platform_version: String::new(), + python_full_version: StringVersion::from_str("3.10.0").unwrap(), + python_version: StringVersion::from_str("3.10.0").unwrap(), + sys_platform: String::new(), + }; + // Read the `requirements.txt` from disk. let requirements_txt = std::fs::read_to_string(src)?; @@ -24,6 +42,8 @@ pub(crate) async fn install(src: &Path) -> Result { // Fetch metadata in parallel. let (package_sink, package_stream) = futures::channel::mpsc::unbounded(); + let mut resolution: HashMap = HashMap::with_capacity(requirements.len()); + // Create a stream of futures that fetch metadata for each requirement. let mut package_stream = package_stream .map(|requirement: Requirement| { @@ -31,28 +51,32 @@ pub(crate) async fn install(src: &Path) -> Result { .simple(requirement.name.clone()) .map_ok(move |metadata| (metadata, requirement)) }) - .buffer_unordered(32) - .ready_chunks(32); + .buffer_unordered(48) + .ready_chunks(48); // Push all the requirements into the sink. - let mut in_flight = 0; + let mut in_flight: HashSet = HashSet::with_capacity(requirements.len()); for requirement in requirements.iter() { package_sink.unbounded_send(requirement.clone())?; - in_flight += 1; + in_flight.insert(PackageName::normalize(&requirement.name)); } while let Some(chunk) = package_stream.next().await { - in_flight -= chunk.len(); for result in chunk { let (metadata, requirement): (SimpleJson, Requirement) = result?; + // Remove this requirement from the in-flight set. + let normalized_name = PackageName::normalize(&requirement.name); + in_flight.remove(&normalized_name); + // TODO(charlie): Support URLs. Right now, we treat a URL as an unpinned dependency. - let specifiers = requirement.version_or_url.and_then(|version_or_url| { - match version_or_url { + let specifiers = requirement + .version_or_url + .as_ref() + .and_then(|version_or_url| match version_or_url { VersionOrUrl::VersionSpecifier(specifiers) => Some(specifiers), VersionOrUrl::Url(_) => None, - } - }); + }); // Pick a version that satisfies the requirement. let Some(file) = metadata.files.iter().rev().find(|file| { @@ -68,16 +92,61 @@ pub(crate) async fn install(src: &Path) -> Result { continue; }; - #[allow(clippy::print_stdout)] - { - println!("{}: {:?}", requirement.name, file); + // Fetch the metadata for this specific version. + let metadata = client.file(file).await?; + trace!( + "Selecting {version} for {requirement}", + version = metadata.version, + requirement = requirement + ); + + // Add to the resolved set. + let normalized_name = PackageName::normalize(&requirement.name); + resolution.insert(normalized_name, metadata.version); + + // Enqueue its dependencies. + for dependency in metadata.requires_dist { + if !dependency + .evaluate_markers(&env, requirement.extras.clone().unwrap_or_default()) + { + trace!("Ignoring {dependency} because it doesn't match the environment"); + continue; + } + + if dependency + .extras + .as_ref() + .is_some_and(|extras| !extras.is_empty()) + { + trace!("Ignoring {dependency} because it has extras"); + continue; + } + + let normalized_name = PackageName::normalize(&dependency.name); + if resolution.contains_key(&normalized_name) { + continue; + } + + if !in_flight.insert(normalized_name) { + continue; + } + + trace!("Enqueueing {dependency}"); + package_sink.unbounded_send(dependency)?; } } - if in_flight == 0 { + if in_flight.is_empty() { break; } } + for (name, version) in resolution { + #[allow(clippy::print_stdout)] + { + println!("{name}=={version}"); + } + } + Ok(ExitStatus::Success) } diff --git a/crates/puffin-cli/src/logging.rs b/crates/puffin-cli/src/logging.rs new file mode 100644 index 000000000..3931cf290 --- /dev/null +++ b/crates/puffin-cli/src/logging.rs @@ -0,0 +1,20 @@ +use anyhow::Result; +use tracing_subscriber::layer::SubscriberExt; +use tracing_subscriber::{EnvFilter, Layer, Registry}; +use tracing_tree::time::Uptime; + +pub(crate) fn setup_logging() -> Result<()> { + let subscriber = Registry::default().with( + tracing_tree::HierarchicalLayer::default() + .with_indent_lines(true) + .with_indent_amount(2) + .with_bracketed_fields(true) + .with_targets(true) + .with_writer(|| Box::new(std::io::stderr())) + .with_timer(Uptime::default()) + .with_filter(EnvFilter::from_default_env()), + ); + tracing::subscriber::set_global_default(subscriber)?; + + Ok(()) +} diff --git a/crates/puffin-cli/src/main.rs b/crates/puffin-cli/src/main.rs index 798f29994..1615d3011 100644 --- a/crates/puffin-cli/src/main.rs +++ b/crates/puffin-cli/src/main.rs @@ -7,6 +7,7 @@ use colored::Colorize; use crate::commands::ExitStatus; mod commands; +mod logging; #[derive(Parser)] #[command(author, version, about)] @@ -32,6 +33,8 @@ struct InstallArgs { async fn main() -> ExitCode { let cli = Cli::parse(); + let _ = logging::setup_logging(); + let result = match &cli.command { Commands::Install(install) => commands::install(&install.src).await, }; diff --git a/crates/puffin-client/Cargo.toml b/crates/puffin-client/Cargo.toml index b6c5924a9..59eb01d79 100644 --- a/crates/puffin-client/Cargo.toml +++ b/crates/puffin-client/Cargo.toml @@ -6,11 +6,13 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -http-cache-reqwest = "0.11.3" +puffin-requirements = { path = "../puffin-requirements" } + +http-cache-reqwest = { version = "0.11.3" } reqwest = { version = "0.11.22", features = ["json", "gzip", "stream"] } -reqwest-middleware = "0.2.3" -reqwest-retry = "0.3.0" -serde = "1.0.188" -serde_json = "1.0.107" +reqwest-middleware = { version = "0.2.3" } +reqwest-retry = { version = "0.3.0" } +serde = { version = "1.0.188" } +serde_json = { version = "1.0.107" } thiserror = { version = "1.0.49" } url = { version = "2.4.1" } diff --git a/crates/puffin-client/src/api/mod.rs b/crates/puffin-client/src/api.rs similarity index 63% rename from crates/puffin-client/src/api/mod.rs rename to crates/puffin-client/src/api.rs index eced665d6..85dcfe9a7 100644 --- a/crates/puffin-client/src/api/mod.rs +++ b/crates/puffin-client/src/api.rs @@ -2,8 +2,11 @@ use reqwest::StatusCode; use serde::{Deserialize, Serialize}; use url::Url; +use puffin_requirements::metadata::Metadata21; +use puffin_requirements::package_name::PackageName; + +use crate::client::PypiClient; use crate::error::PypiClientError; -use crate::PypiClient; impl PypiClient { pub async fn simple( @@ -12,7 +15,9 @@ impl PypiClient { ) -> Result { // Format the URL for PyPI. let mut url = self.registry.join("simple")?; - url.path_segments_mut().unwrap().push(package_name.as_ref()); + url.path_segments_mut() + .unwrap() + .push(PackageName::normalize(&package_name).as_ref()); url.path_segments_mut().unwrap().push(""); url.set_query(Some("format=application/vnd.pypi.simple.v1+json")); @@ -46,6 +51,44 @@ impl PypiClient { .text() .await?) } + + pub async fn file(&self, file: &File) -> Result { + // Send to the proxy. + let url = self.proxy.join( + file.url + .strip_prefix("https://files.pythonhosted.org/") + .unwrap(), + )?; + + // Fetch from the registry. + let text = self.file_impl(&file.filename, &url).await?; + Metadata21::parse(text.as_bytes()).map_err(std::convert::Into::into) + } + + async fn file_impl( + &self, + filename: impl AsRef, + url: &Url, + ) -> Result { + Ok(self + .client + .get(url.clone()) + .send() + .await? + .error_for_status() + .map_err(|err| { + if err.status() == Some(StatusCode::NOT_FOUND) { + PypiClientError::FileNotFound( + (*self.registry).clone(), + filename.as_ref().to_string(), + ) + } else { + PypiClientError::RequestError(err) + } + })? + .text() + .await?) + } } #[derive(Debug, Serialize, Deserialize)] diff --git a/crates/puffin-client/src/client.rs b/crates/puffin-client/src/client.rs new file mode 100644 index 000000000..c8e1ac4d6 --- /dev/null +++ b/crates/puffin-client/src/client.rs @@ -0,0 +1,92 @@ +use std::path::{Path, PathBuf}; +use std::sync::Arc; + +use http_cache_reqwest::{CACacheManager, Cache, CacheMode, HttpCache, HttpCacheOptions}; +use reqwest::ClientBuilder; +use reqwest_middleware::ClientWithMiddleware; +use reqwest_retry::policies::ExponentialBackoff; +use reqwest_retry::RetryTransientMiddleware; +use url::Url; + +#[derive(Debug, Clone)] +pub struct PypiClientBuilder { + registry: Url, + proxy: Url, + retries: u32, + cache: Option, +} + +impl Default for PypiClientBuilder { + fn default() -> Self { + Self { + registry: Url::parse("https://pypi.org").unwrap(), + proxy: Url::parse("https://pypi-metadata.ruff.rs").unwrap(), + cache: None, + retries: 0, + } + } +} + +impl PypiClientBuilder { + #[must_use] + pub fn registry(mut self, registry: Url) -> Self { + self.registry = registry; + self + } + + #[must_use] + pub fn proxy(mut self, proxy: Url) -> Self { + self.proxy = proxy; + self + } + + #[must_use] + pub fn retries(mut self, retries: u32) -> Self { + self.retries = retries; + self + } + + #[must_use] + pub fn cache(mut self, cache: impl AsRef) -> Self { + self.cache = Some(PathBuf::from(cache.as_ref())); + self + } + + pub fn build(self) -> PypiClient { + let client_raw = { + let client_core = ClientBuilder::new() + .user_agent("puffin") + .pool_max_idle_per_host(20) + .timeout(std::time::Duration::from_secs(60 * 5)); + + client_core.build().expect("Fail to build HTTP client.") + }; + + let retry_policy = ExponentialBackoff::builder().build_with_max_retries(self.retries); + let retry_strategy = RetryTransientMiddleware::new_with_policy(retry_policy); + + let mut client_builder = + reqwest_middleware::ClientBuilder::new(client_raw).with(retry_strategy); + + if let Some(path) = self.cache { + client_builder = client_builder.with(Cache(HttpCache { + mode: CacheMode::Default, + manager: CACacheManager { path }, + options: HttpCacheOptions::default(), + })); + } + + PypiClient { + registry: Arc::new(self.registry), + proxy: Arc::new(self.proxy), + client: client_builder.build(), + } + } +} + +#[derive(Debug, Clone)] +pub struct PypiClient { + pub(crate) registry: Arc, + pub(crate) proxy: Arc, + pub(crate) client: ClientWithMiddleware, +} diff --git a/crates/puffin-client/src/error.rs b/crates/puffin-client/src/error.rs index 698441165..d7a86198f 100644 --- a/crates/puffin-client/src/error.rs +++ b/crates/puffin-client/src/error.rs @@ -1,3 +1,4 @@ +use puffin_requirements::metadata; use thiserror::Error; use url::Url; @@ -14,6 +15,14 @@ pub enum PypiClientError { #[error("Package `{1}` was not found in registry {0}.")] PackageNotFound(Url, String), + /// The metadata file could not be parsed. + #[error(transparent)] + MetadataParseError(#[from] metadata::Error), + + /// The metadata file was not found in the registry. + #[error("File `{1}` was not found in registry {0}.")] + FileNotFound(Url, String), + /// A generic request error happened while making a request. Refer to the /// error message for more details. #[error(transparent)] @@ -24,7 +33,7 @@ pub enum PypiClientError { #[error(transparent)] RequestMiddlewareError(#[from] reqwest_middleware::Error), - #[error("Received some unexpected JSON. Unable to parse.")] + #[error("Received some unexpected JSON: {source}")] BadJson { source: serde_json::Error, url: String, diff --git a/crates/puffin-client/src/lib.rs b/crates/puffin-client/src/lib.rs index d6c17252e..5d783c71d 100644 --- a/crates/puffin-client/src/lib.rs +++ b/crates/puffin-client/src/lib.rs @@ -1,87 +1,6 @@ -use std::path::{Path, PathBuf}; -use std::sync::Arc; - -use http_cache_reqwest::{CACacheManager, Cache, CacheMode, HttpCache, HttpCacheOptions}; -use reqwest::ClientBuilder; -use reqwest_middleware::ClientWithMiddleware; -use reqwest_retry::policies::ExponentialBackoff; -use reqwest_retry::RetryTransientMiddleware; -use url::Url; +pub use api::SimpleJson; +pub use client::PypiClientBuilder; mod api; +mod client; mod error; - -pub use api::SimpleJson; - -#[derive(Debug, Clone)] -pub struct PypiClientBuilder { - registry: Url, - retries: u32, - cache: Option, -} - -impl Default for PypiClientBuilder { - fn default() -> Self { - Self { - registry: Url::parse("https://pypi.org").unwrap(), - cache: None, - retries: 0, - } - } -} - -impl PypiClientBuilder { - #[must_use] - pub fn registry(mut self, registry: Url) -> Self { - self.registry = registry; - self - } - - #[must_use] - pub fn retries(mut self, retries: u32) -> Self { - self.retries = retries; - self - } - - #[must_use] - pub fn cache(mut self, cache: impl AsRef) -> Self { - self.cache = Some(PathBuf::from(cache.as_ref())); - self - } - - pub fn build(self) -> PypiClient { - let client_raw = { - let client_core = ClientBuilder::new() - .user_agent("puffin") - .pool_max_idle_per_host(20) - .timeout(std::time::Duration::from_secs(60 * 5)); - - client_core.build().expect("Fail to build HTTP client.") - }; - - let retry_policy = ExponentialBackoff::builder().build_with_max_retries(self.retries); - let retry_strategy = RetryTransientMiddleware::new_with_policy(retry_policy); - - let mut client_builder = - reqwest_middleware::ClientBuilder::new(client_raw).with(retry_strategy); - - if let Some(path) = self.cache { - client_builder = client_builder.with(Cache(HttpCache { - mode: CacheMode::Default, - manager: CACacheManager { path }, - options: HttpCacheOptions::default(), - })); - } - - PypiClient { - registry: Arc::new(self.registry), - client: client_builder.build(), - } - } -} - -#[derive(Debug, Clone)] -pub struct PypiClient { - pub(crate) registry: Arc, - pub(crate) client: ClientWithMiddleware, -} diff --git a/crates/puffin-requirements/Cargo.toml b/crates/puffin-requirements/Cargo.toml index 37b76b1b2..98d628281 100644 --- a/crates/puffin-requirements/Cargo.toml +++ b/crates/puffin-requirements/Cargo.toml @@ -6,15 +6,19 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -anyhow = "1.0.75" +anyhow = { version = "1.0.75" } clap = { version = "4.4.6", features = ["derive"] } colored = { version = "2.0.4" } -insta = "1.33.0" +insta = { version = "1.33.0" } +mailparse = { version = "0.14.0" } memchr = { version = "2.6.4" } -once_cell = "1.18.0" -pep440_rs = "0.3.12" -pep508_rs = { version = "0.2.3" } -regex = "1.9.6" +once_cell = { version = "1.18.0" } +pep440_rs = { version = "0.3.12", features = ["serde"] } +pep508_rs = { version = "0.2.3", features = ["serde"] } +regex = { version = "1.9.6" } +rfc2047-decoder = { version = "1.0.1" } +serde = { version = "1.0.188" } +thiserror = { version = "1.0.49" } [dev-dependencies] criterion = "0.5.1" diff --git a/crates/puffin-requirements/src/lib.rs b/crates/puffin-requirements/src/lib.rs index 6235d74f5..16fbcd671 100644 --- a/crates/puffin-requirements/src/lib.rs +++ b/crates/puffin-requirements/src/lib.rs @@ -1,12 +1,14 @@ -pub mod wheel; use std::borrow::Cow; - use std::ops::Deref; use std::str::FromStr; use anyhow::Result; use memchr::{memchr2, memchr_iter}; -pub use pep508_rs::{Pep508Error, Requirement}; +use pep508_rs::{Pep508Error, Requirement}; + +pub mod metadata; +pub mod package_name; +pub mod wheel; #[derive(Debug)] pub struct Requirements(Vec); @@ -196,10 +198,10 @@ fn find_newline(text: &str) -> Option<(usize, usize)> { mod tests { use std::str::FromStr; + use anyhow::Result; use insta::assert_debug_snapshot; use crate::Requirements; - use anyhow::Result; #[test] fn simple() -> Result<()> { diff --git a/crates/puffin-requirements/src/metadata.rs b/crates/puffin-requirements/src/metadata.rs new file mode 100644 index 000000000..e7f8a8804 --- /dev/null +++ b/crates/puffin-requirements/src/metadata.rs @@ -0,0 +1,194 @@ +//! Derived from `pypi_types_crate`. + +use std::collections::HashMap; +use std::io; +use std::str::FromStr; + +use mailparse::{MailHeaderMap, MailParseError}; +use pep440_rs::{Pep440Error, Version, VersionSpecifiers}; +use pep508_rs::{Pep508Error, Requirement}; +use serde::{Deserialize, Serialize}; +use thiserror::Error; + +/// Python Package Metadata 2.1 as specified in +/// +/// +/// One addition is the requirements fixup which insert missing commas e.g. in +/// `elasticsearch-dsl (>=7.2.0<8.0.0)` +#[derive(Serialize, Deserialize, Debug, Clone, Eq, PartialEq)] +#[serde(rename_all = "kebab-case")] +pub struct Metadata21 { + // Mandatory fields + pub metadata_version: String, + pub name: String, + pub version: Version, + // Optional fields + pub platforms: Vec, + pub supported_platforms: Vec, + pub summary: Option, + pub description: Option, + pub description_content_type: Option, + pub keywords: Option, + pub home_page: Option, + pub download_url: Option, + pub author: Option, + pub author_email: Option, + pub maintainer: Option, + pub maintainer_email: Option, + pub license: Option, + pub classifiers: Vec, + pub requires_dist: Vec, + pub provides_dist: Vec, + pub obsoletes_dist: Vec, + pub requires_python: Option, + pub requires_external: Vec, + pub project_urls: HashMap, + pub provides_extras: Vec, +} + +/// +/// +/// The error type +#[derive(Error, Debug)] +pub enum Error { + /// I/O error + #[error(transparent)] + Io(#[from] io::Error), + /// mail parse error + #[error(transparent)] + MailParse(#[from] MailParseError), + /// Metadata field not found + #[error("metadata field {0} not found")] + FieldNotFound(&'static str), + /// Unknown distribution type + #[error("unknown distribution type")] + UnknownDistributionType, + /// Metadata file not found + #[error("metadata file not found")] + MetadataNotFound, + /// Invalid project URL (no comma) + #[error("Invalid Project-URL field (missing comma): '{0}'")] + InvalidProjectUrl(String), + /// Multiple metadata files found + #[error("found multiple metadata files: {0:?}")] + MultipleMetadataFiles(Vec), + /// Invalid Version + #[error("invalid version: {0}")] + Pep440VersionError(String), + /// Invalid VersionSpecifier + #[error(transparent)] + Pep440Error(#[from] Pep440Error), + /// Invalid Requirement + #[error(transparent)] + Pep508Error(#[from] Pep508Error), +} + +/// From +impl Metadata21 { + /// Parse distribution metadata from metadata bytes + pub fn parse(content: &[u8]) -> Result { + // HACK: trick mailparse to parse as UTF-8 instead of ASCII + let mut mail = b"Content-Type: text/plain; charset=utf-8\n".to_vec(); + mail.extend_from_slice(content); + + let msg = mailparse::parse_mail(&mail)?; + let headers = msg.get_headers(); + let get_first_value = |name| { + headers.get_first_header(name).and_then(|header| { + match rfc2047_decoder::decode(header.get_value_raw()) { + Ok(value) => { + if value == "UNKNOWN" { + None + } else { + Some(value) + } + } + Err(_) => None, + } + }) + }; + let get_all_values = |name| { + let values: Vec = headers + .get_all_values(name) + .into_iter() + .filter(|value| value != "UNKNOWN") + .collect(); + values + }; + let metadata_version = headers + .get_first_value("Metadata-Version") + .ok_or(Error::FieldNotFound("Metadata-Version"))?; + let name = headers + .get_first_value("Name") + .ok_or(Error::FieldNotFound("Name"))?; + let version = Version::from_str( + &headers + .get_first_value("Version") + .ok_or(Error::FieldNotFound("Version"))?, + ) + .map_err(Error::Pep440VersionError)?; + let platforms = get_all_values("Platform"); + let supported_platforms = get_all_values("Supported-Platform"); + let summary = get_first_value("Summary"); + let body = msg.get_body()?; + let description = if body.trim().is_empty() { + get_first_value("Description") + } else { + Some(body) + }; + let keywords = get_first_value("Keywords"); + let home_page = get_first_value("Home-Page"); + let download_url = get_first_value("Download-URL"); + let author = get_first_value("Author"); + let author_email = get_first_value("Author-email"); + let license = get_first_value("License"); + let classifiers = get_all_values("Classifier"); + let requires_dist = get_all_values("Requires-Dist") + .iter() + .map(|requires_dist| Requirement::from_str(requires_dist)) + .collect::, _>>()?; + let provides_dist = get_all_values("Provides-Dist"); + let obsoletes_dist = get_all_values("Obsoletes-Dist"); + let maintainer = get_first_value("Maintainer"); + let maintainer_email = get_first_value("Maintainer-email"); + let requires_python = get_first_value("Requires-Python") + .map(|requires_python| VersionSpecifiers::from_str(&requires_python)) + .transpose()?; + let requires_external = get_all_values("Requires-External"); + let project_urls = get_all_values("Project-URL") + .iter() + .map(|name_value| match name_value.split_once(',') { + None => Err(Error::InvalidProjectUrl(name_value.clone())), + Some((name, value)) => Ok((name.to_string(), value.trim().to_string())), + }) + .collect::>()?; + let provides_extras = get_all_values("Provides-Extra"); + let description_content_type = get_first_value("Description-Content-Type"); + Ok(Metadata21 { + metadata_version, + name, + version, + platforms, + supported_platforms, + summary, + description, + description_content_type, + keywords, + home_page, + download_url, + author, + author_email, + maintainer, + maintainer_email, + license, + classifiers, + requires_dist, + provides_dist, + obsoletes_dist, + requires_python, + requires_external, + project_urls, + provides_extras, + }) + } +} diff --git a/crates/puffin-requirements/src/package_name.rs b/crates/puffin-requirements/src/package_name.rs new file mode 100644 index 000000000..91687177b --- /dev/null +++ b/crates/puffin-requirements/src/package_name.rs @@ -0,0 +1,35 @@ +use std::fmt; +use std::fmt::{Display, Formatter}; +use std::ops::Deref; + +use once_cell::sync::Lazy; +use regex::Regex; + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct PackageName(String); + +impl Display for PackageName { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + self.0.fmt(f) + } +} + +static NAME_NORMALIZE: Lazy = Lazy::new(|| Regex::new(r"[-_.]").unwrap()); + +impl PackageName { + /// See: + pub fn normalize(name: impl AsRef) -> Self { + // TODO(charlie): Avoid allocating in the common case (when no normalization is required). + let mut normalized = NAME_NORMALIZE.replace_all(name.as_ref(), "-").to_string(); + normalized.make_ascii_lowercase(); + Self(normalized) + } +} + +impl Deref for PackageName { + type Target = str; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} diff --git a/crates/puffin-requirements/src/wheel.rs b/crates/puffin-requirements/src/wheel.rs index db3d5cacb..abeffafee 100644 --- a/crates/puffin-requirements/src/wheel.rs +++ b/crates/puffin-requirements/src/wheel.rs @@ -46,19 +46,19 @@ impl FromStr for WheelName { build_name = captures.get(2).unwrap().as_str().into(); } else { build_number = None; - build_name = "".to_owned(); + build_name = String::new(); } let [distribution, version, py_tags, abi_tags, arch_tags] = pieces.as_slice() else { bail!("can't parse binary name {s:?}"); }; - let distribution = distribution.to_string(); + let distribution = (*distribution).to_string(); let version = Version::from_str(version) .map_err(|e| anyhow!("failed to parse version {:?} from {:?}: {}", version, s, e))?; - let py_tags = py_tags.split('.').map(|tag| tag.into()).collect(); - let abi_tags = abi_tags.split('.').map(|tag| tag.into()).collect(); - let arch_tags = arch_tags.split('.').map(|tag| tag.into()).collect(); + let py_tags = py_tags.split('.').map(std::convert::Into::into).collect(); + let abi_tags = abi_tags.split('.').map(std::convert::Into::into).collect(); + let arch_tags = arch_tags.split('.').map(std::convert::Into::into).collect(); Ok(Self { distribution,