From 5219d372509f157a98a34aec02c3a3d93e4ab2ec Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Sun, 28 Jan 2024 12:14:59 -0500 Subject: [PATCH] add initial rkyv support (#1135) This PR adds initial support for [rkyv] to puffin. In particular, the main aim here is to make puffin-client's `SimpleMetadata` type possible to deserialize from a `&[u8]` without doing any copies. This PR **stops short of actually doing that zero-copy deserialization**. Instead, this PR is about adding the necessary trait impls to a variety of types, along with a smattering of small refactorings to make rkyv possible to use. For those unfamiliar, rkyv works via the interplay of three traits: `Archive`, `Serialize` and `Deserialize`. The usual flow of things is this: * Make a type `T` implement `Archive`, `Serialize` and `Deserialize`. rkyv helpfully provides `derive` macros to make this pretty painless in most cases. * The process of implementing `Archive` for `T` *usually* creates an entirely new distinct type within the same namespace. One can refer to this type without naming it explicitly via `Archived<T>` (where `Archived` is a clever type alias defined by rkyv). * Serialization happens from `T` to (conceptually) a `Vec<u8>`. The serialization format is specifically designed to reflect the in-memory layout of `Archived<T>`. Notably, *not* `T`. But `Archived<T>`. * One can then get an `Archived<T>` with no copying (albeit, we will likely need to incur some cost for validation) from the previously created `&[u8]`. This is quite literally [implemented as a pointer cast][rkyv-ptr-cast]. * The problem with an `Archived<T>` is that it isn't your `T`. It's something else. And while there is limited interoperability between a `T` and an `Archived<T>`, the main issue is that the surrounding code generally demands a `T` and not an `Archived<T>`. 
**This is at the heart of the tension for introducing zero-copy deserialization, and this is mostly an intrinsic problem to the technique and not an rkyv-specific issue.** For this reason, given an `Archived<T>`, one can get a `T` back via an explicit deserialization step. This step is like any other kind of deserialization, although generally faster since no real "parsing" is required. But it will allocate and create all necessary objects. This PR largely proceeds by deriving the three aforementioned traits for `SimpleMetadata`. And, of course, all of its type dependencies. But we stop there for now. The main issue with carrying this work forward so that rkyv is actually used to deserialize a `SimpleMetadata` is figuring out how to deal with `DataWithCachePolicy` inside of the cached client. Ideally, this type would itself have rkyv support, but adding it is difficult. The main difficulty lies in the fact that its `CachePolicy` type is opaque, not easily constructable and is internally the tip of the iceberg of a rat's nest of types found in more crates such as `http`. While one "dumb"-but-annoying approach would be to fork both of those crates and add rkyv trait impls to all necessary types, it is my belief that this is the wrong approach. What we'd *like* to do is not just use rkyv to deserialize a `DataWithCachePolicy`, but we'd actually like to get an `Archived<DataWithCachePolicy>` and make actual decisions using the archived type directly. Doing that will require some work to make `Archived<DataWithCachePolicy>` directly useful. My suspicion is that, after doing the above, we may want to mush forward with a similar approach for `SimpleMetadata`. That is, we want `Archived<SimpleMetadata>` to be as useful as possible. But right now, the structure of the code demands an eager conversion (and thus deserialization) into a `SimpleMetadata` and then into a `VersionMap`. Getting rid of that eagerness is, I think, the next step after dealing with `DataWithCachePolicy` to unlock bigger wins here. 
There are many commits in this PR, but most are tiny. I still encourage review to happen commit-by-commit. [rkyv]: https://rkyv.org/ [rkyv-ptr-cast]: https://docs.rs/rkyv/latest/src/rkyv/util/mod.rs.html#63-68 --- Cargo.lock | 155 ++++++- Cargo.toml | 1 + crates/distribution-filename/Cargo.toml | 4 + .../distribution-filename/src/source_dist.rs | 12 + crates/distribution-filename/src/wheel.rs | 6 + crates/distribution-types/Cargo.toml | 2 +- crates/distribution-types/src/file.rs | 31 +- crates/distribution-types/src/lib.rs | 14 +- crates/pep440-rs/Cargo.toml | 1 + crates/pep440-rs/src/lib.rs | 4 +- crates/pep440-rs/src/version.rs | 416 ++++++++++++++---- crates/pep440-rs/src/version_specifier.rs | 24 +- crates/pep508-rs/Cargo.toml | 2 + crates/pep508-rs/src/lib.rs | 2 +- crates/puffin-client/Cargo.toml | 3 +- crates/puffin-client/src/error.rs | 4 + crates/puffin-client/src/flat_index.rs | 4 +- crates/puffin-client/src/lib.rs | 3 +- crates/puffin-client/src/registry_client.rs | 101 +++-- crates/puffin-dev/src/resolve_many.rs | 2 +- .../src/distribution_database.rs | 6 +- crates/puffin-distribution/src/error.rs | 2 + crates/puffin-distribution/src/source/mod.rs | 12 +- crates/puffin-normalize/Cargo.toml | 1 + crates/puffin-normalize/src/package_name.rs | 16 +- crates/puffin-resolver/src/finder.rs | 44 +- .../puffin-resolver/src/pubgrub/specifier.rs | 16 +- crates/puffin-resolver/src/resolver/mod.rs | 10 +- crates/puffin-resolver/src/version_map.rs | 8 +- crates/pypi-types/Cargo.toml | 5 +- crates/pypi-types/src/base_url.rs | 41 ++ crates/pypi-types/src/simple_json.rs | 30 +- crates/requirements-txt/Cargo.toml | 4 +- 33 files changed, 782 insertions(+), 204 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ef110d04f..d9c52f8c2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -26,6 +26,17 @@ dependencies = [ "const-random", ] +[[package]] +name = "ahash" +version = "0.7.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5a824f2aa7e75a0c98c5a504fceb80649e9c35265d44525b5f94de4771a395cd" +dependencies = [ + "getrandom", + "once_cell", + "version_check", +] + [[package]] name = "aho-corasick" version = "1.1.2" @@ -294,6 +305,18 @@ version = "2.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed570934406eb16438a4e976b1b4500774099c13b8cb96eec99f620f05090ddf" +[[package]] +name = "bitvec" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" +dependencies = [ + "funty", + "radium", + "tap", + "wyz", +] + [[package]] name = "block-buffer" version = "0.10.4" @@ -341,6 +364,28 @@ version = "3.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec" +[[package]] +name = "bytecheck" +version = "0.6.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b6372023ac861f6e6dc89c8344a8f398fb42aaba2b5dbc649ca0c0e9dbcb627" +dependencies = [ + "bytecheck_derive", + "ptr_meta", + "simdutf8", +] + +[[package]] +name = "bytecheck_derive" +version = "0.6.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7ec4c6f261935ad534c0c22dbef2201b45918860eb1c574b972bd213a76af61" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "byteorder" version = "1.5.0" @@ -712,7 +757,7 @@ version = "3.11.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0f260e2fc850179ef410018660006951c1b55b79e8087e87111a2c388994b9b5" dependencies = [ - "ahash", + "ahash 0.3.8", "cfg-if 0.1.10", "num_cpus", ] @@ -802,6 +847,7 @@ dependencies = [ "pep440_rs 0.3.12", "platform-tags", "puffin-normalize", + "rkyv", "serde", "thiserror", "url", @@ -813,7 +859,6 @@ version = "0.0.1" dependencies = [ "anyhow", "cache-key", - "chrono", "data-encoding", "distribution-filename", "fs-err", @@ 
-825,6 +870,7 @@ dependencies = [ "puffin-git", "puffin-normalize", "pypi-types", + "rkyv", "rustc-hash", "serde", "serde_json", @@ -961,6 +1007,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "funty" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" + [[package]] name = "futures" version = "0.3.30" @@ -1199,6 +1251,9 @@ name = "hashbrown" version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +dependencies = [ + "ahash 0.7.7", +] [[package]] name = "hashbrown" @@ -2054,6 +2109,7 @@ dependencies = [ "once_cell", "pubgrub", "pyo3", + "rkyv", "serde", "tracing", "unicode-width", @@ -2085,6 +2141,7 @@ dependencies = [ "pyo3", "pyo3-log", "regex", + "rkyv", "serde", "serde_json", "testing_logger", @@ -2270,6 +2327,26 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "ptr_meta" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0738ccf7ea06b608c10564b31debd4f5bc5e197fc8bfe088f68ae5ce81e7a4f1" +dependencies = [ + "ptr_meta_derive", +] + +[[package]] +name = "ptr_meta_derive" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16b845dbfca988fa33db069c0e230574d15a3088f147a87b64c7589eb662c9ac" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "pubgrub" version = "0.2.1" @@ -2427,6 +2504,7 @@ dependencies = [ "reqwest", "reqwest-middleware", "reqwest-retry", + "rkyv", "rmp-serde", "rustc-hash", "serde", @@ -2672,6 +2750,7 @@ dependencies = [ name = "puffin-normalize" version = "0.0.1" dependencies = [ + "rkyv", "serde", ] @@ -2851,6 +2930,7 @@ dependencies = [ "pep508_rs", "puffin-normalize", "regex", + "rkyv", "serde", "serde_json", "tempfile", @@ -2897,6 +2977,12 @@ version = "0.5.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "79ec282e887b434b68c18fe5c121d38e72a5cf35119b59e54ec5b992ea9c8eb0" +[[package]] +name = "radium" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" + [[package]] name = "rand" version = "0.8.5" @@ -3031,6 +3117,15 @@ version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" +[[package]] +name = "rend" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2571463863a6bd50c32f94402933f03457a3fbaf697a707c5be741e459f08fd" +dependencies = [ + "bytecheck", +] + [[package]] name = "requirements-txt" version = "0.0.1" @@ -3164,6 +3259,35 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "rkyv" +version = "0.7.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "527a97cdfef66f65998b5f3b637c26f5a5ec09cc52a3f9932313ac645f4190f5" +dependencies = [ + "bitvec", + "bytecheck", + "bytes", + "hashbrown 0.12.3", + "ptr_meta", + "rend", + "rkyv_derive", + "seahash", + "tinyvec", + "uuid", +] + +[[package]] +name = "rkyv_derive" +version = "0.7.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5c462a1328c8e67e4d6dbad1eb0355dd43e8ab432c6e227a43657f16ade5033" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "rmp" version = "0.8.12" @@ -3403,6 +3527,12 @@ dependencies = [ "libc", ] +[[package]] +name = "simdutf8" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f27f6278552951f1f2b8cf9da965d10969b2efdea95a6ec47987ab46edfe263a" + [[package]] name = "similar" version = "2.4.0" @@ -3535,6 +3665,12 @@ dependencies = [ "libc", ] +[[package]] +name = "tap" +version = "1.0.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" + [[package]] name = "tar" version = "0.4.40" @@ -4068,6 +4204,12 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" +[[package]] +name = "uuid" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f00cc9702ca12d3c81455259621e676d0f7251cec66a21e98fe2e9a37db93b2a" + [[package]] name = "valuable" version = "0.1.0" @@ -4487,6 +4629,15 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "wyz" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" +dependencies = [ + "tap", +] + [[package]] name = "xattr" version = "1.3.1" diff --git a/Cargo.toml b/Cargo.toml index d8d335c63..ec6573679 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -73,6 +73,7 @@ regex = { version = "1.10.2" } reqwest = { version = "0.11.23", default-features = false, features = ["json", "gzip", "brotli", "stream", "rustls-tls"] } reqwest-middleware = { version = "0.2.4" } reqwest-retry = { version = "0.3.0" } +rkyv = { version = "0.7.43", features = ["strict", "validation"] } rmp-serde = { version = "1.1.2" } rustc-hash = { version = "1.1.0" } same-file = { version = "1.0.6" } diff --git a/crates/distribution-filename/Cargo.toml b/crates/distribution-filename/Cargo.toml index 74d97f099..1788c4df4 100644 --- a/crates/distribution-filename/Cargo.toml +++ b/crates/distribution-filename/Cargo.toml @@ -12,11 +12,15 @@ license = { workspace = true } [lints] workspace = true +[features] +rkyv = ["dep:rkyv", "pep440_rs/rkyv"] + [dependencies] pep440_rs = { path = "../pep440-rs" } platform-tags = { path = "../platform-tags" } puffin-normalize = { path = "../puffin-normalize" } +rkyv = { workspace = true, 
features = ["strict", "validation"], optional = true } serde = { workspace = true, optional = true } thiserror = { workspace = true } url = { workspace = true } diff --git a/crates/distribution-filename/src/source_dist.rs b/crates/distribution-filename/src/source_dist.rs index cf50aa78f..da81941c7 100644 --- a/crates/distribution-filename/src/source_dist.rs +++ b/crates/distribution-filename/src/source_dist.rs @@ -10,6 +10,12 @@ use puffin_normalize::{InvalidNameError, PackageName}; #[derive(Clone, Debug, PartialEq, Eq)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr( + feature = "rkyv", + derive(rkyv::Archive, rkyv::Deserialize, rkyv::Serialize) +)] +#[cfg_attr(feature = "rkyv", archive(check_bytes))] +#[cfg_attr(feature = "rkyv", archive_attr(derive(Debug)))] pub enum SourceDistExtension { Zip, TarGz, @@ -52,6 +58,12 @@ impl SourceDistExtension { /// need the latter. #[derive(Clone, Debug, PartialEq, Eq)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr( + feature = "rkyv", + derive(rkyv::Archive, rkyv::Deserialize, rkyv::Serialize) +)] +#[cfg_attr(feature = "rkyv", archive(check_bytes))] +#[cfg_attr(feature = "rkyv", archive_attr(derive(Debug)))] pub struct SourceDistFilename { pub name: PackageName, pub version: Version, diff --git a/crates/distribution-filename/src/wheel.rs b/crates/distribution-filename/src/wheel.rs index f83d0436b..5d2f343c9 100644 --- a/crates/distribution-filename/src/wheel.rs +++ b/crates/distribution-filename/src/wheel.rs @@ -11,6 +11,12 @@ use platform_tags::{TagPriority, Tags}; use puffin_normalize::{InvalidNameError, PackageName}; #[derive(Debug, Clone, Eq, PartialEq, Hash)] +#[cfg_attr( + feature = "rkyv", + derive(rkyv::Archive, rkyv::Deserialize, rkyv::Serialize) +)] +#[cfg_attr(feature = "rkyv", archive(check_bytes))] +#[cfg_attr(feature = "rkyv", archive_attr(derive(Debug)))] pub struct WheelFilename { pub name: PackageName, pub version: Version, diff --git 
a/crates/distribution-types/Cargo.toml b/crates/distribution-types/Cargo.toml index e39696ed6..125d836c3 100644 --- a/crates/distribution-types/Cargo.toml +++ b/crates/distribution-types/Cargo.toml @@ -24,10 +24,10 @@ puffin-normalize = { path = "../puffin-normalize" } pypi-types = { path = "../pypi-types" } anyhow = { workspace = true } -chrono = { workspace = true, features = ["serde"] } data-encoding = { workspace = true } fs-err = { workspace = true } once_cell = { workspace = true } +rkyv = { workspace = true, features = ["strict", "validation"] } rustc-hash = { workspace = true } serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } diff --git a/crates/distribution-types/src/file.rs b/crates/distribution-types/src/file.rs index dfe80d243..a5b4e0571 100644 --- a/crates/distribution-types/src/file.rs +++ b/crates/distribution-types/src/file.rs @@ -1,12 +1,11 @@ use std::fmt::{Display, Formatter}; use std::path::PathBuf; -use chrono::{DateTime, Utc}; use serde::{Deserialize, Serialize}; use thiserror::Error; use pep440_rs::{VersionSpecifiers, VersionSpecifiersParseError}; -use pypi_types::{BaseUrl, DistInfoMetadata, Hashes, Yanked}; +use pypi_types::{DistInfoMetadata, Hashes, Yanked}; /// Error converting [`pypi_types::File`] to [`distribution_type::File`]. #[derive(Debug, Error)] @@ -18,32 +17,40 @@ pub enum FileConversionError { } /// Internal analog to [`pypi_types::File`]. -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive( + Debug, Clone, Serialize, Deserialize, rkyv::Archive, rkyv::Deserialize, rkyv::Serialize, +)] +#[archive(check_bytes)] +#[archive_attr(derive(Debug))] pub struct File { pub dist_info_metadata: Option, pub filename: String, pub hashes: Hashes, pub requires_python: Option, pub size: Option, - pub upload_time: Option>, + // N.B. We don't use a chrono DateTime here because it's a little + // annoying to do so with rkyv. 
Since we only use this field for doing + // comparisons in testing, we just store it as a UTC timestamp in + // milliseconds. + pub upload_time_utc_ms: Option, pub url: FileLocation, pub yanked: Option, } impl File { /// `TryFrom` instead of `From` to filter out files with invalid requires python version specifiers - pub fn try_from(file: pypi_types::File, base: &BaseUrl) -> Result { + pub fn try_from(file: pypi_types::File, base: &str) -> Result { Ok(Self { dist_info_metadata: file.dist_info_metadata, filename: file.filename, hashes: file.hashes, requires_python: file.requires_python.transpose()?, size: file.size, - upload_time: file.upload_time, + upload_time_utc_ms: file.upload_time.map(|dt| dt.timestamp_millis()), url: if file.url.contains("://") { FileLocation::AbsoluteUrl(file.url) } else { - FileLocation::RelativeUrl(base.clone(), file.url) + FileLocation::RelativeUrl(base.to_string(), file.url) }, yanked: file.yanked, }) @@ -51,14 +58,18 @@ impl File { } /// While a registry file is generally a remote URL, it can also be a file if it comes from a directory flat indexes. -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive( + Debug, Clone, Serialize, Deserialize, rkyv::Archive, rkyv::Deserialize, rkyv::Serialize, +)] +#[archive(check_bytes)] +#[archive_attr(derive(Debug))] pub enum FileLocation { /// URL relative to the base URL. - RelativeUrl(BaseUrl, String), + RelativeUrl(String, String), /// Absolute URL. AbsoluteUrl(String), /// Absolute path to a file. 
- Path(PathBuf), + Path(#[with(rkyv::with::AsString)] PathBuf), } impl Display for FileLocation { diff --git a/crates/distribution-types/src/lib.rs b/crates/distribution-types/src/lib.rs index 3e07742c5..3b3ace72e 100644 --- a/crates/distribution-types/src/lib.rs +++ b/crates/distribution-types/src/lib.rs @@ -705,6 +705,16 @@ impl Identifier for &str { } } +impl Identifier for (&str, &str) { + fn distribution_id(&self) -> DistributionId { + DistributionId::new(cache_key::digest(&self)) + } + + fn resource_id(&self) -> ResourceId { + ResourceId::new(cache_key::digest(&self)) + } +} + impl Identifier for (&Url, &str) { fn distribution_id(&self) -> DistributionId { DistributionId::new(cache_key::digest(&self)) @@ -718,7 +728,7 @@ impl Identifier for (&Url, &str) { impl Identifier for FileLocation { fn distribution_id(&self) -> DistributionId { match self { - FileLocation::RelativeUrl(base, url) => (base.as_url(), url.as_str()).distribution_id(), + FileLocation::RelativeUrl(base, url) => (base.as_str(), url.as_str()).distribution_id(), FileLocation::AbsoluteUrl(url) => url.distribution_id(), FileLocation::Path(path) => path.distribution_id(), } @@ -726,7 +736,7 @@ impl Identifier for FileLocation { fn resource_id(&self) -> ResourceId { match self { - FileLocation::RelativeUrl(base, url) => (base.as_url(), url.as_str()).resource_id(), + FileLocation::RelativeUrl(base, url) => (base.as_str(), url.as_str()).resource_id(), FileLocation::AbsoluteUrl(url) => url.resource_id(), FileLocation::Path(path) => path.resource_id(), } diff --git a/crates/pep440-rs/Cargo.toml b/crates/pep440-rs/Cargo.toml index 22ccbdf86..7f2250b29 100644 --- a/crates/pep440-rs/Cargo.toml +++ b/crates/pep440-rs/Cargo.toml @@ -21,6 +21,7 @@ once_cell = { workspace = true } pubgrub = { workspace = true, optional = true } pyo3 = { workspace = true, optional = true, features = ["extension-module", "abi3-py37"] } serde = { workspace = true, features = ["derive"], optional = true } +rkyv = { workspace = 
true, features = ["strict", "validation"], optional = true } tracing = { workspace = true, optional = true } unicode-width = { workspace = true } unscanny = { workspace = true } diff --git a/crates/pep440-rs/src/lib.rs b/crates/pep440-rs/src/lib.rs index 0f76a449c..2e72e202e 100644 --- a/crates/pep440-rs/src/lib.rs +++ b/crates/pep440-rs/src/lib.rs @@ -38,8 +38,8 @@ pub use version::PyVersion; pub use { version::{ - LocalSegment, Operator, OperatorParseError, PreRelease, Version, VersionParseError, - VersionPattern, VersionPatternParseError, MIN_VERSION, + LocalSegment, Operator, OperatorParseError, PreRelease, PreReleaseKind, Version, + VersionParseError, VersionPattern, VersionPatternParseError, MIN_VERSION, }, version_specifier::{ parse_version_specifiers, VersionSpecifier, VersionSpecifiers, VersionSpecifiersParseError, diff --git a/crates/pep440-rs/src/version.rs b/crates/pep440-rs/src/version.rs index bc14de343..a3255cd30 100644 --- a/crates/pep440-rs/src/version.rs +++ b/crates/pep440-rs/src/version.rs @@ -16,6 +16,15 @@ use serde::{de, Deserialize, Deserializer, Serialize, Serializer}; /// One of `~=` `==` `!=` `<=` `>=` `<` `>` `===` #[derive(Eq, PartialEq, Debug, Hash, Clone, Copy)] +#[cfg_attr( + feature = "rkyv", + derive(rkyv::Archive, rkyv::Deserialize, rkyv::Serialize) +)] +#[cfg_attr(feature = "rkyv", archive(check_bytes))] +#[cfg_attr( + feature = "rkyv", + archive_attr(derive(Debug, Eq, PartialEq, PartialOrd, Ord)) +)] #[cfg_attr(feature = "pyo3", pyclass)] pub enum Operator { /// `== 1.2.3` @@ -240,11 +249,29 @@ impl std::fmt::Display for OperatorParseError { /// let version = Version::from_str("1.19").unwrap(); /// ``` #[derive(Clone)] +#[cfg_attr( + feature = "rkyv", + derive(rkyv::Archive, rkyv::Deserialize, rkyv::Serialize) +)] +#[cfg_attr(feature = "rkyv", archive(check_bytes))] +#[cfg_attr( + feature = "rkyv", + archive_attr(derive(Debug, Eq, PartialEq, PartialOrd, Ord)) +)] pub struct Version { inner: Arc, } #[derive(Clone, Debug)] 
+#[cfg_attr( + feature = "rkyv", + derive(rkyv::Archive, rkyv::Deserialize, rkyv::Serialize) +)] +#[cfg_attr(feature = "rkyv", archive(check_bytes))] +#[cfg_attr( + feature = "rkyv", + archive_attr(derive(Debug, Eq, PartialEq, PartialOrd, Ord)) +)] enum VersionInner { Small { small: VersionSmall }, Full { full: VersionFull }, @@ -324,7 +351,7 @@ impl Version { /// Returns the pre-relase part of this version, if it exists. #[inline] - pub fn pre(&self) -> Option<(PreRelease, u64)> { + pub fn pre(&self) -> Option { match *self.inner { VersionInner::Small { ref small } => small.pre(), VersionInner::Full { ref full } => full.pre, @@ -425,7 +452,7 @@ impl Version { /// Set the pre-release component and return the updated version. #[inline] - pub fn with_pre(mut self, value: Option<(PreRelease, u64)>) -> Version { + pub fn with_pre(mut self, value: Option) -> Version { if let VersionInner::Small { ref mut small } = Arc::make_mut(&mut self.inner) { if small.set_pre(value) { return self; @@ -581,7 +608,7 @@ impl std::fmt::Display for Version { let pre = self .pre() .as_ref() - .map(|(pre_kind, pre_version)| format!("{pre_kind}{pre_version}")) + .map(|PreRelease { kind, number }| format!("{kind}{number}")) .unwrap_or_default(); let post = self .post() @@ -746,6 +773,15 @@ impl FromStr for Version { /// incredibly rare. Virtually all versions have zero or one pre, dev or post /// release components. #[derive(Clone, Debug)] +#[cfg_attr( + feature = "rkyv", + derive(rkyv::Archive, rkyv::Deserialize, rkyv::Serialize) +)] +#[cfg_attr(feature = "rkyv", archive(check_bytes))] +#[cfg_attr( + feature = "rkyv", + archive_attr(derive(Debug, Eq, PartialEq, PartialOrd, Ord)) +)] struct VersionSmall { /// The representation discussed above. 
repr: u64, @@ -869,23 +905,23 @@ impl VersionSmall { } #[inline] - fn pre(&self) -> Option<(PreRelease, u64)> { + fn pre(&self) -> Option { let v = (self.repr >> 8) & 0xFF; if v == 0xFF { return None; } let number = v & 0b0011_1111; let kind = match v >> 6 { - 0 => PreRelease::Alpha, - 1 => PreRelease::Beta, - 2 => PreRelease::Rc, + 0 => PreReleaseKind::Alpha, + 1 => PreReleaseKind::Beta, + 2 => PreReleaseKind::Rc, _ => unreachable!(), }; - Some((kind, number)) + Some(PreRelease { kind, number }) } #[inline] - fn set_pre(&mut self, value: Option<(PreRelease, u64)>) -> bool { + fn set_pre(&mut self, value: Option) -> bool { if value.is_some() && (self.post().is_some() || self.dev().is_some()) { return false; } @@ -893,14 +929,14 @@ impl VersionSmall { None => { self.repr |= 0xFF << 8; } - Some((kind, number)) => { + Some(PreRelease { kind, number }) => { if number > 0b0011_1111 { return false; } let kind = match kind { - PreRelease::Alpha => 0, - PreRelease::Beta => 1, - PreRelease::Rc => 2, + PreReleaseKind::Alpha => 0, + PreReleaseKind::Beta => 1, + PreReleaseKind::Rc => 2, }; self.repr &= !(0xFF << 8); self.repr |= ((kind << 6) | number) << 8; @@ -956,6 +992,15 @@ impl VersionSmall { /// In general, the "full" representation is rarely used in practice since most /// versions will fit into the "small" representation. #[derive(Clone, Debug)] +#[cfg_attr( + feature = "rkyv", + derive(rkyv::Archive, rkyv::Deserialize, rkyv::Serialize) +)] +#[cfg_attr(feature = "rkyv", archive(check_bytes))] +#[cfg_attr( + feature = "rkyv", + archive_attr(derive(Debug, Eq, PartialEq, PartialOrd, Ord)) +)] struct VersionFull { /// The [versioning /// epoch](https://peps.python.org/pep-0440/#version-epochs). 
Normally @@ -973,7 +1018,7 @@ struct VersionFull { /// /// Note that whether this is Some influences the version range /// matching since normally we exclude all prerelease versions - pre: Option<(PreRelease, u64)>, + pre: Option, /// The [Post release /// version](https://peps.python.org/pep-0440/#post-releases), higher /// post version are preferred over lower post or none-post versions @@ -1066,12 +1111,40 @@ impl FromStr for VersionPattern { } } +/// An optional pre-release modifier and number applied to a version. +#[derive(PartialEq, Eq, Debug, Hash, Clone, Copy, Ord, PartialOrd)] +#[cfg_attr(feature = "pyo3", pyclass)] +#[cfg_attr( + feature = "rkyv", + derive(rkyv::Archive, rkyv::Deserialize, rkyv::Serialize) +)] +#[cfg_attr(feature = "rkyv", archive(check_bytes))] +#[cfg_attr( + feature = "rkyv", + archive_attr(derive(Debug, Eq, PartialEq, PartialOrd, Ord)) +)] +pub struct PreRelease { + /// The kind of pre-release. + pub kind: PreReleaseKind, + /// The number associated with the pre-release. + pub number: u64, +} + /// Optional prerelease modifier (alpha, beta or release candidate) appended to version /// /// #[derive(PartialEq, Eq, Debug, Hash, Clone, Copy, Ord, PartialOrd)] #[cfg_attr(feature = "pyo3", pyclass)] -pub enum PreRelease { +#[cfg_attr( + feature = "rkyv", + derive(rkyv::Archive, rkyv::Deserialize, rkyv::Serialize) +)] +#[cfg_attr(feature = "rkyv", archive(check_bytes))] +#[cfg_attr( + feature = "rkyv", + archive_attr(derive(Debug, Eq, PartialEq, PartialOrd, Ord)) +)] +pub enum PreReleaseKind { /// alpha prerelease Alpha, /// beta prerelease @@ -1080,7 +1153,7 @@ pub enum PreRelease { Rc, } -impl std::fmt::Display for PreRelease { +impl std::fmt::Display for PreReleaseKind { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::Alpha => write!(f, "a"), @@ -1106,6 +1179,15 @@ impl std::fmt::Display for PreRelease { /// /// Luckily the default `Ord` implementation for `Vec` matches the PEP 440 rules. 
#[derive(Eq, PartialEq, Debug, Clone, Hash)] +#[cfg_attr( + feature = "rkyv", + derive(rkyv::Archive, rkyv::Deserialize, rkyv::Serialize) +)] +#[cfg_attr(feature = "rkyv", archive(check_bytes))] +#[cfg_attr( + feature = "rkyv", + archive_attr(derive(Debug, Eq, PartialEq, PartialOrd, Ord)) +)] pub enum LocalSegment { /// Not-parseable as integer segment of local version String(String), @@ -1160,7 +1242,7 @@ struct Parser<'a> { /// The release numbers extracted from the version. release: ReleaseNumbers, /// The pre-release version, if any. - pre: Option<(PreRelease, u64)>, + pre: Option, /// The post-release version, if any. post: Option, /// The dev release, if any. @@ -1384,15 +1466,15 @@ impl<'a> Parser<'a> { // since the strings are matched in order. const SPELLINGS: StringSet = StringSet::new(&["alpha", "beta", "preview", "pre", "rc", "a", "b", "c"]); - const MAP: &[PreRelease] = &[ - PreRelease::Alpha, - PreRelease::Beta, - PreRelease::Rc, - PreRelease::Rc, - PreRelease::Rc, - PreRelease::Alpha, - PreRelease::Beta, - PreRelease::Rc, + const MAP: &[PreReleaseKind] = &[ + PreReleaseKind::Alpha, + PreReleaseKind::Beta, + PreReleaseKind::Rc, + PreReleaseKind::Rc, + PreReleaseKind::Rc, + PreReleaseKind::Alpha, + PreReleaseKind::Beta, + PreReleaseKind::Rc, ]; let oldpos = self.i; @@ -1410,7 +1492,7 @@ impl<'a> Parser<'a> { // Under the normalization rules, a pre-release without an // explicit number defaults to `0`. 
let number = self.parse_number()?.unwrap_or(0); - self.pre = Some((kind, number)); + self.pre = Some(PreRelease { kind, number }); Ok(()) } @@ -1991,7 +2073,7 @@ impl PyVersion { /// Note that whether this is Some influences the version /// range matching since normally we exclude all prerelease versions #[getter] - pub fn pre(&self) -> Option<(PreRelease, u64)> { + pub fn pre(&self) -> Option { self.0.pre() } /// The [Post release version](https://peps.python.org/pep-0440/#post-releases), @@ -2134,17 +2216,32 @@ fn sortable_tuple(version: &Version) -> (u64, u64, Option, u64, &[LocalSegm // dev release (None, None, Some(n)) => (0, 0, None, n, version.local()), // alpha release - (Some((PreRelease::Alpha, n)), post, dev) => { - (1, n, post, dev.unwrap_or(u64::MAX), version.local()) - } + ( + Some(PreRelease { + kind: PreReleaseKind::Alpha, + number: n, + }), + post, + dev, + ) => (1, n, post, dev.unwrap_or(u64::MAX), version.local()), // beta release - (Some((PreRelease::Beta, n)), post, dev) => { - (2, n, post, dev.unwrap_or(u64::MAX), version.local()) - } + ( + Some(PreRelease { + kind: PreReleaseKind::Beta, + number: n, + }), + post, + dev, + ) => (2, n, post, dev.unwrap_or(u64::MAX), version.local()), // alpha release - (Some((PreRelease::Rc, n)), post, dev) => { - (3, n, post, dev.unwrap_or(u64::MAX), version.local()) - } + ( + Some(PreRelease { + kind: PreReleaseKind::Rc, + number: n, + }), + post, + dev, + ) => (3, n, post, dev.unwrap_or(u64::MAX), version.local()), // final release (None, None, None) => (4, 0, None, 0, version.local()), // post release @@ -2236,70 +2333,109 @@ mod tests { ("1.0.dev456", Version::new([1, 0]).with_dev(Some(456))), ( "1.0a1", - Version::new([1, 0]).with_pre(Some((PreRelease::Alpha, 1))), + Version::new([1, 0]).with_pre(Some(PreRelease { + kind: PreReleaseKind::Alpha, + number: 1, + })), ), ( "1.0a2.dev456", Version::new([1, 0]) - .with_pre(Some((PreRelease::Alpha, 2))) + .with_pre(Some(PreRelease { + kind: 
PreReleaseKind::Alpha, + number: 2, + })) .with_dev(Some(456)), ), ( "1.0a12.dev456", Version::new([1, 0]) - .with_pre(Some((PreRelease::Alpha, 12))) + .with_pre(Some(PreRelease { + kind: PreReleaseKind::Alpha, + number: 12, + })) .with_dev(Some(456)), ), ( "1.0a12", - Version::new([1, 0]).with_pre(Some((PreRelease::Alpha, 12))), + Version::new([1, 0]).with_pre(Some(PreRelease { + kind: PreReleaseKind::Alpha, + number: 12, + })), ), ( "1.0b1.dev456", Version::new([1, 0]) - .with_pre(Some((PreRelease::Beta, 1))) + .with_pre(Some(PreRelease { + kind: PreReleaseKind::Beta, + number: 1, + })) .with_dev(Some(456)), ), ( "1.0b2", - Version::new([1, 0]).with_pre(Some((PreRelease::Beta, 2))), + Version::new([1, 0]).with_pre(Some(PreRelease { + kind: PreReleaseKind::Beta, + number: 2, + })), ), ( "1.0b2.post345.dev456", Version::new([1, 0]) - .with_pre(Some((PreRelease::Beta, 2))) + .with_pre(Some(PreRelease { + kind: PreReleaseKind::Beta, + number: 2, + })) .with_dev(Some(456)) .with_post(Some(345)), ), ( "1.0b2.post345", Version::new([1, 0]) - .with_pre(Some((PreRelease::Beta, 2))) + .with_pre(Some(PreRelease { + kind: PreReleaseKind::Beta, + number: 2, + })) .with_post(Some(345)), ), ( "1.0b2-346", Version::new([1, 0]) - .with_pre(Some((PreRelease::Beta, 2))) + .with_pre(Some(PreRelease { + kind: PreReleaseKind::Beta, + number: 2, + })) .with_post(Some(346)), ), ( "1.0c1.dev456", Version::new([1, 0]) - .with_pre(Some((PreRelease::Rc, 1))) + .with_pre(Some(PreRelease { + kind: PreReleaseKind::Rc, + number: 1, + })) .with_dev(Some(456)), ), ( "1.0c1", - Version::new([1, 0]).with_pre(Some((PreRelease::Rc, 1))), + Version::new([1, 0]).with_pre(Some(PreRelease { + kind: PreReleaseKind::Rc, + number: 1, + })), ), ( "1.0rc2", - Version::new([1, 0]).with_pre(Some((PreRelease::Rc, 2))), + Version::new([1, 0]).with_pre(Some(PreRelease { + kind: PreReleaseKind::Rc, + number: 2, + })), ), ( "1.0c3", - Version::new([1, 0]).with_pre(Some((PreRelease::Rc, 3))), + Version::new([1, 
0]).with_pre(Some(PreRelease { + kind: PreReleaseKind::Rc, + number: 3, + })), ), ("1.0", Version::new([1, 0])), ( @@ -2362,46 +2498,67 @@ mod tests { "1!1.0a1", Version::new([1, 0]) .with_epoch(1) - .with_pre(Some((PreRelease::Alpha, 1))), + .with_pre(Some(PreRelease { + kind: PreReleaseKind::Alpha, + number: 1, + })), ), ( "1!1.0a2.dev456", Version::new([1, 0]) .with_epoch(1) - .with_pre(Some((PreRelease::Alpha, 2))) + .with_pre(Some(PreRelease { + kind: PreReleaseKind::Alpha, + number: 2, + })) .with_dev(Some(456)), ), ( "1!1.0a12.dev456", Version::new([1, 0]) .with_epoch(1) - .with_pre(Some((PreRelease::Alpha, 12))) + .with_pre(Some(PreRelease { + kind: PreReleaseKind::Alpha, + number: 12, + })) .with_dev(Some(456)), ), ( "1!1.0a12", Version::new([1, 0]) .with_epoch(1) - .with_pre(Some((PreRelease::Alpha, 12))), + .with_pre(Some(PreRelease { + kind: PreReleaseKind::Alpha, + number: 12, + })), ), ( "1!1.0b1.dev456", Version::new([1, 0]) .with_epoch(1) - .with_pre(Some((PreRelease::Beta, 1))) + .with_pre(Some(PreRelease { + kind: PreReleaseKind::Beta, + number: 1, + })) .with_dev(Some(456)), ), ( "1!1.0b2", Version::new([1, 0]) .with_epoch(1) - .with_pre(Some((PreRelease::Beta, 2))), + .with_pre(Some(PreRelease { + kind: PreReleaseKind::Beta, + number: 2, + })), ), ( "1!1.0b2.post345.dev456", Version::new([1, 0]) .with_epoch(1) - .with_pre(Some((PreRelease::Beta, 2))) + .with_pre(Some(PreRelease { + kind: PreReleaseKind::Beta, + number: 2, + })) .with_post(Some(345)) .with_dev(Some(456)), ), @@ -2409,40 +2566,58 @@ mod tests { "1!1.0b2.post345", Version::new([1, 0]) .with_epoch(1) - .with_pre(Some((PreRelease::Beta, 2))) + .with_pre(Some(PreRelease { + kind: PreReleaseKind::Beta, + number: 2, + })) .with_post(Some(345)), ), ( "1!1.0b2-346", Version::new([1, 0]) .with_epoch(1) - .with_pre(Some((PreRelease::Beta, 2))) + .with_pre(Some(PreRelease { + kind: PreReleaseKind::Beta, + number: 2, + })) .with_post(Some(346)), ), ( "1!1.0c1.dev456", Version::new([1, 0]) 
.with_epoch(1) - .with_pre(Some((PreRelease::Rc, 1))) + .with_pre(Some(PreRelease { + kind: PreReleaseKind::Rc, + number: 1, + })) .with_dev(Some(456)), ), ( "1!1.0c1", Version::new([1, 0]) .with_epoch(1) - .with_pre(Some((PreRelease::Rc, 1))), + .with_pre(Some(PreRelease { + kind: PreReleaseKind::Rc, + number: 1, + })), ), ( "1!1.0rc2", Version::new([1, 0]) .with_epoch(1) - .with_pre(Some((PreRelease::Rc, 2))), + .with_pre(Some(PreRelease { + kind: PreReleaseKind::Rc, + number: 2, + })), ), ( "1!1.0c3", Version::new([1, 0]) .with_epoch(1) - .with_pre(Some((PreRelease::Rc, 3))), + .with_pre(Some(PreRelease { + kind: PreReleaseKind::Rc, + number: 3, + })), ), ("1!1.0", Version::new([1, 0]).with_epoch(1)), ( @@ -2812,7 +2987,10 @@ mod tests { assert_eq!( p("1.0a1.*").unwrap_err(), ErrorKind::UnexpectedEnd { - version: Version::new([1, 0]).with_pre(Some((PreRelease::Alpha, 1))), + version: Version::new([1, 0]).with_pre(Some(PreRelease { + kind: PreReleaseKind::Alpha, + number: 1 + })), remaining: ".*".to_string() } .into(), @@ -2858,79 +3036,136 @@ mod tests { // pre-release tests assert_eq!( p("5a1"), - Version::new([5]).with_pre(Some((PreRelease::Alpha, 1))) + Version::new([5]).with_pre(Some(PreRelease { + kind: PreReleaseKind::Alpha, + number: 1 + })) ); assert_eq!( p("5alpha1"), - Version::new([5]).with_pre(Some((PreRelease::Alpha, 1))) + Version::new([5]).with_pre(Some(PreRelease { + kind: PreReleaseKind::Alpha, + number: 1 + })) ); assert_eq!( p("5b1"), - Version::new([5]).with_pre(Some((PreRelease::Beta, 1))) + Version::new([5]).with_pre(Some(PreRelease { + kind: PreReleaseKind::Beta, + number: 1 + })) ); assert_eq!( p("5beta1"), - Version::new([5]).with_pre(Some((PreRelease::Beta, 1))) + Version::new([5]).with_pre(Some(PreRelease { + kind: PreReleaseKind::Beta, + number: 1 + })) ); assert_eq!( p("5rc1"), - Version::new([5]).with_pre(Some((PreRelease::Rc, 1))) + Version::new([5]).with_pre(Some(PreRelease { + kind: PreReleaseKind::Rc, + number: 1 + })) ); 
assert_eq!( p("5c1"), - Version::new([5]).with_pre(Some((PreRelease::Rc, 1))) + Version::new([5]).with_pre(Some(PreRelease { + kind: PreReleaseKind::Rc, + number: 1 + })) ); assert_eq!( p("5preview1"), - Version::new([5]).with_pre(Some((PreRelease::Rc, 1))) + Version::new([5]).with_pre(Some(PreRelease { + kind: PreReleaseKind::Rc, + number: 1 + })) ); assert_eq!( p("5pre1"), - Version::new([5]).with_pre(Some((PreRelease::Rc, 1))) + Version::new([5]).with_pre(Some(PreRelease { + kind: PreReleaseKind::Rc, + number: 1 + })) ); assert_eq!( p("5.6.7pre1"), - Version::new([5, 6, 7]).with_pre(Some((PreRelease::Rc, 1))) + Version::new([5, 6, 7]).with_pre(Some(PreRelease { + kind: PreReleaseKind::Rc, + number: 1 + })) ); assert_eq!( p("5alpha789"), - Version::new([5]).with_pre(Some((PreRelease::Alpha, 789))) + Version::new([5]).with_pre(Some(PreRelease { + kind: PreReleaseKind::Alpha, + number: 789 + })) ); assert_eq!( p("5.alpha789"), - Version::new([5]).with_pre(Some((PreRelease::Alpha, 789))) + Version::new([5]).with_pre(Some(PreRelease { + kind: PreReleaseKind::Alpha, + number: 789 + })) ); assert_eq!( p("5-alpha789"), - Version::new([5]).with_pre(Some((PreRelease::Alpha, 789))) + Version::new([5]).with_pre(Some(PreRelease { + kind: PreReleaseKind::Alpha, + number: 789 + })) ); assert_eq!( p("5_alpha789"), - Version::new([5]).with_pre(Some((PreRelease::Alpha, 789))) + Version::new([5]).with_pre(Some(PreRelease { + kind: PreReleaseKind::Alpha, + number: 789 + })) ); assert_eq!( p("5alpha.789"), - Version::new([5]).with_pre(Some((PreRelease::Alpha, 789))) + Version::new([5]).with_pre(Some(PreRelease { + kind: PreReleaseKind::Alpha, + number: 789 + })) ); assert_eq!( p("5alpha-789"), - Version::new([5]).with_pre(Some((PreRelease::Alpha, 789))) + Version::new([5]).with_pre(Some(PreRelease { + kind: PreReleaseKind::Alpha, + number: 789 + })) ); assert_eq!( p("5alpha_789"), - Version::new([5]).with_pre(Some((PreRelease::Alpha, 789))) + 
Version::new([5]).with_pre(Some(PreRelease { + kind: PreReleaseKind::Alpha, + number: 789 + })) ); assert_eq!( p("5ALPHA789"), - Version::new([5]).with_pre(Some((PreRelease::Alpha, 789))) + Version::new([5]).with_pre(Some(PreRelease { + kind: PreReleaseKind::Alpha, + number: 789 + })) ); assert_eq!( p("5aLpHa789"), - Version::new([5]).with_pre(Some((PreRelease::Alpha, 789))) + Version::new([5]).with_pre(Some(PreRelease { + kind: PreReleaseKind::Alpha, + number: 789 + })) ); assert_eq!( p("5alpha"), - Version::new([5]).with_pre(Some((PreRelease::Alpha, 0))) + Version::new([5]).with_pre(Some(PreRelease { + kind: PreReleaseKind::Alpha, + number: 0 + })) ); // post-release tests @@ -3048,19 +3283,28 @@ mod tests { assert_eq!( p("5a2post3"), Version::new([5]) - .with_pre(Some((PreRelease::Alpha, 2))) + .with_pre(Some(PreRelease { + kind: PreReleaseKind::Alpha, + number: 2 + })) .with_post(Some(3)) ); assert_eq!( p("5.a-2_post-3"), Version::new([5]) - .with_pre(Some((PreRelease::Alpha, 2))) + .with_pre(Some(PreRelease { + kind: PreReleaseKind::Alpha, + number: 2 + })) .with_post(Some(3)) ); assert_eq!( p("5a2-3"), Version::new([5]) - .with_pre(Some((PreRelease::Alpha, 2))) + .with_pre(Some(PreRelease { + kind: PreReleaseKind::Alpha, + number: 2 + })) .with_post(Some(3)) ); diff --git a/crates/pep440-rs/src/version_specifier.rs b/crates/pep440-rs/src/version_specifier.rs index 1e54d00e9..5eef7c148 100644 --- a/crates/pep440-rs/src/version_specifier.rs +++ b/crates/pep440-rs/src/version_specifier.rs @@ -36,6 +36,12 @@ use crate::{ /// assert_eq!(version_specifiers.iter().position(|specifier| *specifier.operator() == Operator::LessThan), Some(1)); /// ``` #[derive(Eq, PartialEq, Debug, Clone, Hash)] +#[cfg_attr( + feature = "rkyv", + derive(rkyv::Archive, rkyv::Deserialize, rkyv::Serialize) +)] +#[cfg_attr(feature = "rkyv", archive(check_bytes))] +#[cfg_attr(feature = "rkyv", archive_attr(derive(Debug)))] #[cfg_attr(feature = "pyo3", pyclass(sequence))] pub struct 
VersionSpecifiers(Vec); @@ -240,6 +246,12 @@ impl std::error::Error for VersionSpecifiersParseError {} /// assert!(version_specifier.contains(&version)); /// ``` #[derive(Eq, PartialEq, Debug, Clone, Hash)] +#[cfg_attr( + feature = "rkyv", + derive(rkyv::Archive, rkyv::Deserialize, rkyv::Serialize) +)] +#[cfg_attr(feature = "rkyv", archive(check_bytes))] +#[cfg_attr(feature = "rkyv", archive_attr(derive(Debug)))] #[cfg_attr(feature = "pyo3", pyclass(get_all))] pub struct VersionSpecifier { /// ~=|==|!=|<=|>=|<|>|===, plus whether the version ended with a star @@ -727,7 +739,7 @@ mod tests { use indoc::indoc; - use crate::{LocalSegment, PreRelease}; + use crate::{LocalSegment, PreRelease, PreReleaseKind}; use super::*; @@ -1436,7 +1448,10 @@ mod tests { "==2.0a1.*", ParseErrorKind::InvalidVersion( version::ErrorKind::UnexpectedEnd { - version: Version::new([2, 0]).with_pre(Some((PreRelease::Alpha, 1))), + version: Version::new([2, 0]).with_pre(Some(PreRelease { + kind: PreReleaseKind::Alpha, + number: 1, + })), remaining: ".*".to_string(), } .into(), @@ -1447,7 +1462,10 @@ mod tests { "!=2.0a1.*", ParseErrorKind::InvalidVersion( version::ErrorKind::UnexpectedEnd { - version: Version::new([2, 0]).with_pre(Some((PreRelease::Alpha, 1))), + version: Version::new([2, 0]).with_pre(Some(PreRelease { + kind: PreReleaseKind::Alpha, + number: 1, + })), remaining: ".*".to_string(), } .into(), diff --git a/crates/pep508-rs/Cargo.toml b/crates/pep508-rs/Cargo.toml index faef5cdca..92fdc9b14 100644 --- a/crates/pep508-rs/Cargo.toml +++ b/crates/pep508-rs/Cargo.toml @@ -25,6 +25,7 @@ once_cell = { workspace = true } pyo3 = { workspace = true, optional = true, features = ["abi3", "extension-module"] } pyo3-log = { workspace = true, optional = true } regex = { workspace = true } +rkyv = { workspace = true, features = ["strict"], optional = true } serde = { workspace = true, features = ["derive"], optional = true } serde_json = { workspace = true, optional = true } thiserror = { 
workspace = true } @@ -40,5 +41,6 @@ testing_logger = { version = "0.1.1" } [features] pyo3 = ["dep:pyo3", "pep440_rs/pyo3", "pyo3-log"] +rkyv = ["dep:rkyv", "pep440_rs/rkyv"] serde = ["dep:serde", "pep440_rs/serde"] default = [] diff --git a/crates/pep508-rs/src/lib.rs b/crates/pep508-rs/src/lib.rs index aecbd91f5..fd6937a1a 100644 --- a/crates/pep508-rs/src/lib.rs +++ b/crates/pep508-rs/src/lib.rs @@ -11,7 +11,7 @@ //! let marker = r#"requests [security,tests] >= 2.8.1, == 2.8.* ; python_version > "3.8""#; //! let dependency_specification = Requirement::from_str(marker).unwrap(); //! assert_eq!(dependency_specification.name.as_ref(), "requests"); -//! assert_eq!(dependency_specification.extras, Some(vec![ExtraName::from_str("security").unwrap(), ExtraName::from_str("tests").unwrap()])); +//! assert_eq!(dependency_specification.extras, vec![ExtraName::from_str("security").unwrap(), ExtraName::from_str("tests").unwrap()]); //! ``` #![deny(missing_docs)] diff --git a/crates/puffin-client/Cargo.toml b/crates/puffin-client/Cargo.toml index 3e6099cab..1b5b3e686 100644 --- a/crates/puffin-client/Cargo.toml +++ b/crates/puffin-client/Cargo.toml @@ -5,7 +5,7 @@ edition = "2021" [dependencies] cache-key = { path = "../cache-key" } -distribution-filename = { path = "../distribution-filename", features = ["serde"] } +distribution-filename = { path = "../distribution-filename", features = ["rkyv", "serde"] } distribution-types = { path = "../distribution-types" } install-wheel-rs = { path = "../install-wheel-rs" } pep440_rs = { path = "../pep440-rs" } @@ -27,6 +27,7 @@ http-cache-semantics = { workspace = true } reqwest = { workspace = true } reqwest-middleware = { workspace = true } reqwest-retry = { workspace = true } +rkyv = { workspace = true, features = ["strict", "validation"] } rmp-serde = { workspace = true } rustc-hash = { workspace = true } serde = { workspace = true } diff --git a/crates/puffin-client/src/error.rs b/crates/puffin-client/src/error.rs index 
57e1a79f7..af0af048c 100644 --- a/crates/puffin-client/src/error.rs +++ b/crates/puffin-client/src/error.rs @@ -41,6 +41,10 @@ pub enum ErrorKind { #[error(transparent)] UrlParseError(#[from] url::ParseError), + /// A base URL could not be joined with a possibly relative URL. + #[error(transparent)] + JoinRelativeError(#[from] pypi_types::JoinRelativeError), + /// Dist-info error #[error(transparent)] InstallWheel(#[from] install_wheel_rs::Error), diff --git a/crates/puffin-client/src/flat_index.rs b/crates/puffin-client/src/flat_index.rs index a64c029d7..f62e01409 100644 --- a/crates/puffin-client/src/flat_index.rs +++ b/crates/puffin-client/src/flat_index.rs @@ -116,7 +116,7 @@ impl<'a> FlatIndexClient<'a> { let files: Vec = files .into_iter() .filter_map(|file| { - match File::try_from(file, &base) { + match File::try_from(file, base.as_url().as_str()) { Ok(file) => Some(file), Err(err) => { // Ignore files with unparseable version specifiers. @@ -178,7 +178,7 @@ impl<'a> FlatIndexClient<'a> { hashes: Hashes { sha256: None }, requires_python: None, size: None, - upload_time: None, + upload_time_utc_ms: None, url: FileLocation::Path(entry.path().to_path_buf()), yanked: None, }; diff --git a/crates/puffin-client/src/lib.rs b/crates/puffin-client/src/lib.rs index 83cf06aaa..93f5d9a2f 100644 --- a/crates/puffin-client/src/lib.rs +++ b/crates/puffin-client/src/lib.rs @@ -2,7 +2,8 @@ pub use cached_client::{CacheControl, CachedClient, CachedClientError, DataWithC pub use error::{Error, ErrorKind}; pub use flat_index::{FlatDistributions, FlatIndex, FlatIndexClient, FlatIndexError}; pub use registry_client::{ - read_metadata_async, RegistryClient, RegistryClientBuilder, SimpleMetadata, VersionFiles, + read_metadata_async, RegistryClient, RegistryClientBuilder, SimpleMetadata, SimpleMetadatum, + VersionFiles, }; mod cache_headers; diff --git a/crates/puffin-client/src/registry_client.rs b/crates/puffin-client/src/registry_client.rs index 1c35fa4dd..a03b60abf 100644 --- 
a/crates/puffin-client/src/registry_client.rs +++ b/crates/puffin-client/src/registry_client.rs @@ -22,7 +22,7 @@ use install_wheel_rs::find_dist_info; use pep440_rs::Version; use puffin_cache::{Cache, CacheBucket, WheelCache}; use puffin_normalize::PackageName; -use pypi_types::{BaseUrl, Metadata21, SimpleJson}; +use pypi_types::{Metadata21, SimpleJson}; use crate::cached_client::CacheControl; use crate::html::SimpleHtml; @@ -206,15 +206,16 @@ impl RegistryClient { let bytes = response.bytes().await.map_err(ErrorKind::RequestError)?; let data: SimpleJson = serde_json::from_slice(bytes.as_ref()) .map_err(|err| Error::from_json_err(err, url.clone()))?; - let base = BaseUrl::from(url.clone()); - let metadata = SimpleMetadata::from_files(data.files, package_name, &base); + let metadata = + SimpleMetadata::from_files(data.files, package_name, url.as_str()); Ok(metadata) } MediaType::Html => { let text = response.text().await.map_err(ErrorKind::RequestError)?; let SimpleHtml { base, files } = SimpleHtml::parse(&text, &url) .map_err(|err| Error::from_html_err(err, url.clone()))?; - let metadata = SimpleMetadata::from_files(files, package_name, &base); + let metadata = + SimpleMetadata::from_files(files, package_name, base.as_url().as_str()); Ok(metadata) } } @@ -245,7 +246,8 @@ impl RegistryClient { let metadata = match &built_dist { BuiltDist::Registry(wheel) => match &wheel.file.url { FileLocation::RelativeUrl(base, url) => { - let url = base.join_relative(url).map_err(ErrorKind::UrlParseError)?; + let url = pypi_types::base_url_join_relative(base, url) + .map_err(ErrorKind::JoinRelativeError)?; self.wheel_metadata_registry(&wheel.index, &wheel.file, &url) .await? 
} @@ -494,46 +496,78 @@ pub async fn read_metadata_async( Ok(metadata) } -#[derive(Default, Debug, Serialize, Deserialize)] +#[derive( + Default, Debug, Serialize, Deserialize, rkyv::Archive, rkyv::Deserialize, rkyv::Serialize, +)] +#[archive(check_bytes)] +#[archive_attr(derive(Debug))] pub struct VersionFiles { - pub wheels: Vec<(WheelFilename, File)>, - pub source_dists: Vec<(SourceDistFilename, File)>, + pub wheels: Vec, + pub source_dists: Vec, } impl VersionFiles { fn push(&mut self, filename: DistFilename, file: File) { match filename { - DistFilename::WheelFilename(inner) => self.wheels.push((inner, file)), - DistFilename::SourceDistFilename(inner) => self.source_dists.push((inner, file)), + DistFilename::WheelFilename(name) => self.wheels.push(VersionWheel { name, file }), + DistFilename::SourceDistFilename(name) => { + self.source_dists.push(VersionSourceDist { name, file }) + } } } pub fn all(self) -> impl Iterator { self.wheels .into_iter() - .map(|(filename, file)| (DistFilename::WheelFilename(filename), file)) + .map(|VersionWheel { name, file }| (DistFilename::WheelFilename(name), file)) .chain( self.source_dists .into_iter() - .map(|(filename, file)| (DistFilename::SourceDistFilename(filename), file)), + .map(|VersionSourceDist { name, file }| { + (DistFilename::SourceDistFilename(name), file) + }), ) } } -#[derive(Default, Debug, Serialize, Deserialize)] -pub struct SimpleMetadata(BTreeMap); +#[derive(Debug, Serialize, Deserialize, rkyv::Archive, rkyv::Deserialize, rkyv::Serialize)] +#[archive(check_bytes)] +#[archive_attr(derive(Debug))] +pub struct VersionWheel { + pub name: WheelFilename, + pub file: File, +} + +#[derive(Debug, Serialize, Deserialize, rkyv::Archive, rkyv::Deserialize, rkyv::Serialize)] +#[archive(check_bytes)] +#[archive_attr(derive(Debug))] +pub struct VersionSourceDist { + pub name: SourceDistFilename, + pub file: File, +} + +#[derive( + Default, Debug, Serialize, Deserialize, rkyv::Archive, rkyv::Deserialize, rkyv::Serialize, 
+)] +#[archive(check_bytes)] +#[archive_attr(derive(Debug))] +pub struct SimpleMetadata(Vec); + +#[derive(Debug, Serialize, Deserialize, rkyv::Archive, rkyv::Deserialize, rkyv::Serialize)] +#[archive(check_bytes)] +#[archive_attr(derive(Debug))] +pub struct SimpleMetadatum { + pub version: Version, + pub files: VersionFiles, +} impl SimpleMetadata { - pub fn iter(&self) -> impl DoubleEndedIterator { + pub fn iter(&self) -> impl DoubleEndedIterator { self.0.iter() } - fn from_files( - files: Vec, - package_name: &PackageName, - base: &BaseUrl, - ) -> Self { - let mut metadata = Self::default(); + fn from_files(files: Vec, package_name: &PackageName, base: &str) -> Self { + let mut map: BTreeMap = BTreeMap::default(); // Group the distributions by version and kind for file in files { @@ -553,7 +587,7 @@ impl SimpleMetadata { continue; } }; - match metadata.0.entry(version.clone()) { + match map.entry(version.clone()) { std::collections::btree_map::Entry::Occupied(mut entry) => { entry.get_mut().push(filename, file); } @@ -565,14 +599,17 @@ impl SimpleMetadata { } } } - - metadata + SimpleMetadata( + map.into_iter() + .map(|(version, files)| SimpleMetadatum { version, files }) + .collect(), + ) } } impl IntoIterator for SimpleMetadata { - type Item = (Version, VersionFiles); - type IntoIter = std::collections::btree_map::IntoIter; + type Item = SimpleMetadatum; + type IntoIter = std::vec::IntoIter; fn into_iter(self) -> Self::IntoIter { self.0.into_iter() @@ -607,12 +644,10 @@ impl MediaType { mod tests { use std::str::FromStr; - use url::Url; - use puffin_normalize::PackageName; - use pypi_types::{BaseUrl, SimpleJson}; + use pypi_types::SimpleJson; - use crate::SimpleMetadata; + use crate::{SimpleMetadata, SimpleMetadatum}; #[test] fn ignore_failing_files() { @@ -650,15 +685,15 @@ mod tests { } "#; let data: SimpleJson = serde_json::from_str(response).unwrap(); - let base = BaseUrl::from(Url::from_str("https://pypi.org/simple/pyflyby/").unwrap()); + let base = 
"https://pypi.org/simple/pyflyby/"; let simple_metadata = SimpleMetadata::from_files( data.files, &PackageName::from_str("pyflyby").unwrap(), - &base, + base, ); let versions: Vec = simple_metadata .iter() - .map(|(version, _)| version.to_string()) + .map(|SimpleMetadatum { version, .. }| version.to_string()) .collect(); assert_eq!(versions, ["1.7.8".to_string()]); } diff --git a/crates/puffin-dev/src/resolve_many.rs b/crates/puffin-dev/src/resolve_many.rs index 2215f124b..1c37b2b5b 100644 --- a/crates/puffin-dev/src/resolve_many.rs +++ b/crates/puffin-dev/src/resolve_many.rs @@ -49,7 +49,7 @@ async fn find_latest_version( package_name: &PackageName, ) -> Option { let (_, simple_metadata) = client.simple(package_name).await.ok()?; - let (version, _) = simple_metadata.into_iter().next()?; + let version = simple_metadata.into_iter().next()?.version; Some(version.clone()) } diff --git a/crates/puffin-distribution/src/distribution_database.rs b/crates/puffin-distribution/src/distribution_database.rs index 1ba253c8f..3e1e0169e 100644 --- a/crates/puffin-distribution/src/distribution_database.rs +++ b/crates/puffin-distribution/src/distribution_database.rs @@ -92,9 +92,9 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> } let url = match &wheel.file.url { - FileLocation::RelativeUrl(base, url) => base - .join_relative(url) - .map_err(|err| Error::Url(url.clone(), err))?, + FileLocation::RelativeUrl(base, url) => { + pypi_types::base_url_join_relative(base, url)? + } FileLocation::AbsoluteUrl(url) => { Url::parse(url).map_err(|err| Error::Url(url.clone(), err))? 
} diff --git a/crates/puffin-distribution/src/error.rs b/crates/puffin-distribution/src/error.rs index 2f3eb579a..d68906406 100644 --- a/crates/puffin-distribution/src/error.rs +++ b/crates/puffin-distribution/src/error.rs @@ -14,6 +14,8 @@ pub enum Error { // Network error #[error("Failed to parse URL: `{0}`")] Url(String, #[source] url::ParseError), + #[error(transparent)] + JoinRelativeUrl(#[from] pypi_types::JoinRelativeError), #[error("Git operation failed")] Git(#[source] anyhow::Error), #[error(transparent)] diff --git a/crates/puffin-distribution/src/source/mod.rs b/crates/puffin-distribution/src/source/mod.rs index 2537ce7fb..d427db914 100644 --- a/crates/puffin-distribution/src/source/mod.rs +++ b/crates/puffin-distribution/src/source/mod.rs @@ -105,9 +105,9 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> { } SourceDist::Registry(registry_source_dist) => { let url = match &registry_source_dist.file.url { FileLocation::RelativeUrl(base, url) => base - .join_relative(url) - .map_err(|err| Error::Url(url.clone(), err))?, + FileLocation::RelativeUrl(base, url) => { + pypi_types::base_url_join_relative(base, url)? + } FileLocation::AbsoluteUrl(url) => { Url::parse(url).map_err(|err| Error::Url(url.clone(), err))? } @@ -182,9 +182,9 @@ impl<'a, T: BuildContext> SourceDistCachedBuilder<'a, T> { } SourceDist::Registry(registry_source_dist) => { let url = match &registry_source_dist.file.url { FileLocation::RelativeUrl(base, url) => base - .join_relative(url) - .map_err(|err| Error::Url(url.clone(), err))?, + FileLocation::RelativeUrl(base, url) => { + pypi_types::base_url_join_relative(base, url)? + } FileLocation::AbsoluteUrl(url) => { Url::parse(url).map_err(|err| Error::Url(url.clone(), err))?
} diff --git a/crates/puffin-normalize/Cargo.toml b/crates/puffin-normalize/Cargo.toml index 1ee1de5a7..21c7f23f8 100644 --- a/crates/puffin-normalize/Cargo.toml +++ b/crates/puffin-normalize/Cargo.toml @@ -6,3 +6,4 @@ description = "Normalization for distribution, package and extra anmes" [dependencies] serde = { workspace = true, features = ["derive"] } +rkyv = { workspace = true, features = ["strict", "validation"] } diff --git a/crates/puffin-normalize/src/package_name.rs b/crates/puffin-normalize/src/package_name.rs index 402cd0b71..6bbd69ee7 100644 --- a/crates/puffin-normalize/src/package_name.rs +++ b/crates/puffin-normalize/src/package_name.rs @@ -11,7 +11,21 @@ use crate::{validate_and_normalize_owned, validate_and_normalize_ref, InvalidNam /// down to a single `-`, e.g., `---`, `.`, and `__` all get converted to just `-`. /// /// See: -#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize)] +#[derive( + Debug, + Clone, + PartialEq, + Eq, + Hash, + PartialOrd, + Ord, + Serialize, + rkyv::Archive, + rkyv::Deserialize, + rkyv::Serialize, +)] +#[archive(check_bytes)] +#[archive_attr(derive(Debug))] pub struct PackageName(String); impl PackageName { diff --git a/crates/puffin-resolver/src/finder.rs b/crates/puffin-resolver/src/finder.rs index f628ae698..33412cdfb 100644 --- a/crates/puffin-resolver/src/finder.rs +++ b/crates/puffin-resolver/src/finder.rs @@ -11,7 +11,9 @@ use distribution_filename::DistFilename; use distribution_types::{Dist, IndexUrl, Resolution}; use pep508_rs::{Requirement, VersionOrUrl}; use platform_tags::Tags; -use puffin_client::{FlatDistributions, FlatIndex, RegistryClient, SimpleMetadata}; +use puffin_client::{ + FlatDistributions, FlatIndex, RegistryClient, SimpleMetadata, SimpleMetadatum, +}; use puffin_interpreter::Interpreter; use puffin_normalize::PackageName; @@ -158,7 +160,7 @@ impl<'a> DistFinder<'a> { (None, None, None) }; - for (version, files) in metadata.into_iter().rev() { + for SimpleMetadatum { 
version, files } in metadata.into_iter().rev() { // If we iterated past the first-compatible version, break. if best_version .as_ref() @@ -174,31 +176,30 @@ impl<'a> DistFinder<'a> { if !no_binary { // Find the most-compatible wheel - for (wheel, file) in files.wheels { + for version_wheel in files.wheels { // Only add dists compatible with the python version. // This is relevant for source dists which give no other indication of their // compatibility and wheels which may be tagged `py3-none-any` but // have `requires-python: ">=3.9"` - if !file - .requires_python - .as_ref() - .map_or(true, |requires_python| { + if !version_wheel.file.requires_python.as_ref().map_or( + true, + |requires_python| { requires_python.contains(self.interpreter.python_version()) - }) - { + }, + ) { continue; } best_version = Some(version.clone()); - if let Some(priority) = wheel.compatibility(self.tags) { + if let Some(priority) = version_wheel.name.compatibility(self.tags) { if best_wheel .as_ref() .map_or(true, |(.., existing)| priority > *existing) { best_wheel = Some(( Dist::from_registry( - DistFilename::WheelFilename(wheel), - file, + DistFilename::WheelFilename(version_wheel.name), + version_wheel.file, index.clone(), ), priority, @@ -210,25 +211,24 @@ impl<'a> DistFinder<'a> { // Find the most-compatible sdist, if no wheel was found. if best_wheel.is_none() { - for (source_dist, file) in files.source_dists { + for version_sdist in files.source_dists { // Only add dists compatible with the python version. 
// This is relevant for source dists which give no other indication of their // compatibility and wheels which may be tagged `py3-none-any` but // have `requires-python: ">=3.9"` - if !file - .requires_python - .as_ref() - .map_or(true, |requires_python| { + if !version_sdist.file.requires_python.as_ref().map_or( + true, + |requires_python| { requires_python.contains(self.interpreter.python_version()) - }) - { + }, + ) { continue; } - best_version = Some(source_dist.version.clone()); + best_version = Some(version_sdist.name.version.clone()); best_sdist = Some(Dist::from_registry( - DistFilename::SourceDistFilename(source_dist), - file, + DistFilename::SourceDistFilename(version_sdist.name), + version_sdist.file, index.clone(), )); } diff --git a/crates/puffin-resolver/src/pubgrub/specifier.rs b/crates/puffin-resolver/src/pubgrub/specifier.rs index 7dc59ed2b..dad945fee 100644 --- a/crates/puffin-resolver/src/pubgrub/specifier.rs +++ b/crates/puffin-resolver/src/pubgrub/specifier.rs @@ -1,7 +1,7 @@ use anyhow::Result; use pubgrub::range::Range; -use pep440_rs::{Operator, Version, VersionSpecifier}; +use pep440_rs::{Operator, PreRelease, Version, VersionSpecifier}; use crate::ResolveError; @@ -68,10 +68,9 @@ impl TryFrom<&VersionSpecifier> for PubGrubSpecifier { if let Some(post) = high.post() { high = high.with_post(Some(post + 1)); } else if let Some(pre) = high.pre() { - high = high.with_pre(Some(match pre { - (pep440_rs::PreRelease::Rc, n) => (pep440_rs::PreRelease::Rc, n + 1), - (pep440_rs::PreRelease::Alpha, n) => (pep440_rs::PreRelease::Alpha, n + 1), - (pep440_rs::PreRelease::Beta, n) => (pep440_rs::PreRelease::Beta, n + 1), + high = high.with_pre(Some(PreRelease { + kind: pre.kind, + number: pre.number + 1, })); } else { let mut release = high.release().to_vec(); @@ -86,10 +85,9 @@ impl TryFrom<&VersionSpecifier> for PubGrubSpecifier { if let Some(post) = high.post() { high = high.with_post(Some(post + 1)); } else if let Some(pre) = high.pre() { - high = 
high.with_pre(Some(match pre { - (pep440_rs::PreRelease::Rc, n) => (pep440_rs::PreRelease::Rc, n + 1), - (pep440_rs::PreRelease::Alpha, n) => (pep440_rs::PreRelease::Alpha, n + 1), - (pep440_rs::PreRelease::Beta, n) => (pep440_rs::PreRelease::Beta, n + 1), + high = high.with_pre(Some(PreRelease { + kind: pre.kind, + number: pre.number + 1, })); } else { let mut release = high.release().to_vec(); diff --git a/crates/puffin-resolver/src/resolver/mod.rs b/crates/puffin-resolver/src/resolver/mod.rs index 998600398..ce6dcc2d2 100644 --- a/crates/puffin-resolver/src/resolver/mod.rs +++ b/crates/puffin-resolver/src/resolver/mod.rs @@ -5,7 +5,7 @@ use std::sync::Arc; use anyhow::Result; use dashmap::{DashMap, DashSet}; use futures::channel::mpsc::UnboundedReceiver; -use futures::{pin_mut, FutureExt, StreamExt}; +use futures::{FutureExt, StreamExt}; use itertools::Itertools; use pubgrub::error::PubGrubError; use pubgrub::range::Range; @@ -202,14 +202,10 @@ impl<'a, Provider: ResolverProvider> Resolver<'a, Provider> { let (request_sink, request_stream) = futures::channel::mpsc::unbounded(); // Run the fetcher. - let requests_fut = self.fetch(request_stream); + let requests_fut = self.fetch(request_stream).fuse(); // Run the solver. - let resolve_fut = self.solve(&request_sink); - - let requests_fut = requests_fut.fuse(); - let resolve_fut = resolve_fut.fuse(); - pin_mut!(requests_fut, resolve_fut); + let resolve_fut = self.solve(&request_sink).fuse(); let resolution = select! 
{ result = requests_fut => { diff --git a/crates/puffin-resolver/src/version_map.rs b/crates/puffin-resolver/src/version_map.rs index 08ba2338f..2838deffa 100644 --- a/crates/puffin-resolver/src/version_map.rs +++ b/crates/puffin-resolver/src/version_map.rs @@ -8,7 +8,7 @@ use distribution_filename::DistFilename; use distribution_types::{Dist, IndexUrl, PrioritizedDistribution, ResolvableDist}; use pep440_rs::Version; use platform_tags::Tags; -use puffin_client::{FlatDistributions, SimpleMetadata}; +use puffin_client::{FlatDistributions, SimpleMetadata, SimpleMetadatum}; use puffin_normalize::PackageName; use puffin_traits::NoBinary; use puffin_warnings::warn_user_once; @@ -48,13 +48,13 @@ impl VersionMap { }; // Collect compatible distributions. - for (version, files) in metadata { + for SimpleMetadatum { version, files } in metadata { for (filename, file) in files.all() { // Support resolving as if it were an earlier timestamp, at least as long files have // upload time information. if let Some(exclude_newer) = exclude_newer { - match file.upload_time.as_ref() { - Some(upload_time) if upload_time >= exclude_newer => { + match file.upload_time_utc_ms.as_ref() { + Some(&upload_time) if upload_time >= exclude_newer.timestamp_millis() => { continue; } None => { diff --git a/crates/pypi-types/Cargo.toml b/crates/pypi-types/Cargo.toml index dce1bde26..9d9647b72 100644 --- a/crates/pypi-types/Cargo.toml +++ b/crates/pypi-types/Cargo.toml @@ -13,14 +13,15 @@ license = { workspace = true } workspace = true [dependencies] -pep440_rs = { path = "../pep440-rs", features = ["serde"] } -pep508_rs = { path = "../pep508-rs", features = ["serde"] } +pep440_rs = { path = "../pep440-rs", features = ["rkyv", "serde"] } +pep508_rs = { path = "../pep508-rs", features = ["rkyv", "serde"] } puffin-normalize = { path = "../puffin-normalize" } chrono = { workspace = true, features = ["serde"] } mailparse = { workspace = true } once_cell = { workspace = true } regex = { workspace = true } 
+rkyv = { workspace = true, features = ["strict", "validation"] } serde = { workspace = true } thiserror = { workspace = true } tracing = { workspace = true } diff --git a/crates/pypi-types/src/base_url.rs b/crates/pypi-types/src/base_url.rs index 15ea0a63d..0fdeb868a 100644 --- a/crates/pypi-types/src/base_url.rs +++ b/crates/pypi-types/src/base_url.rs @@ -1,6 +1,47 @@ use serde::{Deserialize, Serialize}; use url::Url; +/// Join a possibly relative URL to a base URL. +/// +/// When `maybe_relative` is not relative, then it is parsed and returned with +/// `base` being ignored. +/// +/// This is useful for parsing URLs that may be absolute or relative, with a +/// known base URL, and that doesn't require having already parsed a `BaseUrl`. +pub fn base_url_join_relative(base: &str, maybe_relative: &str) -> Result { + match Url::parse(maybe_relative) { + Ok(absolute) => Ok(absolute), + Err(err) => { + if err == url::ParseError::RelativeUrlWithoutBase { + let base = Url::parse(base).map_err(|err| JoinRelativeError { + original: base.to_string(), + source: err, + })?; + base.join(maybe_relative).map_err(|err| JoinRelativeError { + original: format!("{base}/{maybe_relative}"), + source: err, + }) + } else { + Err(JoinRelativeError { + original: maybe_relative.to_string(), + source: err, + }) + } + } + } +} + +/// An error that occurs when `base_url_join_relative` fails. +/// +/// The error message includes the URL (`base` or `maybe_relative`) passed to +/// `base_url_join_relative` that provoked the error. 
+#[derive(Clone, Debug, thiserror::Error)] +#[error("Failed to parse URL: `{original}`")] +pub struct JoinRelativeError { + original: String, + source: url::ParseError, +} + #[derive(Debug, Clone, Hash, Eq, PartialEq, Serialize, Deserialize)] pub struct BaseUrl( #[serde( diff --git a/crates/pypi-types/src/simple_json.rs b/crates/pypi-types/src/simple_json.rs index 1c10f5515..cc1d8c91f 100644 --- a/crates/pypi-types/src/simple_json.rs +++ b/crates/pypi-types/src/simple_json.rs @@ -68,7 +68,11 @@ where )) } -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive( + Debug, Clone, Serialize, Deserialize, rkyv::Archive, rkyv::Deserialize, rkyv::Serialize, +)] +#[archive(check_bytes)] +#[archive_attr(derive(Debug))] #[serde(untagged)] pub enum DistInfoMetadata { Bool(bool), @@ -84,7 +88,11 @@ impl DistInfoMetadata { } } -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive( + Debug, Clone, Serialize, Deserialize, rkyv::Archive, rkyv::Deserialize, rkyv::Serialize, +)] +#[archive(check_bytes)] +#[archive_attr(derive(Debug))] #[serde(untagged)] pub enum Yanked { Bool(bool), @@ -104,7 +112,23 @@ impl Yanked { /// /// PEP 691 says multiple hashes can be included and the interpretation is left to the client, we /// only support SHA 256 atm. 
-#[derive(Debug, Clone, Ord, PartialOrd, Eq, PartialEq, Hash, Default, Serialize, Deserialize)] +#[derive( + Debug, + Clone, + Ord, + PartialOrd, + Eq, + PartialEq, + Hash, + Default, + Serialize, + Deserialize, + rkyv::Archive, + rkyv::Deserialize, + rkyv::Serialize, +)] +#[archive(check_bytes)] +#[archive_attr(derive(Debug))] pub struct Hashes { pub sha256: Option, } diff --git a/crates/requirements-txt/Cargo.toml b/crates/requirements-txt/Cargo.toml index 1fc83a3f7..53afec7d6 100644 --- a/crates/requirements-txt/Cargo.toml +++ b/crates/requirements-txt/Cargo.toml @@ -13,8 +13,8 @@ license = { workspace = true } workspace = true [dependencies] -pep440_rs = { path = "../pep440-rs", features = ["serde"] } -pep508_rs = { path = "../pep508-rs", features = ["serde"] } +pep440_rs = { path = "../pep440-rs", features = ["rkyv", "serde"] } +pep508_rs = { path = "../pep508-rs", features = ["rkyv", "serde"] } puffin-fs = { path = "../puffin-fs" } puffin-normalize = { path = "../puffin-normalize" }