From 4cb1595136f26c6bc26acbc10403cefabe93f374 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Mon, 24 Jun 2024 14:29:01 -0400 Subject: [PATCH] uv-resolver: refactor lock data type deserialization This commit prepares to make the `source` and `version` fields optional in a `distribution.dependency` based on whether they have an unambiguous value. e.g., When there is exactly one distribution with a matching package name. This refactor effectively defines "wire" types for most of the lock data types (repeating the `WheelWire` and `LockWire` pattern) with one key difference: we don't use serde's `TryFrom` integration. In this refactor, we could have, and it would have worked. But in a subsequent commit, we're going to be adding state to the `unwire()` calls that is impossible to thread through a `TryFrom` implementation. This state will tell us how to populate the `source` and `version` values on a `Dependency` when they're missing. The duplication of types here is unfortunate, but compiler should catch any deviations. And the wire types are unexported, so they have a limited blast radius on complexity. --- crates/uv-resolver/src/lock.rs | 510 ++++++++++++++++++++------------- 1 file changed, 315 insertions(+), 195 deletions(-) diff --git a/crates/uv-resolver/src/lock.rs b/crates/uv-resolver/src/lock.rs index eeab34813..63d0b7b5a 100644 --- a/crates/uv-resolver/src/lock.rs +++ b/crates/uv-resolver/src/lock.rs @@ -30,6 +30,9 @@ use uv_normalize::{ExtraName, GroupName, PackageName}; use crate::resolution::{AnnotatedDist, ResolutionGraphNode}; use crate::{RequiresPython, ResolutionGraph}; +/// The current version of the lock file format. +const VERSION: u32 = 1; + #[derive(Clone, Debug, serde::Deserialize)] #[serde(try_from = "LockWire")] pub struct Lock { @@ -126,21 +129,189 @@ impl Lock { let distributions = locked_dists.into_values().collect(); let requires_python = graph.requires_python.clone(); - let lock = Self::new(distributions, requires_python)?; + let lock = Self::new(VERSION, distributions, requires_python)?; Ok(lock) } /// Initialize a [`Lock`] from a list of [`Distribution`] entries. fn new( - distributions: Vec, + version: u32, + mut distributions: Vec, requires_python: Option, ) -> Result { - let wire = LockWire { - version: 1, + // Put all dependencies for each distribution in a canonical order and + // check for duplicates. + for dist in &mut distributions { + dist.dependencies.sort(); + for windows in dist.dependencies.windows(2) { + let (dep1, dep2) = (&windows[0], &windows[1]); + if dep1 == dep2 { + return Err(LockErrorKind::DuplicateDependency { + id: dist.id.clone(), + dependency: dep1.clone(), + } + .into()); + } + } + + // Perform the same validation for optional dependencies. + for (extra, dependencies) in &mut dist.optional_dependencies { + dependencies.sort(); + for windows in dependencies.windows(2) { + let (dep1, dep2) = (&windows[0], &windows[1]); + if dep1 == dep2 { + return Err(LockErrorKind::DuplicateOptionalDependency { + id: dist.id.clone(), + extra: extra.clone(), + dependency: dep1.clone(), + } + .into()); + } + } + } + + // Perform the same validation for dev dependencies. + for (group, dependencies) in &mut dist.dev_dependencies { + dependencies.sort(); + for windows in dependencies.windows(2) { + let (dep1, dep2) = (&windows[0], &windows[1]); + if dep1 == dep2 { + return Err(LockErrorKind::DuplicateDevDependency { + id: dist.id.clone(), + group: group.clone(), + dependency: dep1.clone(), + } + .into()); + } + } + } + } + distributions.sort_by(|dist1, dist2| dist1.id.cmp(&dist2.id)); + + // Check for duplicate distribution IDs and also build up the map for + // distributions keyed by their ID. + let mut by_id = FxHashMap::default(); + for (i, dist) in distributions.iter().enumerate() { + if by_id.insert(dist.id.clone(), i).is_some() { + return Err(LockErrorKind::DuplicateDistribution { + id: dist.id.clone(), + } + .into()); + } + } + + // Build up a map from ID to extras. + let mut extras_by_id = FxHashMap::default(); + for dist in &distributions { + for extra in dist.optional_dependencies.keys() { + extras_by_id + .entry(dist.id.clone()) + .or_insert_with(FxHashSet::default) + .insert(extra.clone()); + } + } + + // Remove any non-existent extras (e.g., extras that were requested but don't exist). + for dist in &mut distributions { + dist.dependencies.retain(|dep| { + dep.extra.as_ref().map_or(true, |extra| { + extras_by_id + .get(&dep.distribution_id) + .is_some_and(|extras| extras.contains(extra)) + }) + }); + + for dependencies in dist.optional_dependencies.values_mut() { + dependencies.retain(|dep| { + dep.extra.as_ref().map_or(true, |extra| { + extras_by_id + .get(&dep.distribution_id) + .is_some_and(|extras| extras.contains(extra)) + }) + }); + } + + for dependencies in dist.dev_dependencies.values_mut() { + dependencies.retain(|dep| { + dep.extra.as_ref().map_or(true, |extra| { + extras_by_id + .get(&dep.distribution_id) + .is_some_and(|extras| extras.contains(extra)) + }) + }); + } + } + + // Check that every dependency has an entry in `by_id`. If any don't, + // it implies we somehow have a dependency with no corresponding locked + // distribution. + for dist in &distributions { + for dep in &dist.dependencies { + if !by_id.contains_key(&dep.distribution_id) { + return Err(LockErrorKind::UnrecognizedDependency { + id: dist.id.clone(), + dependency: dep.clone(), + } + .into()); + } + } + + // Perform the same validation for optional dependencies. + for dependencies in dist.optional_dependencies.values() { + for dep in dependencies { + if !by_id.contains_key(&dep.distribution_id) { + return Err(LockErrorKind::UnrecognizedDependency { + id: dist.id.clone(), + dependency: dep.clone(), + } + .into()); + } + } + } + + // Perform the same validation for dev dependencies. + for dependencies in dist.dev_dependencies.values() { + for dep in dependencies { + if !by_id.contains_key(&dep.distribution_id) { + return Err(LockErrorKind::UnrecognizedDependency { + id: dist.id.clone(), + dependency: dep.clone(), + } + .into()); + } + } + } + + // Also check that our sources are consistent with whether we have + // hashes or not. + let requires_hash = dist.id.source.requires_hash(); + if let Some(ref sdist) = dist.sdist { + if requires_hash != sdist.hash().is_some() { + return Err(LockErrorKind::Hash { + id: dist.id.clone(), + artifact_type: "source distribution", + expected: requires_hash, + } + .into()); + } + } + for wheel in &dist.wheels { + if requires_hash != wheel.hash.is_some() { + return Err(LockErrorKind::Hash { + id: dist.id.clone(), + artifact_type: "wheel", + expected: requires_hash, + } + .into()); + } + } + } + Ok(Lock { + version, distributions, requires_python, - }; - Self::try_from(wire) + by_id, + }) } /// Returns the [`Distribution`] entries in this lock. @@ -251,7 +422,7 @@ impl Lock { struct LockWire { version: u32, #[serde(rename = "distribution")] - distributions: Vec, + distributions: Vec, #[serde(rename = "requires-python")] requires_python: Option, } @@ -260,7 +431,11 @@ impl From for LockWire { fn from(lock: Lock) -> LockWire { LockWire { version: lock.version, - distributions: lock.distributions, + distributions: lock + .distributions + .into_iter() + .map(DistributionWire::from) + .collect(), requires_python: lock.requires_python, } } @@ -356,198 +531,23 @@ impl Lock { impl TryFrom for Lock { type Error = LockError; - fn try_from(mut wire: LockWire) -> Result { - // Put all dependencies for each distribution in a canonical order and - // check for duplicates. - for dist in &mut wire.distributions { - dist.dependencies.sort(); - for windows in dist.dependencies.windows(2) { - let (dep1, dep2) = (&windows[0], &windows[1]); - if dep1 == dep2 { - return Err(LockErrorKind::DuplicateDependency { - id: dist.id.clone(), - dependency: dep1.clone(), - } - .into()); - } - } - - // Perform the same validation for optional dependencies. - for (extra, dependencies) in &mut dist.optional_dependencies { - dependencies.sort(); - for windows in dependencies.windows(2) { - let (dep1, dep2) = (&windows[0], &windows[1]); - if dep1 == dep2 { - return Err(LockErrorKind::DuplicateOptionalDependency { - id: dist.id.clone(), - extra: extra.clone(), - dependency: dep1.clone(), - } - .into()); - } - } - } - - // Perform the same validation for dev dependencies. - for (group, dependencies) in &mut dist.dev_dependencies { - dependencies.sort(); - for windows in dependencies.windows(2) { - let (dep1, dep2) = (&windows[0], &windows[1]); - if dep1 == dep2 { - return Err(LockErrorKind::DuplicateDevDependency { - id: dist.id.clone(), - group: group.clone(), - dependency: dep1.clone(), - } - .into()); - } - } - } - } - wire.distributions - .sort_by(|dist1, dist2| dist1.id.cmp(&dist2.id)); - - // Check for duplicate distribution IDs and also build up the map for - // distributions keyed by their ID. - let mut by_id = FxHashMap::default(); - for (i, dist) in wire.distributions.iter().enumerate() { - if by_id.insert(dist.id.clone(), i).is_some() { - return Err(LockErrorKind::DuplicateDistribution { - id: dist.id.clone(), - } - .into()); - } - } - - // Build up a map from ID to extras. - let mut extras_by_id = FxHashMap::default(); - for dist in &wire.distributions { - for extra in dist.optional_dependencies.keys() { - extras_by_id - .entry(dist.id.clone()) - .or_insert_with(FxHashSet::default) - .insert(extra.clone()); - } - } - - // Remove any non-existent extras (e.g., extras that were requested but don't exist). - for dist in &mut wire.distributions { - dist.dependencies.retain(|dep| { - dep.extra.as_ref().map_or(true, |extra| { - extras_by_id - .get(&dep.distribution_id) - .is_some_and(|extras| extras.contains(extra)) - }) - }); - - for dependencies in dist.optional_dependencies.values_mut() { - dependencies.retain(|dep| { - dep.extra.as_ref().map_or(true, |extra| { - extras_by_id - .get(&dep.distribution_id) - .is_some_and(|extras| extras.contains(extra)) - }) - }); - } - - for dependencies in dist.dev_dependencies.values_mut() { - dependencies.retain(|dep| { - dep.extra.as_ref().map_or(true, |extra| { - extras_by_id - .get(&dep.distribution_id) - .is_some_and(|extras| extras.contains(extra)) - }) - }); - } - } - - // Check that every dependency has an entry in `by_id`. If any don't, - // it implies we somehow have a dependency with no corresponding locked - // distribution. - for dist in &wire.distributions { - for dep in &dist.dependencies { - if !by_id.contains_key(&dep.distribution_id) { - return Err(LockErrorKind::UnrecognizedDependency { - id: dist.id.clone(), - dependency: dep.clone(), - } - .into()); - } - } - - // Perform the same validation for optional dependencies. - for dependencies in dist.optional_dependencies.values() { - for dep in dependencies { - if !by_id.contains_key(&dep.distribution_id) { - return Err(LockErrorKind::UnrecognizedDependency { - id: dist.id.clone(), - dependency: dep.clone(), - } - .into()); - } - } - } - - // Perform the same validation for dev dependencies. - for dependencies in dist.dev_dependencies.values() { - for dep in dependencies { - if !by_id.contains_key(&dep.distribution_id) { - return Err(LockErrorKind::UnrecognizedDependency { - id: dist.id.clone(), - dependency: dep.clone(), - } - .into()); - } - } - } - - // Also check that our sources are consistent with whether we have - // hashes or not. - let requires_hash = dist.id.source.requires_hash(); - if let Some(ref sdist) = dist.sdist { - if requires_hash != sdist.hash().is_some() { - return Err(LockErrorKind::Hash { - id: dist.id.clone(), - artifact_type: "source distribution", - expected: requires_hash, - } - .into()); - } - } - for wheel in &dist.wheels { - if requires_hash != wheel.hash.is_some() { - return Err(LockErrorKind::Hash { - id: dist.id.clone(), - artifact_type: "wheel", - expected: requires_hash, - } - .into()); - } - } - } - Ok(Lock { - version: wire.version, - distributions: wire.distributions, - requires_python: wire.requires_python, - by_id, - }) + fn try_from(wire: LockWire) -> Result { + let distributions = wire + .distributions + .into_iter() + .map(DistributionWire::unwire) + .collect::, _>>()?; + Lock::new(wire.version, distributions, wire.requires_python) } } -#[derive(Clone, Debug, serde::Deserialize)] -#[serde(rename_all = "kebab-case")] +#[derive(Clone, Debug)] pub struct Distribution { - #[serde(flatten)] pub(crate) id: DistributionId, - #[serde(default)] sdist: Option, - #[serde(default, skip_serializing_if = "Vec::is_empty")] wheels: Vec, - #[serde(default, skip_serializing_if = "Vec::is_empty")] dependencies: Vec, - #[serde(default, skip_serializing_if = "BTreeMap::is_empty")] optional_dependencies: BTreeMap>, - #[serde(default, skip_serializing_if = "BTreeMap::is_empty")] dev_dependencies: BTreeMap>, } @@ -826,6 +826,71 @@ impl Distribution { } } +#[derive(Clone, Debug, serde::Deserialize)] +#[serde(rename_all = "kebab-case")] +struct DistributionWire { + #[serde(flatten)] + id: DistributionId, + #[serde(default)] + sdist: Option, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + wheels: Vec, + #[serde(default, skip_serializing_if = "Vec::is_empty")] + dependencies: Vec, + #[serde(default, skip_serializing_if = "BTreeMap::is_empty")] + optional_dependencies: BTreeMap>, + #[serde(default, skip_serializing_if = "BTreeMap::is_empty")] + dev_dependencies: BTreeMap>, +} + +impl DistributionWire { + fn unwire(self) -> Result { + let unwire_deps = |deps: Vec| -> Result, LockError> { + deps.into_iter().map(DependencyWire::unwire).collect() + }; + Ok(Distribution { + id: self.id, + sdist: self.sdist, + wheels: self.wheels, + dependencies: unwire_deps(self.dependencies)?, + optional_dependencies: self + .optional_dependencies + .into_iter() + .map(|(extra, deps)| Ok((extra, unwire_deps(deps)?))) + .collect::>()?, + dev_dependencies: self + .dev_dependencies + .into_iter() + .map(|(group, deps)| Ok((group, unwire_deps(deps)?))) + .collect::>()?, + }) + } +} + +impl From for DistributionWire { + fn from(dist: Distribution) -> DistributionWire { + let wire_deps = |deps: Vec| -> Vec { + deps.into_iter().map(DependencyWire::from).collect() + }; + DistributionWire { + id: dist.id, + sdist: dist.sdist, + wheels: dist.wheels, + dependencies: wire_deps(dist.dependencies), + optional_dependencies: dist + .optional_dependencies + .into_iter() + .map(|(extra, deps)| (extra, wire_deps(deps))) + .collect(), + dev_dependencies: dist + .dev_dependencies + .into_iter() + .map(|(group, deps)| (group, wire_deps(deps))) + .collect(), + } + } +} + /// Inside the lockfile, we match a dependency entry to a distribution entry through a key made up /// of the name, the version and the source url. #[derive(Clone, Debug, Eq, Hash, PartialEq, PartialOrd, Ord, serde::Deserialize)] @@ -854,6 +919,33 @@ impl std::fmt::Display for DistributionId { } } +#[derive(Clone, Debug, Eq, Hash, PartialEq, PartialOrd, Ord, serde::Deserialize)] +struct DistributionIdForDependency { + name: PackageName, + version: Version, + source: Source, +} + +impl DistributionIdForDependency { + fn unwire(self) -> Result { + Ok(DistributionId { + name: self.name, + version: self.version, + source: self.source, + }) + } +} + +impl From for DistributionIdForDependency { + fn from(id: DistributionId) -> DistributionIdForDependency { + DistributionIdForDependency { + name: id.name, + version: id.version, + source: id.source, + } + } +} + /// A unique identifier to differentiate between different distributions for the same version of a /// package. /// @@ -1647,13 +1739,10 @@ impl TryFrom for Wheel { } /// A single dependency of a distribution in a lock file. -#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, serde::Deserialize)] +#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord)] struct Dependency { - #[serde(flatten)] distribution_id: DistributionId, - #[serde(skip_serializing_if = "Option::is_none")] extra: Option, - #[serde(skip_serializing_if = "Option::is_none")] marker: Option, } @@ -1712,6 +1801,37 @@ impl std::fmt::Display for Dependency { } } +/// A single dependency of a distribution in a lock file. +#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, serde::Deserialize)] +struct DependencyWire { + #[serde(flatten)] + distribution_id: DistributionIdForDependency, + #[serde(skip_serializing_if = "Option::is_none")] + extra: Option, + #[serde(skip_serializing_if = "Option::is_none")] + marker: Option, +} + +impl DependencyWire { + fn unwire(self) -> Result { + Ok(Dependency { + distribution_id: self.distribution_id.unwire()?, + extra: self.extra, + marker: self.marker, + }) + } +} + +impl From for DependencyWire { + fn from(dependency: Dependency) -> DependencyWire { + DependencyWire { + distribution_id: DistributionIdForDependency::from(dependency.distribution_id), + extra: dependency.extra, + marker: dependency.marker, + } + } +} + /// A single hash for a distribution artifact in a lock file. /// /// A hash is encoded as a single TOML string in the format