Support lossless serialization for Git dependencies in lockfile (#3630)

## Summary

This PR adds lossless deserialization for `GitSourceDist` distributions
in the lockfile. Specifically, we now properly preserve the requested
revision, the subdirectory, and the precise Git commit SHA.

## Test Plan

`cargo test`
This commit is contained in:
Charlie Marsh 2024-05-17 17:23:40 -04:00 committed by GitHub
parent 0f67a6ceea
commit 0d512be46c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 182 additions and 30 deletions

View file

@ -1,7 +1,7 @@
use std::str::FromStr;
/// A complete Git SHA, i.e., a 40-character hexadecimal representation of a Git commit.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct GitSha(git2::Oid);
impl GitSha {

View file

@ -3,6 +3,8 @@
#![allow(dead_code, unreachable_code, unused_variables)]
use std::collections::VecDeque;
use std::path::{Path, PathBuf};
use std::str::FromStr;
use rustc_hash::FxHashMap;
use url::Url;
@ -17,6 +19,7 @@ use pep440_rs::Version;
use pep508_rs::{MarkerEnvironment, VerbatimUrl};
use platform_tags::{TagCompatibility, TagPriority, Tags};
use pypi_types::HashDigest;
use uv_git::{GitReference, GitSha};
use uv_normalize::PackageName;
use crate::resolution::AnnotatedDist;
@ -229,7 +232,7 @@ impl Distribution {
/// Convert the [`Distribution`] to a [`Dist`] that can be used in installation.
fn to_dist(&self, _marker_env: &MarkerEnvironment, tags: &Tags) -> Dist {
if let Some(best_wheel_index) = self.find_best_wheel(tags) {
return match self.id.source.kind {
return match &self.id.source.kind {
SourceKind::Registry => {
let wheels = self
.wheels
@ -259,7 +262,7 @@ impl Distribution {
}
if let Some(sdist) = &self.sdist {
return match self.id.source.kind {
return match &self.id.source.kind {
SourceKind::Path => {
let path_dist = PathSourceDist {
name: self.id.name.clone(),
@ -279,6 +282,29 @@ impl Distribution {
let source_dist = distribution_types::SourceDist::Directory(dir_dist);
Dist::Source(source_dist)
}
SourceKind::Git(git) => {
// Reconstruct the `GitUrl` from the `GitSource`.
let git_url = uv_git::GitUrl::new(
self.id.source.url.clone(),
GitReference::from(git.kind.clone()),
)
.with_precise(git.precise);
// Reconstruct the PEP 508-compatible URL from the `GitSource`.
// TODO(charlie): This shouldn't be necessary; it's only necessary because we're
// still losing the `GitUrl` somewhere along the path to installation.
let url = Url::from(git_url.clone());
let git_dist = GitSourceDist {
name: self.id.name.clone(),
url: VerbatimUrl::from_url(url),
git: Box::new(git_url),
subdirectory: git.subdirectory.as_ref().map(PathBuf::from),
};
let source_dist = distribution_types::SourceDist::Git(git_dist);
Dist::Source(source_dist)
}
// TODO: Handle other kinds of sources.
_ => todo!(),
};
@ -451,14 +477,17 @@ impl Source {
}
fn from_git_dist(git_dist: &GitSourceDist) -> Source {
// TODO(konsti): Fill in the Git revision details. GitSource and GitSourceDist are
// slightly mismatched.
Source {
kind: SourceKind::Git(GitSource {
precise: git_dist.git.precise().map(|git_sha| git_sha.to_string()),
kind: GitSourceKind::DefaultBranch,
kind: GitSourceKind::from(git_dist.git.reference().clone()),
precise: git_dist.git.precise().expect("precise commit"),
subdirectory: git_dist
.subdirectory
.as_deref()
.and_then(Path::to_str)
.map(ToString::to_string),
}),
url: git_dist.git.repository().clone(),
url: locked_git_url(git_dist),
}
}
}
@ -477,7 +506,10 @@ impl std::str::FromStr for Source {
url,
}),
"git" => Ok(Source {
kind: SourceKind::Git(GitSource::from_url(&mut url)),
kind: SourceKind::Git(GitSource::from_url(&mut url).map_err(|err| match err {
GitSourceError::InvalidSha => SourceParseError::invalid_sha(s),
GitSourceError::MissingSha => SourceParseError::missing_sha(s),
})?),
url,
}),
"direct" => Ok(Source {
@ -526,10 +558,7 @@ impl<'de> serde::Deserialize<'de> for Source {
/// variants should be added without changing the relative ordering of other
/// variants. Otherwise, this could cause the lock file to have a different
/// canonical ordering of distributions.
#[derive(
Clone, Debug, Eq, Hash, PartialEq, PartialOrd, Ord, serde::Deserialize, serde::Serialize,
)]
#[serde(rename_all = "kebab-case")]
#[derive(Clone, Debug, Eq, Hash, PartialEq, PartialOrd, Ord)]
pub(crate) enum SourceKind {
Registry,
Git(GitSource),
@ -565,34 +594,48 @@ impl SourceKind {
/// variants should be added without changing the relative ordering of other
/// variants. Otherwise, this could cause the lock file to have a different
/// canonical ordering of distributions.
#[derive(
Clone, Debug, Eq, Hash, PartialEq, PartialOrd, Ord, serde::Deserialize, serde::Serialize,
)]
#[derive(Clone, Debug, Eq, Hash, PartialEq, PartialOrd, Ord)]
pub(crate) struct GitSource {
precise: Option<String>,
precise: GitSha,
subdirectory: Option<String>,
kind: GitSourceKind,
}
/// An error that occurs when a source string could not be parsed.
#[derive(Clone, Debug, Eq, PartialEq)]
enum GitSourceError {
InvalidSha,
MissingSha,
}
impl GitSource {
/// Extracts a git source reference from the query pairs and the hash
/// fragment in the given URL.
///
/// This also removes the query pairs and hash fragment from the given
/// URL in place.
fn from_url(url: &mut Url) -> GitSource {
fn from_url(url: &mut Url) -> Result<GitSource, GitSourceError> {
let mut kind = GitSourceKind::DefaultBranch;
let mut subdirectory = None;
for (key, val) in url.query_pairs() {
kind = match &*key {
"tag" => GitSourceKind::Tag(val.into_owned()),
"branch" => GitSourceKind::Branch(val.into_owned()),
"rev" => GitSourceKind::Rev(val.into_owned()),
match &*key {
"tag" => kind = GitSourceKind::Tag(val.into_owned()),
"branch" => kind = GitSourceKind::Branch(val.into_owned()),
"rev" => kind = GitSourceKind::Rev(val.into_owned()),
"subdirectory" => subdirectory = Some(val.into_owned()),
_ => continue,
};
}
let precise = url.fragment().map(ToString::to_string);
let precise = GitSha::from_str(url.fragment().ok_or(GitSourceError::MissingSha)?)
.map_err(|_| GitSourceError::InvalidSha)?;
url.set_query(None);
url.set_fragment(None);
GitSource { precise, kind }
Ok(GitSource {
precise,
subdirectory,
kind,
})
}
}
@ -693,7 +736,7 @@ impl SourceDist {
fn from_git_dist(git_dist: &GitSourceDist, hashes: &[HashDigest]) -> SourceDist {
SourceDist {
url: git_dist.url.to_url(),
url: locked_git_url(git_dist),
hash: hashes.first().cloned().map(Hash::from),
}
}
@ -716,6 +759,80 @@ impl SourceDist {
}
}
impl From<GitReference> for GitSourceKind {
fn from(value: GitReference) -> Self {
match value {
GitReference::Branch(branch) => GitSourceKind::Branch(branch.to_string()),
GitReference::Tag(tag) => GitSourceKind::Tag(tag.to_string()),
GitReference::ShortCommit(rev) => GitSourceKind::Rev(rev.to_string()),
GitReference::BranchOrTag(rev) => GitSourceKind::Rev(rev.to_string()),
GitReference::BranchOrTagOrCommit(rev) => GitSourceKind::Rev(rev.to_string()),
GitReference::NamedRef(rev) => GitSourceKind::Rev(rev.to_string()),
GitReference::FullCommit(rev) => GitSourceKind::Rev(rev.to_string()),
GitReference::DefaultBranch => GitSourceKind::DefaultBranch,
}
}
}
impl From<GitSourceKind> for GitReference {
fn from(value: GitSourceKind) -> Self {
match value {
GitSourceKind::Branch(branch) => GitReference::Branch(branch),
GitSourceKind::Tag(tag) => GitReference::Tag(tag),
GitSourceKind::Rev(rev) => GitReference::from_rev(&rev),
GitSourceKind::DefaultBranch => GitReference::DefaultBranch,
}
}
}
/// Construct the lockfile-compatible [`URL`] for a [`GitSourceDist`].
fn locked_git_url(git_dist: &GitSourceDist) -> Url {
let mut url = git_dist.git.repository().clone();
// Clear out any existing state.
url.set_fragment(None);
url.set_query(None);
// Put the subdirectory in the query.
if let Some(subdirectory) = git_dist.subdirectory.as_deref().and_then(Path::to_str) {
url.query_pairs_mut()
.append_pair("subdirectory", subdirectory);
}
// Put the requested reference in the query.
match git_dist.git.reference() {
GitReference::Branch(branch) => {
url.query_pairs_mut()
.append_pair("branch", branch.to_string().as_str());
}
GitReference::Tag(tag) => {
url.query_pairs_mut()
.append_pair("tag", tag.to_string().as_str());
}
GitReference::ShortCommit(rev)
| GitReference::BranchOrTag(rev)
| GitReference::BranchOrTagOrCommit(rev)
| GitReference::NamedRef(rev)
| GitReference::FullCommit(rev) => {
url.query_pairs_mut()
.append_pair("rev", rev.to_string().as_str());
}
GitReference::DefaultBranch => {}
}
// Put the precise commit in the fragment.
url.set_fragment(
git_dist
.git
.precise()
.as_ref()
.map(GitSha::to_string)
.as_deref(),
);
url
}
/// Inspired by: <https://discuss.python.org/t/lock-files-again-but-this-time-w-sdists/46593>
#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)]
#[serde(into = "WheelWire", try_from = "WheelWire")]
@ -1163,14 +1280,27 @@ impl SourceParseError {
let kind = SourceParseErrorKind::InvalidUrl { err };
SourceParseError { given, kind }
}
fn missing_sha(given: &str) -> SourceParseError {
let given = given.to_string();
let kind = SourceParseErrorKind::MissingSha;
SourceParseError { given, kind }
}
fn invalid_sha(given: &str) -> SourceParseError {
let given = given.to_string();
let kind = SourceParseErrorKind::InvalidSha;
SourceParseError { given, kind }
}
}
impl std::error::Error for SourceParseError {
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
match self.kind {
SourceParseErrorKind::NoPlus | SourceParseErrorKind::UnrecognizedSourceName { .. } => {
None
}
SourceParseErrorKind::NoPlus
| SourceParseErrorKind::UnrecognizedSourceName { .. }
| SourceParseErrorKind::MissingSha
| SourceParseErrorKind::InvalidSha => None,
SourceParseErrorKind::InvalidUrl { ref err } => Some(err),
}
}
@ -1185,6 +1315,8 @@ impl std::fmt::Display for SourceParseError {
write!(f, "unrecognized name `{name}` in source `{given}`")
}
SourceParseErrorKind::InvalidUrl { .. } => write!(f, "invalid URL in source `{given}`"),
SourceParseErrorKind::MissingSha => write!(f, "missing SHA in source `{given}`"),
SourceParseErrorKind::InvalidSha => write!(f, "invalid SHA in source `{given}`"),
}
}
}
@ -1204,6 +1336,10 @@ enum SourceParseErrorKind {
/// The URL parse error.
err: url::ParseError,
},
/// An error that occurs when a Git URL is missing a precise commit SHA.
MissingSha,
/// An error that occurs when a Git URL has an invalid SHA.
InvalidSha,
}
/// An error that occurs when a hash digest could not be parsed.

View file

@ -212,10 +212,10 @@ fn lock_git() -> Result<()> {
[[distribution]]
name = "anyio"
version = "3.7.0"
source = "git+https://github.com/agronholm/anyio"
source = "git+https://github.com/agronholm/anyio?rev=3.7.0#f7a880ffac4766efb39e6fb60fc28d944f5d2f65"
[distribution.sdist]
url = "git+https://github.com/agronholm/anyio@f7a880ffac4766efb39e6fb60fc28d944f5d2f65"
url = "https://github.com/agronholm/anyio?rev=3.7.0#f7a880ffac4766efb39e6fb60fc28d944f5d2f65"
[[distribution.dependencies]]
name = "exceptiongroup"
@ -291,6 +291,22 @@ fn lock_git() -> Result<()> {
"###
);
// Install from the lockfile.
uv_snapshot!(context.install().arg("--unstable-uv-lock-file").arg("anyio"), @r###"
success: true
exit_code: 0
----- stdout -----
----- stderr -----
Downloaded 4 packages in [TIME]
Installed 5 packages in [TIME]
+ anyio==3.7.0 (from https://github.com/agronholm/anyio@f7a880ffac4766efb39e6fb60fc28d944f5d2f65)
+ exceptiongroup==1.2.0
+ idna==3.6
+ sniffio==1.3.1
+ typing-extensions==4.10.0
"###);
Ok(())
}