Use separate path types for directories and files (#4285)

## Summary

This is what I consider to be the "real" fix for #8072. We now treat
directory and path URLs as separate `ParsedUrl` types and
`RequirementSource` types. This removes a lot of `.is_dir()` forking
within the `ParsedUrl::Path` arms and makes some states impossible
(e.g., you can't have a `.whl` path that is editable). It _also_ fixes
the `direct_url.json` for direct URLs that refer to files. Previously,
we wrote out to these as if they were installed as directories, which is
just wrong.
This commit is contained in:
Charlie Marsh 2024-06-12 12:59:21 -07:00 committed by GitHub
parent c4483017ac
commit d8f1de6134
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
28 changed files with 524 additions and 167 deletions

View file

@ -44,10 +44,10 @@ impl Pep508Url for VerbatimParsedUrl {
type Err = ParsedUrlError;
fn parse_url(url: &str, working_dir: Option<&Path>) -> Result<Self, Self::Err> {
let verbatim_url = <VerbatimUrl as Pep508Url>::parse_url(url, working_dir)?;
let verbatim = <VerbatimUrl as Pep508Url>::parse_url(url, working_dir)?;
Ok(Self {
parsed_url: ParsedUrl::try_from(verbatim_url.to_url())?,
verbatim: verbatim_url,
parsed_url: ParsedUrl::try_from(verbatim.to_url())?,
verbatim,
})
}
}
@ -58,28 +58,56 @@ impl UnnamedRequirementUrl for VerbatimParsedUrl {
working_dir: impl AsRef<Path>,
) -> Result<Self, Self::Err> {
let verbatim = VerbatimUrl::parse_path(&path, &working_dir)?;
let parsed_path_url = ParsedPathUrl {
url: verbatim.to_url(),
install_path: verbatim.as_path()?,
lock_path: path.as_ref().to_path_buf(),
editable: false,
let verbatim_path = verbatim.as_path()?;
let is_dir = if let Ok(metadata) = verbatim_path.metadata() {
metadata.is_dir()
} else {
verbatim_path.extension().is_none()
};
let parsed_url = if is_dir {
ParsedUrl::Directory(ParsedDirectoryUrl {
url: verbatim.to_url(),
install_path: verbatim.as_path()?,
lock_path: path.as_ref().to_path_buf(),
editable: false,
})
} else {
ParsedUrl::Path(ParsedPathUrl {
url: verbatim.to_url(),
install_path: verbatim.as_path()?,
lock_path: path.as_ref().to_path_buf(),
})
};
Ok(Self {
parsed_url: ParsedUrl::Path(parsed_path_url),
parsed_url,
verbatim,
})
}
fn parse_absolute_path(path: impl AsRef<Path>) -> Result<Self, Self::Err> {
let verbatim = VerbatimUrl::parse_absolute_path(&path)?;
let parsed_path_url = ParsedPathUrl {
url: verbatim.to_url(),
install_path: verbatim.as_path()?,
lock_path: path.as_ref().to_path_buf(),
editable: false,
let verbatim_path = verbatim.as_path()?;
let is_dir = if let Ok(metadata) = verbatim_path.metadata() {
metadata.is_dir()
} else {
verbatim_path.extension().is_none()
};
let parsed_url = if is_dir {
ParsedUrl::Directory(ParsedDirectoryUrl {
url: verbatim.to_url(),
install_path: verbatim.as_path()?,
lock_path: path.as_ref().to_path_buf(),
editable: false,
})
} else {
ParsedUrl::Path(ParsedPathUrl {
url: verbatim.to_url(),
install_path: verbatim.as_path()?,
lock_path: path.as_ref().to_path_buf(),
})
};
Ok(Self {
parsed_url: ParsedUrl::Path(parsed_path_url),
parsed_url,
verbatim,
})
}
@ -150,8 +178,10 @@ impl<'de> serde::de::Deserialize<'de> for VerbatimParsedUrl {
/// A URL in a requirement `foo @ <url>` must be one of the above.
#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Hash, Ord)]
pub enum ParsedUrl {
/// The direct URL is a path to a local directory or file.
/// The direct URL is a path to a local file.
Path(ParsedPathUrl),
/// The direct URL is a path to a local directory.
Directory(ParsedDirectoryUrl),
/// The direct URL is path to a Git repository.
Git(ParsedGitUrl),
/// The direct URL is a URL to a source archive (e.g., a `.tar.gz` file) or built archive
@ -162,16 +192,46 @@ pub enum ParsedUrl {
impl ParsedUrl {
/// Returns `true` if the URL is editable.
pub fn is_editable(&self) -> bool {
matches!(self, Self::Path(ParsedPathUrl { editable: true, .. }))
matches!(
self,
Self::Directory(ParsedDirectoryUrl { editable: true, .. })
)
}
}
/// A local path url
/// A local path URL for a file (i.e., a built or source distribution).
///
/// Examples:
/// * `file:///home/ferris/my_project/my_project-0.1.0.tar.gz`
/// * `file:///home/ferris/my_project/my_project-0.1.0-py3-none-any.whl`
#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Hash, Ord)]
pub struct ParsedPathUrl {
pub url: Url,
/// The resolved, absolute path to the distribution which we use for installing.
pub install_path: PathBuf,
/// The absolute path or path relative to the workspace root pointing to the distribution
/// which we use for locking. Unlike `given` on the verbatim URL all environment variables
/// are resolved, and unlike the install path, we did not yet join it on the base directory.
pub lock_path: PathBuf,
}
impl ParsedPathUrl {
/// Construct a [`ParsedPathUrl`] from a path requirement source.
pub fn from_source(install_path: PathBuf, lock_path: PathBuf, url: Url) -> Self {
Self {
url,
install_path,
lock_path,
}
}
}
/// A local path URL for a source directory.
///
/// Examples:
/// * `file:///home/ferris/my_project`
#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Hash, Ord)]
pub struct ParsedPathUrl {
pub struct ParsedDirectoryUrl {
pub url: Url,
/// The resolved, absolute path to the distribution which we use for installing.
pub install_path: PathBuf,
@ -182,8 +242,8 @@ pub struct ParsedPathUrl {
pub editable: bool,
}
impl ParsedPathUrl {
/// Construct a [`ParsedPathUrl`] from a path requirement source.
impl ParsedDirectoryUrl {
/// Construct a [`ParsedDirectoryUrl`] from a path requirement source.
pub fn from_source(
install_path: PathBuf,
lock_path: PathBuf,
@ -322,12 +382,25 @@ impl TryFrom<Url> for ParsedUrl {
let path = url
.to_file_path()
.map_err(|()| ParsedUrlError::InvalidFileUrl(url.clone()))?;
Ok(Self::Path(ParsedPathUrl {
url,
install_path: path.clone(),
lock_path: path,
editable: false,
}))
let is_dir = if let Ok(metadata) = path.metadata() {
metadata.is_dir()
} else {
path.extension().is_none()
};
if is_dir {
Ok(Self::Directory(ParsedDirectoryUrl {
url,
install_path: path.clone(),
lock_path: path,
editable: false,
}))
} else {
Ok(Self::Path(ParsedPathUrl {
url,
install_path: path.clone(),
lock_path: path,
}))
}
} else {
Ok(Self::Archive(ParsedArchiveUrl::from(url)))
}
@ -340,6 +413,7 @@ impl TryFrom<&ParsedUrl> for DirectUrl {
fn try_from(value: &ParsedUrl) -> Result<Self, Self::Error> {
match value {
ParsedUrl::Path(value) => Self::try_from(value),
ParsedUrl::Directory(value) => Self::try_from(value),
ParsedUrl::Git(value) => Self::try_from(value),
ParsedUrl::Archive(value) => Self::try_from(value),
}
@ -350,6 +424,21 @@ impl TryFrom<&ParsedPathUrl> for DirectUrl {
type Error = ParsedUrlError;
fn try_from(value: &ParsedPathUrl) -> Result<Self, Self::Error> {
Ok(Self::ArchiveUrl {
url: value.url.to_string(),
archive_info: ArchiveInfo {
hash: None,
hashes: None,
},
subdirectory: None,
})
}
}
impl TryFrom<&ParsedDirectoryUrl> for DirectUrl {
type Error = ParsedUrlError;
fn try_from(value: &ParsedDirectoryUrl) -> Result<Self, Self::Error> {
Ok(Self::LocalDirectory {
url: value.url.to_string(),
dir_info: DirInfo {
@ -394,6 +483,7 @@ impl From<ParsedUrl> for Url {
fn from(value: ParsedUrl) -> Self {
match value {
ParsedUrl::Path(value) => value.into(),
ParsedUrl::Directory(value) => value.into(),
ParsedUrl::Git(value) => value.into(),
ParsedUrl::Archive(value) => value.into(),
}
@ -406,6 +496,12 @@ impl From<ParsedPathUrl> for Url {
}
}
impl From<ParsedDirectoryUrl> for Url {
fn from(value: ParsedDirectoryUrl) -> Self {
value.url
}
}
impl From<ParsedArchiveUrl> for Url {
fn from(value: ParsedArchiveUrl) -> Self {
let mut url = value.url;

View file

@ -114,6 +114,9 @@ impl Display for Requirement {
RequirementSource::Path { url, .. } => {
write!(f, " @ {url}")?;
}
RequirementSource::Directory { url, .. } => {
write!(f, " @ {url}")?;
}
}
if let Some(marker) = &self.marker {
write!(f, " ; {marker}")?;
@ -166,10 +169,22 @@ pub enum RequirementSource {
url: VerbatimUrl,
},
/// A local built or source distribution, either from a path or a `file://` URL. It can either
/// be a binary distribution (a `.whl` file), a source distribution archive (a `.zip` or
/// `.tag.gz` file) or a source tree (a directory with a pyproject.toml in, or a legacy
/// source distribution with only a setup.py but non pyproject.toml in it).
/// be a binary distribution (a `.whl` file) or a source distribution archive (a `.zip` or
/// `.tar.gz` file).
Path {
/// The resolved, absolute path to the distribution which we use for installing.
install_path: PathBuf,
/// The absolute path or path relative to the workspace root pointing to the distribution
/// which we use for locking. Unlike `given` on the verbatim URL all environment variables
/// are resolved, and unlike the install path, we did not yet join it on the base directory.
lock_path: PathBuf,
/// The PEP 508 style URL in the format
/// `file:///<path>#subdirectory=<subdirectory>`.
url: VerbatimUrl,
},
/// A local source tree (a directory with a pyproject.toml in, or a legacy
/// source distribution with only a setup.py but non pyproject.toml in it).
Directory {
/// The resolved, absolute path to the distribution which we use for installing.
install_path: PathBuf,
/// The absolute path or path relative to the workspace root pointing to the distribution
@ -193,7 +208,12 @@ impl RequirementSource {
install_path: local_file.install_path.clone(),
lock_path: local_file.lock_path.clone(),
url,
editable: local_file.editable,
},
ParsedUrl::Directory(directory) => RequirementSource::Directory {
install_path: directory.install_path.clone(),
lock_path: directory.lock_path.clone(),
editable: directory.editable,
url,
},
ParsedUrl::Git(git) => RequirementSource::Git {
url,
@ -212,6 +232,6 @@ impl RequirementSource {
/// Returns `true` if the source is editable.
pub fn is_editable(&self) -> bool {
matches!(self, Self::Path { editable: true, .. })
matches!(self, Self::Directory { editable: true, .. })
}
}