Metadata transformation for the build backend (#7781)

This commit is contained in:
konsti 2024-10-07 10:38:40 +02:00 committed by GitHub
parent 37b73230d3
commit 92538ada7c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 1630 additions and 72 deletions

View file

@ -0,0 +1,166 @@
mod metadata;
mod pep639_glob;
use crate::metadata::{PyProjectToml, ValidationError};
use crate::pep639_glob::Pep639GlobError;
use async_zip::base::write::ZipFileWriter;
use async_zip::error::ZipError;
use async_zip::{Compression, ZipEntryBuilder, ZipString};
use glob::{GlobError, PatternError};
use std::io;
use std::path::{Path, PathBuf};
use thiserror::Error;
use uv_distribution_filename::WheelFilename;
/// Errors returned by the build backend entry points in this file (`build` and `metadata`)
/// and by the writers they use.
#[derive(Debug, Error)]
pub enum Error {
/// An I/O error. `transparent` forwards both the message and the source of the wrapped
/// [`io::Error`] instead of adding a message of its own.
#[error(transparent)]
Io(#[from] io::Error),
/// `pyproject.toml` could not be deserialized as TOML; the [`toml::de::Error`] is the source.
#[error("Invalid pyproject.toml")]
Toml(#[from] toml::de::Error),
/// A [`ValidationError`] from the `metadata` module. It shares its top-level message with
/// [`Error::Toml`], so the two are only distinguishable through the error source.
#[error("Invalid pyproject.toml")]
Validation(#[from] ValidationError),
/// A `project.license-files` entry failed PEP 639 glob validation. The `String` is
/// interpolated into the message (presumably the offending glob expression — confirm at the
/// construction site), the [`Pep639GlobError`] is the source.
#[error("Invalid `project.license-files` glob expression: `{0}`")]
Pep639Glob(String, #[source] Pep639GlobError),
/// A `project.license-files` entry was rejected as a glob pattern. The `String` is
/// interpolated into the message, the [`PatternError`] is the source.
#[error("The `project.license-files` entry is not a valid glob pattern: `{0}`")]
Pattern(String, #[source] PatternError),
/// [`GlobError`] is a wrapped io error.
#[error(transparent)]
Glob(#[from] GlobError),
/// An error from `async_zip`, converted automatically through `From<ZipError>`.
#[error("Failed to write wheel zip archive")]
Zip(#[from] ZipError),
}
/// Allow dispatching between writing to a directory, writing to zip and writing to a `.tar.gz`.
///
/// Files are handed to the writer one at a time through `write_bytes`; once all files are
/// written, `close` consumes the writer so it can finalize its output.
trait AsyncDirectoryWrite: Sized {
/// Write `bytes` as the file named `filename` inside `directory`.
///
/// How `directory` is interpreted is up to the implementor: the implementations in this file
/// either use it as a prefix of the archive entry name or join it onto an output root.
async fn write_bytes(
&mut self,
directory: &Path,
filename: &str,
bytes: &[u8],
) -> Result<(), Error>;
/// Finish writing and consume the writer.
///
/// The default implementation does nothing and returns `Ok(())`; it is only `async` so that
/// implementors needing asynchronous finalization can override it.
#[allow(clippy::unused_async)] // https://github.com/rust-lang/rust-clippy/issues/11660
async fn close(self) -> Result<(), Error> {
Ok(())
}
}
/// Zip archive (wheel) writer.
///
/// Newtype around `async_zip`'s [`ZipFileWriter`], writing into an `fs_err` tokio file that is
/// adapted through `tokio_util::compat::Compat`.
struct AsyncZipWriter(ZipFileWriter<tokio_util::compat::Compat<fs_err::tokio::File>>);
impl AsyncDirectoryWrite for AsyncZipWriter {
    /// Add `bytes` to the archive as one complete entry named `{directory}/{filename}`.
    ///
    /// The entry is deflate-compressed and stored with unix permissions `0o644`.
    async fn write_bytes(
        &mut self,
        directory: &Path,
        filename: &str,
        bytes: &[u8],
    ) -> Result<(), Error> {
        // The entry name always uses a forward slash between directory and file name.
        let entry_name = ZipString::from(format!("{}/{}", directory.display(), filename));
        // TODO(konsti): Editables use stored.
        let entry = ZipEntryBuilder::new(entry_name, Compression::Deflate)
            // https://github.com/Majored/rs-async-zip/issues/150
            .unix_permissions(0o644);
        self.0.write_entry_whole(entry, bytes).await?;
        Ok(())
    }

    /// Close the underlying zip writer, consuming this writer.
    async fn close(self) -> Result<(), Error> {
        // The value handed back by the zip writer on success is not needed.
        let _inner = self.0.close().await?;
        Ok(())
    }
}
/// File system writer.
///
/// Writes every file below `root`, creating directories as needed.
struct AsyncFsWriter {
    /// Directory onto which each `directory` argument of `write_bytes` is joined.
    root: PathBuf,
}

impl AsyncDirectoryWrite for AsyncFsWriter {
    /// Create `root/directory` (including missing parents) and write `bytes` to `filename`
    /// inside it.
    async fn write_bytes(
        &mut self,
        directory: &Path,
        filename: &str,
        bytes: &[u8],
    ) -> Result<(), Error> {
        let target_dir = self.root.join(directory);
        fs_err::tokio::create_dir_all(&target_dir).await?;
        let target_file = target_dir.join(filename);
        fs_err::tokio::write(target_file, bytes).await?;
        Ok(())
    }
}
/// Build a wheel from the source tree and place it in the output directory.
///
/// Reads and parses `pyproject.toml` from `source_tree`, creates a `py3-none-any` wheel named
/// after the project's name and version inside `wheel_dir`, and writes the dist-info files into
/// it. Returns the filename of the wheel that was written.
///
/// # Errors
///
/// Fails if `pyproject.toml` cannot be read or parsed, if the wheel file cannot be created, or
/// if writing the dist-info entries or closing the archive fails.
pub async fn build(source_tree: &Path, wheel_dir: &Path) -> Result<WheelFilename, Error> {
    let pyproject_path = source_tree.join("pyproject.toml");
    let contents = fs_err::tokio::read_to_string(pyproject_path).await?;
    let pyproject_toml = PyProjectToml::parse(&contents)?;
    // NOTE(review): nothing returned by `check_build_system` is used here; presumably it reports
    // problems on its own — confirm in the `metadata` module.
    pyproject_toml.check_build_system();

    let filename = WheelFilename {
        name: pyproject_toml.name().clone(),
        version: pyproject_toml.version().clone(),
        build_tag: None,
        python_tag: vec![String::from("py3")],
        abi_tag: vec![String::from("none")],
        platform_tag: vec![String::from("any")],
    };

    let wheel_path = wheel_dir.join(filename.to_string());
    // TODO(konsti): async-zip doesn't like a buffered writer
    let wheel_file = fs_err::tokio::File::create(wheel_path).await?;
    let mut zip_writer = AsyncZipWriter(ZipFileWriter::with_tokio(wheel_file));
    write_dist_info(&mut zip_writer, &pyproject_toml, source_tree).await?;
    zip_writer.close().await?;

    Ok(filename)
}
/// Write the dist-info directory to the output directory without building the wheel.
pub async fn metadata(source_tree: &Path, metadata_directory: &Path) -> Result<String, Error> {
let contents = fs_err::tokio::read_to_string(source_tree.join("pyproject.toml")).await?;
let pyproject_toml = PyProjectToml::parse(&contents)?;
pyproject_toml.check_build_system();
let mut wheel_writer = AsyncFsWriter {
root: metadata_directory.to_path_buf(),
};
write_dist_info(&mut wheel_writer, &pyproject_toml, source_tree).await?;
wheel_writer.close().await?;
Ok(format!(
"{}-{}.dist-info",
pyproject_toml.name().as_dist_info_name(),
pyproject_toml.version()
))
}
/// Add `METADATA` and `entry_points.txt` to the dist-info directory.
///
/// Both files are written through `writer` into `{name}-{version}.dist-info`, `METADATA` first.
/// `root` is passed on to `PyProjectToml::to_metadata`.
///
/// # Errors
///
/// Fails if the metadata or the entry points cannot be derived from `pyproject_toml`, or if
/// either write fails.
async fn write_dist_info(
    writer: &mut impl AsyncDirectoryWrite,
    pyproject_toml: &PyProjectToml,
    root: &Path,
) -> Result<(), Error> {
    let dist_info_name = format!(
        "{}-{}.dist-info",
        pyproject_toml.name().as_dist_info_name(),
        pyproject_toml.version()
    );
    let dist_info_dir = PathBuf::from(dist_info_name);

    let metadata = pyproject_toml.to_metadata(root).await?;
    let metadata_text = metadata.core_metadata_format();
    writer
        .write_bytes(&dist_info_dir, "METADATA", metadata_text.as_bytes())
        .await?;

    let entry_points = pyproject_toml.to_entry_points()?;
    writer
        .write_bytes(&dist_info_dir, "entry_points.txt", entry_points.as_bytes())
        .await?;

    Ok(())
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,136 @@
//! Implementation of PEP 639 cross-language restricted globs.
use glob::{Pattern, PatternError};
use thiserror::Error;
/// Reasons for which `parse_pep639_glob` rejects a `license-files` glob.
///
/// All `pos` fields are character (not byte) indices into the glob, as they come from
/// `chars().enumerate()`.
#[derive(Debug, Error)]
pub enum Pep639GlobError {
/// The glob passed the PEP 639 character checks but was rejected by [`Pattern::new`]; the
/// message of the wrapped [`PatternError`] is forwarded unchanged.
#[error(transparent)]
PatternError(#[from] PatternError),
/// A `..` was found at the start of the glob or directly after a `/`; `pos` is the index of
/// its first dot.
#[error("The parent directory operator (`..`) at position {pos} is not allowed in license file globs")]
ParentDirectory { pos: usize },
/// A character outside of a `[...]` range that is not part of the allowed set.
#[error("Glob contains invalid character at position {pos}: `{invalid}`")]
InvalidCharacter { pos: usize, invalid: char },
/// A character inside of a `[...]` range that is not part of the allowed set.
#[error("Glob contains invalid character in range at position {pos}: `{invalid}`")]
InvalidCharacterRange { pos: usize, invalid: char },
}
/// Parse a PEP 639 `license-files` glob.
///
/// The syntax is more restricted than regular globbing in Python or Rust for platform independent
/// results. Since [`glob::Pattern`] is a superset over this format, we can use it after validating
/// that no unsupported features are in the string.
///
/// From [PEP 639](https://peps.python.org/pep-0639/#add-license-files-key):
///
/// > Its value is an array of strings which MUST contain valid glob patterns,
/// > as specified below:
/// >
/// > - Alphanumeric characters, underscores (`_`), hyphens (`-`) and dots (`.`)
/// >   MUST be matched verbatim.
/// >
/// > - Special glob characters: `*`, `?`, `**` and character ranges: `[]`
/// >   containing only the verbatim matched characters MUST be supported.
/// >   Within `[...]`, the hyphen indicates a range (e.g. `a-z`).
/// >   Hyphens at the start or end are matched literally.
/// >
/// > - Path delimiters MUST be the forward slash character (`/`).
/// >   Patterns are relative to the directory containing `pyproject.toml`,
/// >   therefore the leading slash character MUST NOT be used.
/// >
/// > - Parent directory indicators (`..`) MUST NOT be used.
/// >
/// > Any characters or character sequences not covered by this specification are
/// > invalid. Projects MUST NOT use such values.
/// > Tools consuming this field MAY reject invalid values with an error.
///
/// # Errors
///
/// All positions are character (not byte) indices into `glob`.
///
/// - [`Pep639GlobError::ParentDirectory`] if a path component starts with `..`.
/// - [`Pep639GlobError::InvalidCharacter`] if a character outside of a `[...]` range is not
///   covered by the specification, or if the glob starts with a `/`.
/// - [`Pep639GlobError::InvalidCharacterRange`] if a character inside of a `[...]` range is not
///   alphanumeric, `_`, `-` or `.`.
/// - [`Pep639GlobError::PatternError`] if the validated string is rejected by [`Pattern::new`].
pub(crate) fn parse_pep639_glob(glob: &str) -> Result<Pattern, Pep639GlobError> {
    let mut chars = glob.chars().enumerate().peekable();
    // A `..` is only a parent directory indicator at the start of the string or after a
    // directory separator.
    let mut start_or_slash = true;
    // `while let` rather than `for`: the body needs `peek()` and `by_ref()` on the iterator.
    while let Some((pos, c)) = chars.next() {
        if c.is_alphanumeric() || matches!(c, '_' | '-' | '*' | '?') {
            start_or_slash = false;
        } else if c == '.' {
            if start_or_slash && matches!(chars.peek(), Some((_, '.'))) {
                return Err(Pep639GlobError::ParentDirectory { pos });
            }
            start_or_slash = false;
        } else if c == '/' {
            // Patterns are relative to the directory containing `pyproject.toml`, so a leading
            // slash (an absolute pattern) is not covered by the specification.
            if pos == 0 {
                return Err(Pep639GlobError::InvalidCharacter { pos, invalid: c });
            }
            start_or_slash = true;
        } else if c == '[' {
            // Consume the range up to and including the closing `]`. If the input ends before a
            // `]` is seen, the loop simply runs out and the string falls through to the final
            // `Pattern::new` call below.
            for (pos, c) in chars.by_ref() {
                // TODO: https://discuss.python.org/t/pep-639-round-3-improving-license-clarity-with-better-package-metadata/53020/98
                if c.is_alphanumeric() || matches!(c, '_' | '-' | '.') {
                    // Allowed.
                } else if c == ']' {
                    break;
                } else {
                    return Err(Pep639GlobError::InvalidCharacterRange { pos, invalid: c });
                }
            }
            start_or_slash = false;
        } else {
            return Err(Pep639GlobError::InvalidCharacter { pos, invalid: c });
        }
    }
    Ok(Pattern::new(glob)?)
}
#[cfg(test)]
mod tests {
    use super::*;
    use insta::assert_snapshot;

    /// Rejected globs produce the expected user-facing error messages.
    #[test]
    fn test_error() {
        // Render the error of a glob that is expected to be rejected.
        let message = |glob| parse_pep639_glob(glob).unwrap_err().to_string();

        // Parent directory indicators.
        assert_snapshot!(
            message(".."),
            @"The parent directory operator (`..`) at position 0 is not allowed in license file globs"
        );
        assert_snapshot!(
            message("licenses/.."),
            @"The parent directory operator (`..`) at position 9 is not allowed in license file globs"
        );

        // Unsupported characters outside and inside of ranges.
        assert_snapshot!(
            message("licenses/LICEN!E.txt"),
            @"Glob contains invalid character at position 14: `!`"
        );
        assert_snapshot!(
            message("licenses/LICEN[!C]E.txt"),
            @"Glob contains invalid character in range at position 15: `!`"
        );
        assert_snapshot!(
            message("licenses/LICEN[C?]E.txt"),
            @"Glob contains invalid character in range at position 16: `?`"
        );

        // Passes the character checks, but is rejected by the `glob` crate.
        assert_snapshot!(
            message("******"),
            @"Pattern syntax error near position 2: wildcards are either regular `*` or recursive `**`"
        );

        // Backslashes are not valid path separators.
        assert_snapshot!(
            message(r"licenses\eula.txt"),
            @r"Glob contains invalid character at position 8: `\`"
        );
    }

    /// Globs that stay within the restricted syntax are accepted.
    #[test]
    fn test_valid() {
        for glob in [
            "licenses/*.txt",
            "licenses/**/*.txt",
            "LICEN[CS]E.txt",
            "LICEN?E.txt",
            "[a-z].txt",
            "[a-z._-].txt",
            "*/**",
            "LICENSE..txt",
            "LICENSE_file-1.txt",
            // (google translate)
            "licenses/라이센스*.txt",
            "licenses/ライセンス*.txt",
            "licenses/执照*.txt",
        ] {
            parse_pep639_glob(glob).unwrap();
        }
    }
}