Build backend: Refactoring before list (#9558)
Some checks are pending
CI / check system | python on macos aarch64 (push) Blocked by required conditions
CI / cargo dev generate-all (push) Blocked by required conditions
CI / cargo shear (push) Waiting to run
CI / cargo test | ubuntu (push) Blocked by required conditions
CI / Determine changes (push) Waiting to run
CI / lint (push) Waiting to run
CI / cargo clippy | ubuntu (push) Blocked by required conditions
CI / cargo clippy | windows (push) Blocked by required conditions
CI / cargo test | macos (push) Blocked by required conditions
CI / cargo test | windows (push) Blocked by required conditions
CI / check windows trampoline | aarch64 (push) Blocked by required conditions
CI / check windows trampoline | i686 (push) Blocked by required conditions
CI / check windows trampoline | x86_64 (push) Blocked by required conditions
CI / test windows trampoline | i686 (push) Blocked by required conditions
CI / test windows trampoline | x86_64 (push) Blocked by required conditions
CI / typos (push) Waiting to run
CI / mkdocs (push) Waiting to run
CI / build binary | linux (push) Blocked by required conditions
CI / build binary | macos aarch64 (push) Blocked by required conditions
CI / build binary | macos x86_64 (push) Blocked by required conditions
CI / build binary | windows (push) Blocked by required conditions
CI / cargo build (msrv) (push) Blocked by required conditions
CI / build binary | freebsd (push) Blocked by required conditions
CI / ecosystem test | prefecthq/prefect (push) Blocked by required conditions
CI / ecosystem test | pallets/flask (push) Blocked by required conditions
CI / integration test | conda on ubuntu (push) Blocked by required conditions
CI / integration test | free-threaded on linux (push) Blocked by required conditions
CI / integration test | free-threaded on windows (push) Blocked by required conditions
CI / integration test | pypy on ubuntu (push) Blocked by required conditions
CI / integration test | pypy on windows (push) Blocked by required conditions
CI / integration test | graalpy on ubuntu (push) Blocked by required conditions
CI / integration test | graalpy on windows (push) Blocked by required conditions
CI / integration test | github actions (push) Blocked by required conditions
CI / integration test | determine publish changes (push) Blocked by required conditions
CI / check system | alpine (push) Blocked by required conditions
CI / integration test | uv publish (push) Blocked by required conditions
CI / check cache | ubuntu (push) Blocked by required conditions
CI / check cache | macos aarch64 (push) Blocked by required conditions
CI / check system | python on debian (push) Blocked by required conditions
CI / check system | python on fedora (push) Blocked by required conditions
CI / check system | conda3.11 on macos (push) Blocked by required conditions
CI / check system | conda3.8 on macos (push) Blocked by required conditions
CI / check system | python on ubuntu (push) Blocked by required conditions
CI / check system | python on opensuse (push) Blocked by required conditions
CI / check system | python on rocky linux 8 (push) Blocked by required conditions
CI / check system | python on rocky linux 9 (push) Blocked by required conditions
CI / check system | pypy on ubuntu (push) Blocked by required conditions
CI / check system | pyston (push) Blocked by required conditions
CI / check system | homebrew python on macos aarch64 (push) Blocked by required conditions
CI / check system | python on macos x86_64 (push) Blocked by required conditions
CI / check system | python3.10 on windows (push) Blocked by required conditions
CI / check system | python3.10 on windows x86 (push) Blocked by required conditions
CI / check system | python3.13 on windows (push) Blocked by required conditions
CI / check system | python3.12 via chocolatey (push) Blocked by required conditions
CI / check system | python3.9 via pyenv (push) Blocked by required conditions
CI / check system | python3.13 (push) Blocked by required conditions
CI / check system | conda3.11 on linux (push) Blocked by required conditions
CI / check system | conda3.8 on linux (push) Blocked by required conditions
CI / check system | conda3.11 on windows (push) Blocked by required conditions
CI / check system | conda3.8 on windows (push) Blocked by required conditions
CI / check system | amazonlinux (push) Blocked by required conditions
CI / check system | embedded python3.10 on windows (push) Blocked by required conditions
CI / benchmarks (push) Blocked by required conditions

For listing files, we first use a directory writer for source dists,
which we will use for collecting the filenames instead of writing the
archive in the future. I've split breaking `lib.rs` of uv-build-backend
into modules into the next PR.

No logic changes, only restructuring.

Best reviewed commit-by-commit
This commit is contained in:
konsti 2024-12-02 16:57:04 +01:00 committed by GitHub
parent 5b27decbe7
commit ff91cf7730
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 206 additions and 138 deletions

View file

@ -20,7 +20,7 @@ use uv_distribution_filename::{SourceDistExtension, SourceDistFilename, WheelFil
use uv_fs::Simplified;
use uv_globfilter::{parse_portable_glob, GlobDirFilter, PortableGlobError};
use uv_warnings::warn_user_once;
use walkdir::{DirEntry, WalkDir};
use walkdir::WalkDir;
use zip::{CompressionMethod, ZipWriter};
#[derive(Debug, Error)]
@ -77,9 +77,10 @@ pub enum Error {
TarWrite(PathBuf, #[source] io::Error),
}
/// Allow dispatching between writing to a directory, writing to zip and writing to a `.tar.gz`.
/// Dispatcher between writing to a directory, writing to a zip, writing to a `.tar.gz` and
/// listing files.
///
/// All paths are string types instead of path types since wheel are portable between platforms.
/// All paths are string types instead of path types since wheels are portable between platforms.
///
/// Contract: You must call close before dropping to obtain a valid output (dropping is fine in the
/// error case).
@ -87,9 +88,6 @@ trait DirectoryWriter {
/// Add a file with the given content.
fn write_bytes(&mut self, path: &str, bytes: &[u8]) -> Result<(), Error>;
/// Add a file with the given name and return a writer for it.
fn new_writer<'slf>(&'slf mut self, path: &str) -> Result<Box<dyn Write + 'slf>, Error>;
/// Add a local file.
fn write_file(&mut self, path: &str, file: &Path) -> Result<(), Error>;
@ -129,6 +127,16 @@ impl ZipDirectoryWriter {
record: Vec::new(),
}
}
/// Add a file with the given name and return a writer for it.
fn new_writer<'slf>(&'slf mut self, path: &str) -> Result<Box<dyn Write + 'slf>, Error> {
// TODO(konsti): We need to preserve permissions, at least the executable bit.
self.writer.start_file(
path,
zip::write::FileOptions::default().compression_method(self.compression),
)?;
Ok(Box::new(&mut self.writer))
}
}
impl DirectoryWriter for ZipDirectoryWriter {
@ -148,15 +156,6 @@ impl DirectoryWriter for ZipDirectoryWriter {
Ok(())
}
fn new_writer<'slf>(&'slf mut self, path: &str) -> Result<Box<dyn Write + 'slf>, Error> {
// TODO(konsti): We need to preserve permissions, at least the executable bit.
self.writer.start_file(
path,
zip::write::FileOptions::default().compression_method(self.compression),
)?;
Ok(Box::new(&mut self.writer))
}
fn write_file(&mut self, path: &str, file: &Path) -> Result<(), Error> {
trace!("Adding {} from {}", path, file.user_display());
let mut reader = BufReader::new(File::open(file)?);
@ -186,24 +185,30 @@ impl DirectoryWriter for ZipDirectoryWriter {
}
}
struct FilesystemWrite {
struct FilesystemWriter {
/// The virtualenv or metadata directory that add file paths are relative to.
root: PathBuf,
/// The entries in the `RECORD` file.
record: Vec<RecordEntry>,
}
impl FilesystemWrite {
impl FilesystemWriter {
fn new(root: &Path) -> Self {
Self {
root: root.to_owned(),
record: Vec::new(),
}
}
/// Add a file with the given name and return a writer for it.
fn new_writer<'slf>(&'slf mut self, path: &str) -> Result<Box<dyn Write + 'slf>, Error> {
trace!("Adding {}", path);
Ok(Box::new(File::create(self.root.join(path))?))
}
}
/// File system writer.
impl DirectoryWriter for FilesystemWrite {
impl DirectoryWriter for FilesystemWriter {
fn write_bytes(&mut self, path: &str, bytes: &[u8]) -> Result<(), Error> {
trace!("Adding {}", path);
let hash = format!("{:x}", Sha256::new().chain_update(bytes).finalize());
@ -215,12 +220,6 @@ impl DirectoryWriter for FilesystemWrite {
Ok(fs_err::write(self.root.join(path), bytes)?)
}
fn new_writer<'slf>(&'slf mut self, path: &str) -> Result<Box<dyn Write + 'slf>, Error> {
trace!("Adding {}", path);
Ok(Box::new(File::create(self.root.join(path))?))
}
fn write_file(&mut self, path: &str, file: &Path) -> Result<(), Error> {
trace!("Adding {} from {}", path, file.user_display());
let mut reader = BufReader::new(File::open(file)?);
@ -249,6 +248,82 @@ impl DirectoryWriter for FilesystemWrite {
}
}
struct TarGzWriter {
path: PathBuf,
tar: tar::Builder<GzEncoder<File>>,
}
impl TarGzWriter {
fn new(path: impl Into<PathBuf>) -> Result<Self, Error> {
let path = path.into();
let file = File::create(&path)?;
let enc = GzEncoder::new(file, Compression::default());
let tar = tar::Builder::new(enc);
Ok(Self { path, tar })
}
}
impl DirectoryWriter for TarGzWriter {
fn write_bytes(&mut self, path: &str, bytes: &[u8]) -> Result<(), Error> {
let mut header = Header::new_gnu();
header.set_size(bytes.len() as u64);
// Reasonable default to avoid 0o000 permissions, the user's umask will be applied on
// unpacking.
header.set_mode(0o644);
header.set_cksum();
self.tar
.append_data(&mut header, path, Cursor::new(bytes))
.map_err(|err| Error::TarWrite(self.path.clone(), err))?;
Ok(())
}
fn write_file(&mut self, path: &str, file: &Path) -> Result<(), Error> {
let metadata = fs_err::metadata(file)?;
let mut header = Header::new_gnu();
#[cfg(unix)]
{
// Preserve for example an executable bit.
header.set_mode(std::os::unix::fs::MetadataExt::mode(&metadata));
}
#[cfg(not(unix))]
{
// Reasonable default to avoid 0o000 permissions, the user's umask will be applied on
// unpacking.
header.set_mode(0o644);
}
header.set_size(metadata.len());
header.set_cksum();
let reader = BufReader::new(File::open(file)?);
self.tar
.append_data(&mut header, path, reader)
.map_err(|err| Error::TarWrite(self.path.clone(), err))?;
Ok(())
}
fn write_directory(&mut self, directory: &str) -> Result<(), Error> {
let mut header = Header::new_gnu();
// Directories are always executable, which means they can be listed.
header.set_mode(0o755);
header.set_entry_type(EntryType::Directory);
header
.set_path(directory)
.map_err(|err| Error::TarWrite(self.path.clone(), err))?;
header.set_size(0);
header.set_cksum();
self.tar
.append(&header, io::empty())
.map_err(|err| Error::TarWrite(self.path.clone(), err))?;
Ok(())
}
fn close(mut self, _dist_info_dir: &str) -> Result<(), Error> {
self.tar
.finish()
.map_err(|err| Error::TarWrite(self.path.clone(), err))?;
Ok(())
}
}
/// An entry in the `RECORD` file.
///
/// <https://packaging.python.org/en/latest/specifications/recording-installed-packages/#the-record-file>
@ -604,6 +679,25 @@ pub fn build_source_dist(
source_tree: &Path,
source_dist_directory: &Path,
uv_version: &str,
) -> Result<SourceDistFilename, Error> {
let contents = fs_err::read_to_string(source_tree.join("pyproject.toml"))?;
let pyproject_toml = PyProjectToml::parse(&contents)?;
let filename = SourceDistFilename {
name: pyproject_toml.name().clone(),
version: pyproject_toml.version().clone(),
extension: SourceDistExtension::TarGz,
};
let source_dist_path = source_dist_directory.join(filename.to_string());
let writer = TarGzWriter::new(&source_dist_path)?;
write_source_dist(source_tree, writer, uv_version)?;
Ok(filename)
}
/// Shared implementation for building and listing a source distribution.
fn write_source_dist(
source_tree: &Path,
mut writer: impl DirectoryWriter,
uv_version: &str,
) -> Result<SourceDistFilename, Error> {
let contents = fs_err::read_to_string(source_tree.join("pyproject.toml"))?;
let pyproject_toml = PyProjectToml::parse(&contents)?;
@ -627,25 +721,93 @@ pub fn build_source_dist(
pyproject_toml.version()
);
let source_dist_path = source_dist_directory.join(filename.to_string());
let tar_gz = File::create(&source_dist_path)?;
let enc = GzEncoder::new(tar_gz, Compression::default());
let mut tar = tar::Builder::new(enc);
let metadata = pyproject_toml.to_metadata(source_tree)?;
let metadata_email = metadata.core_metadata_format();
let mut header = Header::new_gnu();
header.set_size(metadata_email.bytes().len() as u64);
header.set_mode(0o644);
header.set_cksum();
tar.append_data(
&mut header,
Path::new(&top_level).join("PKG-INFO"),
Cursor::new(metadata_email),
)
.map_err(|err| Error::TarWrite(source_dist_path.clone(), err))?;
writer.write_bytes(
&Path::new(&top_level)
.join("PKG-INFO")
.portable_display()
.to_string(),
metadata_email.as_bytes(),
)?;
let (include_matcher, exclude_matcher) = source_dist_matcher(&pyproject_toml, settings)?;
let mut files_visited = 0;
for entry in WalkDir::new(source_tree).into_iter().filter_entry(|entry| {
// TODO(konsti): This should be prettier.
let relative = entry
.path()
.strip_prefix(source_tree)
.expect("walkdir starts with root");
// Fast path: Don't descend into a directory that can't be included. This is the most
// important performance optimization, it avoids descending into directories such as
// `.venv`. While walkdir is generally cheap, we still avoid traversing large data
// directories that often exist on the top level of a project. This is especially noticeable
// on network file systems with high latencies per operation (while contiguous reading may
// still be fast).
include_matcher.match_directory(relative) && !exclude_matcher.is_match(relative)
}) {
let entry = entry.map_err(|err| Error::WalkDir {
root: source_tree.to_path_buf(),
err,
})?;
files_visited += 1;
if files_visited > 10000 {
warn_user_once!(
"Visited more than 10,000 files for source distribution build. \
Consider using more constrained includes or more excludes."
);
}
// TODO(konsti): This should be prettier.
let relative = entry
.path()
.strip_prefix(source_tree)
.expect("walkdir starts with root");
if !include_matcher.match_path(relative) || exclude_matcher.is_match(relative) {
trace!("Excluding: `{}`", relative.user_display());
continue;
};
debug!("Including {}", relative.user_display());
if entry.file_type().is_dir() {
writer.write_directory(
&Path::new(&top_level)
.join(relative)
.portable_display()
.to_string(),
)?;
} else if entry.file_type().is_file() {
writer.write_file(
&Path::new(&top_level)
.join(relative)
.portable_display()
.to_string(),
entry.path(),
)?;
} else {
return Err(Error::UnsupportedFileType(
relative.to_path_buf(),
entry.file_type(),
));
}
}
debug!("Visited {files_visited} files for source dist build");
writer.close(&top_level)?;
Ok(filename)
}
/// Build includes and excludes for source tree walking for source dists.
fn source_dist_matcher(
pyproject_toml: &PyProjectToml,
settings: BuildBackendSettings,
) -> Result<(GlobDirFilter, GlobSet), Error> {
// File and directories to include in the source directory
let mut include_globs = Vec::new();
let mut includes: Vec<String> = settings.source_include;
@ -724,54 +886,7 @@ pub fn build_source_dist(
if exclude_matcher.is_match("pyproject.toml") {
return Err(Error::PyprojectTomlExcluded);
}
let mut files_visited = 0;
for entry in WalkDir::new(source_tree).into_iter().filter_entry(|entry| {
// TODO(konsti): This should be prettier.
let relative = entry
.path()
.strip_prefix(source_tree)
.expect("walkdir starts with root");
// Fast path: Don't descend into a directory that can't be included. This is the most
// important performance optimization, it avoids descending into directories such as
// `.venv`. While walkdir is generally cheap, we still avoid traversing large data
// directories that often exist on the top level of a project. This is especially noticeable
// on network file systems with high latencies per operation (while contiguous reading may
// still be fast).
include_matcher.match_directory(relative) && !exclude_matcher.is_match(relative)
}) {
let entry = entry.map_err(|err| Error::WalkDir {
root: source_tree.to_path_buf(),
err,
})?;
files_visited += 1;
if files_visited > 10000 {
warn_user_once!(
"Visited more than 10,000 files for source distribution build. \
Consider using more constrained includes or more excludes."
);
}
// TODO(konsti): This should be prettier.
let relative = entry
.path()
.strip_prefix(source_tree)
.expect("walkdir starts with root");
if !include_matcher.match_path(relative) || exclude_matcher.is_match(relative) {
trace!("Excluding: `{}`", relative.user_display());
continue;
};
add_source_dist_entry(&mut tar, &entry, &top_level, &source_dist_path, relative)?;
}
debug!("Visited {files_visited} files for source dist build");
tar.finish()
.map_err(|err| Error::TarWrite(source_dist_path.clone(), err))?;
Ok(filename)
Ok((include_matcher, exclude_matcher))
}
/// Build a globset matcher for excludes.
@ -802,55 +917,6 @@ fn build_exclude_matcher(
Ok(exclude_matcher)
}
/// Add a file or a directory to a source distribution.
fn add_source_dist_entry(
tar: &mut tar::Builder<GzEncoder<File>>,
entry: &DirEntry,
top_level: &str,
source_dist_path: &Path,
relative: &Path,
) -> Result<(), Error> {
debug!("Including {}", relative.user_display());
let metadata = fs_err::metadata(entry.path())?;
let mut header = Header::new_gnu();
#[cfg(unix)]
{
header.set_mode(std::os::unix::fs::MetadataExt::mode(&metadata));
}
#[cfg(not(unix))]
{
header.set_mode(0o644);
}
if entry.file_type().is_dir() {
header.set_entry_type(EntryType::Directory);
header
.set_path(Path::new(&top_level).join(relative))
.map_err(|err| Error::TarWrite(source_dist_path.to_path_buf(), err))?;
header.set_size(0);
header.set_cksum();
tar.append(&header, io::empty())
.map_err(|err| Error::TarWrite(source_dist_path.to_path_buf(), err))?;
Ok(())
} else if entry.file_type().is_file() {
header.set_size(metadata.len());
header.set_cksum();
tar.append_data(
&mut header,
Path::new(&top_level).join(relative),
BufReader::new(File::open(entry.path())?),
)
.map_err(|err| Error::TarWrite(source_dist_path.to_path_buf(), err))?;
Ok(())
} else {
Err(Error::UnsupportedFileType(
relative.to_path_buf(),
entry.file_type(),
))
}
}
/// Write the dist-info directory to the output directory without building the wheel.
pub fn metadata(
source_tree: &Path,
@ -876,7 +942,7 @@ pub fn metadata(
"Writing metadata files to {}",
metadata_directory.user_display()
);
let mut wheel_writer = FilesystemWrite::new(metadata_directory);
let mut wheel_writer = FilesystemWriter::new(metadata_directory);
let dist_info_dir = write_dist_info(
&mut wheel_writer,
&pyproject_toml,