This commit is contained in:
konsti 2025-07-05 01:08:14 +03:30 committed by GitHub
commit 4d11c1fa37
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 268 additions and 81 deletions

1
Cargo.lock generated
View file

@ -4785,6 +4785,7 @@ dependencies = [
"indoc", "indoc",
"insta", "insta",
"itertools 0.14.0", "itertools 0.14.0",
"rustc-hash",
"schemars", "schemars",
"serde", "serde",
"sha2", "sha2",

View file

@ -31,6 +31,7 @@ flate2 = { workspace = true, default-features = false }
fs-err = { workspace = true } fs-err = { workspace = true }
globset = { workspace = true } globset = { workspace = true }
itertools = { workspace = true } itertools = { workspace = true }
rustc-hash = { workspace = true }
schemars = { workspace = true, optional = true } schemars = { workspace = true, optional = true }
serde = { workspace = true } serde = { workspace = true }
sha2 = { workspace = true } sha2 = { workspace = true }

View file

@ -22,6 +22,7 @@ use uv_normalize::PackageName;
use uv_pypi_types::{Identifier, IdentifierParseError}; use uv_pypi_types::{Identifier, IdentifierParseError};
use crate::metadata::ValidationError; use crate::metadata::ValidationError;
use crate::settings::ModuleName;
#[derive(Debug, Error)] #[derive(Debug, Error)]
pub enum Error { pub enum Error {
@ -184,7 +185,7 @@ fn check_metadata_directory(
Ok(()) Ok(())
} }
/// Returns the source root and the module path with the `__init__.py[i]` below to it while /// Returns the source root and the module path(s) with the `__init__.py[i]` below to it while
/// checking the project layout and names. /// checking the project layout and names.
/// ///
/// Some target platforms have case-sensitive filesystems, while others have case-insensitive /// Some target platforms have case-sensitive filesystems, while others have case-insensitive
@ -198,13 +199,15 @@ fn check_metadata_directory(
/// dist-info-normalization, the rules are lowercasing, replacing `.` with `_` and /// dist-info-normalization, the rules are lowercasing, replacing `.` with `_` and
/// replace `-` with `_`. Since `.` and `-` are not allowed in identifiers, we can use a string /// replace `-` with `_`. Since `.` and `-` are not allowed in identifiers, we can use a string
/// comparison with the module name. /// comparison with the module name.
///
/// While we recommend one module per package, it is possible to declare a list of modules.
fn find_roots( fn find_roots(
source_tree: &Path, source_tree: &Path,
pyproject_toml: &PyProjectToml, pyproject_toml: &PyProjectToml,
relative_module_root: &Path, relative_module_root: &Path,
module_name: Option<&str>, module_name: Option<&ModuleName>,
namespace: bool, namespace: bool,
) -> Result<(PathBuf, PathBuf), Error> { ) -> Result<(PathBuf, Vec<PathBuf>), Error> {
let relative_module_root = uv_fs::normalize_path(relative_module_root); let relative_module_root = uv_fs::normalize_path(relative_module_root);
let src_root = source_tree.join(&relative_module_root); let src_root = source_tree.join(&relative_module_root);
if !src_root.starts_with(source_tree) { if !src_root.starts_with(source_tree) {
@ -215,22 +218,45 @@ fn find_roots(
if namespace { if namespace {
// `namespace = true` disables module structure checks. // `namespace = true` disables module structure checks.
let module_relative = if let Some(module_name) = module_name { let modules_relative = if let Some(module_name) = module_name {
module_name.split('.').collect::<PathBuf>() match module_name {
ModuleName::Name(name) => {
vec![name.split('.').collect::<PathBuf>()]
}
ModuleName::Names(names) => names
.iter()
.map(|name| name.split('.').collect::<PathBuf>())
.collect(),
}
} else { } else {
PathBuf::from(pyproject_toml.name().as_dist_info_name().to_string()) vec![PathBuf::from(
pyproject_toml.name().as_dist_info_name().to_string(),
)]
}; };
debug!("Namespace module path: {}", module_relative.user_display()); for module_relative in &modules_relative {
return Ok((src_root, module_relative)); debug!("Namespace module path: {}", module_relative.user_display());
}
return Ok((src_root, modules_relative));
} }
let module_relative = if let Some(module_name) = module_name { let modules_relative = if let Some(module_name) = module_name {
module_path_from_module_name(&src_root, module_name)? match module_name {
ModuleName::Name(name) => vec![module_path_from_module_name(&src_root, name)?],
ModuleName::Names(names) => names
.iter()
.map(|name| module_path_from_module_name(&src_root, name))
.collect::<Result<_, _>>()?,
}
} else { } else {
find_module_path_from_package_name(&src_root, pyproject_toml.name())? vec![find_module_path_from_package_name(
&src_root,
pyproject_toml.name(),
)?]
}; };
debug!("Module path: {}", module_relative.user_display()); for module_relative in &modules_relative {
Ok((src_root, module_relative)) debug!("Module path: {}", module_relative.user_display());
}
Ok((src_root, modules_relative))
} }
/// Infer stubs packages from package name alone. /// Infer stubs packages from package name alone.
@ -410,6 +436,15 @@ mod tests {
}) })
} }
/// Builds `source_root` into a fresh temporary output directory, expecting the build to fail,
/// and returns the formatted error message.
///
/// To keep the message stable across machines, the displayed source root is replaced with
/// `[TEMP_PATH]` and backslashes are replaced with forward slashes.
///
/// Panics if the temporary directory cannot be created or if the build succeeds.
fn build_err(source_root: &Path) -> String {
    let out_dir = TempDir::new().unwrap();
    let failure = build(source_root, out_dir.path()).unwrap_err();
    let root_display = source_root.user_display().to_string();
    format_err(&failure)
        .replace(&root_display, "[TEMP_PATH]")
        .replace('\\', "/")
}
fn sdist_contents(source_dist_path: &Path) -> Vec<String> { fn sdist_contents(source_dist_path: &Path) -> Vec<String> {
let sdist_reader = BufReader::new(File::open(source_dist_path).unwrap()); let sdist_reader = BufReader::new(File::open(source_dist_path).unwrap());
let mut source_dist = tar::Archive::new(GzDecoder::new(sdist_reader)); let mut source_dist = tar::Archive::new(GzDecoder::new(sdist_reader));
@ -998,13 +1033,8 @@ mod tests {
fs_err::create_dir_all(src.path().join("src").join("simple_namespace").join("part")) fs_err::create_dir_all(src.path().join("src").join("simple_namespace").join("part"))
.unwrap(); .unwrap();
let dist = TempDir::new().unwrap();
let build_err = build(src.path(), dist.path()).unwrap_err();
let err_message = format_err(&build_err)
.replace(&src.path().user_display().to_string(), "[TEMP_PATH]")
.replace('\\', "/");
assert_snapshot!( assert_snapshot!(
err_message, build_err(src.path()),
@"Expected a Python module at: `[TEMP_PATH]/src/simple_namespace/part/__init__.py`" @"Expected a Python module at: `[TEMP_PATH]/src/simple_namespace/part/__init__.py`"
); );
@ -1025,16 +1055,13 @@ mod tests {
.join("simple_namespace") .join("simple_namespace")
.join("__init__.py"); .join("__init__.py");
File::create(&bogus_init_py).unwrap(); File::create(&bogus_init_py).unwrap();
let build_err = build(src.path(), dist.path()).unwrap_err();
let err_message = format_err(&build_err)
.replace(&src.path().user_display().to_string(), "[TEMP_PATH]")
.replace('\\', "/");
assert_snapshot!( assert_snapshot!(
err_message, build_err(src.path()),
@"For namespace packages, `__init__.py[i]` is not allowed in parent directory: `[TEMP_PATH]/src/simple_namespace`" @"For namespace packages, `__init__.py[i]` is not allowed in parent directory: `[TEMP_PATH]/src/simple_namespace`"
); );
fs_err::remove_file(bogus_init_py).unwrap(); fs_err::remove_file(bogus_init_py).unwrap();
let dist = TempDir::new().unwrap();
let build1 = build(src.path(), dist.path()).unwrap(); let build1 = build(src.path(), dist.path()).unwrap();
assert_snapshot!(build1.source_dist_contents.join("\n"), @r" assert_snapshot!(build1.source_dist_contents.join("\n"), @r"
simple_namespace_part-1.0.0/ simple_namespace_part-1.0.0/
@ -1209,4 +1236,117 @@ mod tests {
cloud_db_schema_stubs-1.0.0.dist-info/WHEEL cloud_db_schema_stubs-1.0.0.dist-info/WHEEL
"); ");
} }
/// A package that declares a list of modules: one regular module (`foo`) and two modules
/// inside a namespace package (`simple_namespace.part_a` and `simple_namespace.part_b`).
///
/// The test walks through the failure modes one by one (missing `__init__.py` for a regular
/// module, missing `__init__.py` for a namespace member, and a forbidden `__init__.py` in the
/// namespace parent) before checking the contents of a successful build.
#[test]
fn multiple_module_names() {
let src = TempDir::new().unwrap();
let pyproject_toml = indoc! {r#"
[project]
name = "simple-namespace-part"
version = "1.0.0"
[tool.uv.build-backend]
module-name = ["foo", "simple_namespace.part_a", "simple_namespace.part_b"]
[build-system]
requires = ["uv_build>=0.5.15,<0.6"]
build-backend = "uv_build"
"#
};
// Lay out the project: the `pyproject.toml` plus one (still empty) directory per declared
// module.
fs_err::write(src.path().join("pyproject.toml"), pyproject_toml).unwrap();
fs_err::create_dir_all(src.path().join("src").join("foo")).unwrap();
fs_err::create_dir_all(
src.path()
.join("src")
.join("simple_namespace")
.join("part_a"),
)
.unwrap();
fs_err::create_dir_all(
src.path()
.join("src")
.join("simple_namespace")
.join("part_b"),
)
.unwrap();
// Most of these checks exist in other tests too, but we want to ensure that they apply
// with multiple modules too.
// The first module is missing an `__init__.py`.
assert_snapshot!(
build_err(src.path()),
@"Expected a Python module at: `[TEMP_PATH]/src/foo/__init__.py`"
);
// Create the first correct `__init__.py` file
File::create(src.path().join("src").join("foo").join("__init__.py")).unwrap();
// The second module, a namespace, is missing an `__init__.py`.
assert_snapshot!(
build_err(src.path()),
@"Expected a Python module at: `[TEMP_PATH]/src/simple_namespace/part_a/__init__.py`"
);
// Create the other two correct `__init__.py` files
File::create(
src.path()
.join("src")
.join("simple_namespace")
.join("part_a")
.join("__init__.py"),
)
.unwrap();
File::create(
src.path()
.join("src")
.join("simple_namespace")
.join("part_b")
.join("__init__.py"),
)
.unwrap();
// For the second module, a namespace, there must not be an `__init__.py` here.
let bogus_init_py = src
.path()
.join("src")
.join("simple_namespace")
.join("__init__.py");
File::create(&bogus_init_py).unwrap();
assert_snapshot!(
build_err(src.path()),
@"For namespace packages, `__init__.py[i]` is not allowed in parent directory: `[TEMP_PATH]/src/simple_namespace`"
);
fs_err::remove_file(bogus_init_py).unwrap();
// With the stray `__init__.py` removed, the build is expected to succeed; snapshot the file
// lists of the source distribution and the wheel.
let dist = TempDir::new().unwrap();
let build = build(src.path(), dist.path()).unwrap();
assert_snapshot!(build.source_dist_contents.join("\n"), @r"
simple_namespace_part-1.0.0/
simple_namespace_part-1.0.0/PKG-INFO
simple_namespace_part-1.0.0/pyproject.toml
simple_namespace_part-1.0.0/src
simple_namespace_part-1.0.0/src/foo
simple_namespace_part-1.0.0/src/foo/__init__.py
simple_namespace_part-1.0.0/src/simple_namespace
simple_namespace_part-1.0.0/src/simple_namespace/part_a
simple_namespace_part-1.0.0/src/simple_namespace/part_a/__init__.py
simple_namespace_part-1.0.0/src/simple_namespace/part_b
simple_namespace_part-1.0.0/src/simple_namespace/part_b/__init__.py
");
assert_snapshot!(build.wheel_contents.join("\n"), @r"
foo/
foo/__init__.py
simple_namespace/
simple_namespace/part_a/
simple_namespace/part_a/__init__.py
simple_namespace/part_b/
simple_namespace/part_b/__init__.py
simple_namespace_part-1.0.0.dist-info/
simple_namespace_part-1.0.0.dist-info/METADATA
simple_namespace_part-1.0.0.dist-info/RECORD
simple_namespace_part-1.0.0.dist-info/WHEEL
");
}
} }

View file

@ -34,15 +34,19 @@ pub struct BuildBackendSettings {
/// For namespace packages with a single module, the path can be dotted, e.g., `foo.bar` or /// For namespace packages with a single module, the path can be dotted, e.g., `foo.bar` or
/// `foo-stubs.bar`. /// `foo-stubs.bar`.
/// ///
/// For namespace packages with multiple modules, the path can be a list, e.g.,
/// `["foo", "bar"]`. We recommend using a single module per package, splitting multiple
/// packages into a workspace.
///
/// Note that using this option runs the risk of creating two packages with different names but /// Note that using this option runs the risk of creating two packages with different names but
/// the same module names. Installing such packages together leads to unspecified behavior, /// the same module names. Installing such packages together leads to unspecified behavior,
/// often with corrupted files or directory trees. /// often with corrupted files or directory trees.
#[option( #[option(
default = r#"None"#, default = r#"None"#,
value_type = "str", value_type = "str | list[str]",
example = r#"module-name = "sklearn""# example = r#"module-name = "sklearn""#
)] )]
pub module_name: Option<String>, pub module_name: Option<ModuleName>,
/// Glob expressions which files and directories to additionally include in the source /// Glob expressions which files and directories to additionally include in the source
/// distribution. /// distribution.
@ -181,6 +185,17 @@ impl Default for BuildBackendSettings {
} }
} }
/// Whether to include a single module or multiple modules.
#[derive(Deserialize, Serialize, Debug, Clone, PartialEq, Eq)]
// `JsonSchema` is only derived when the `schemars` feature is enabled.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
// Untagged: the variant is selected by the shape of the value rather than by an explicit tag,
// so a plain string maps to `Name` and a list of strings maps to `Names`.
#[serde(untagged)]
pub enum ModuleName {
/// A single module name.
Name(String),
/// Multiple module names, which are all included.
Names(Vec<String>),
}
/// Data includes for wheels. /// Data includes for wheels.
/// ///
/// See `BuildBackendSettings::data`. /// See `BuildBackendSettings::data`.

View file

@ -68,22 +68,24 @@ fn source_dist_matcher(
includes.push(globset::escape("pyproject.toml")); includes.push(globset::escape("pyproject.toml"));
// Check that the source tree contains a module. // Check that the source tree contains a module.
let (src_root, module_relative) = find_roots( let (src_root, modules_relative) = find_roots(
source_tree, source_tree,
pyproject_toml, pyproject_toml,
&settings.module_root, &settings.module_root,
settings.module_name.as_deref(), settings.module_name.as_ref(),
settings.namespace, settings.namespace,
)?; )?;
// The wheel must not include any files included by the source distribution (at least until we for module_relative in modules_relative {
// have files generated in the source dist -> wheel build step). // The wheel must not include any files included by the source distribution (at least until we
let import_path = uv_fs::normalize_path( // have files generated in the source dist -> wheel build step).
&uv_fs::relative_to(src_root.join(module_relative), source_tree) let import_path = uv_fs::normalize_path(
.expect("module root is inside source tree"), &uv_fs::relative_to(src_root.join(module_relative), source_tree)
) .expect("module root is inside source tree"),
.portable_display() )
.to_string(); .portable_display()
includes.push(format!("{}/**", globset::escape(&import_path))); .to_string();
includes.push(format!("{}/**", globset::escape(&import_path)));
}
for include in includes { for include in includes {
let glob = PortableGlobParser::Uv let glob = PortableGlobParser::Uv
.parse(&include) .parse(&include)

View file

@ -1,6 +1,7 @@
use fs_err::File; use fs_err::File;
use globset::{GlobSet, GlobSetBuilder}; use globset::{GlobSet, GlobSetBuilder};
use itertools::Itertools; use itertools::Itertools;
use rustc_hash::FxHashSet;
use sha2::{Digest, Sha256}; use sha2::{Digest, Sha256};
use std::io::{BufReader, Read, Write}; use std::io::{BufReader, Read, Write};
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
@ -127,55 +128,61 @@ fn write_wheel(
source_tree, source_tree,
pyproject_toml, pyproject_toml,
&settings.module_root, &settings.module_root,
settings.module_name.as_deref(), settings.module_name.as_ref(),
settings.namespace, settings.namespace,
)?; )?;
// For convenience, have directories for the whole tree in the wheel
for ancestor in module_relative.ancestors().skip(1) {
if ancestor == Path::new("") {
continue;
}
wheel_writer.write_directory(&ancestor.portable_display().to_string())?;
}
let mut files_visited = 0; let mut files_visited = 0;
for entry in WalkDir::new(src_root.join(module_relative)) let mut prefix_directories = FxHashSet::default();
.sort_by_file_name() for module_relative in module_relative {
.into_iter() // For convenience, have directories for the whole tree in the wheel
.filter_entry(|entry| !exclude_matcher.is_match(entry.path())) for ancestor in module_relative.ancestors().skip(1) {
{ if ancestor == Path::new("") {
let entry = entry.map_err(|err| Error::WalkDir { continue;
root: source_tree.to_path_buf(), }
err, // Avoid duplicate directories in the zip.
})?; if prefix_directories.insert(ancestor.to_path_buf()) {
wheel_writer.write_directory(&ancestor.portable_display().to_string())?;
}
}
files_visited += 1; for entry in WalkDir::new(src_root.join(module_relative))
if files_visited > 10000 { .sort_by_file_name()
warn_user_once!( .into_iter()
"Visited more than 10,000 files for wheel build. \ .filter_entry(|entry| !exclude_matcher.is_match(entry.path()))
{
let entry = entry.map_err(|err| Error::WalkDir {
root: source_tree.to_path_buf(),
err,
})?;
files_visited += 1;
if files_visited > 10000 {
warn_user_once!(
"Visited more than 10,000 files for wheel build. \
Consider using more constrained includes or more excludes." Consider using more constrained includes or more excludes."
); );
} }
// We only want to take the module root, but since excludes start at the source tree root, // We only want to take the module root, but since excludes start at the source tree root,
// we strip higher than we iterate. // we strip higher than we iterate.
let match_path = entry let match_path = entry
.path() .path()
.strip_prefix(source_tree) .strip_prefix(source_tree)
.expect("walkdir starts with root"); .expect("walkdir starts with root");
let entry_path = entry let entry_path = entry
.path() .path()
.strip_prefix(&src_root) .strip_prefix(&src_root)
.expect("walkdir starts with root"); .expect("walkdir starts with root");
if exclude_matcher.is_match(match_path) { if exclude_matcher.is_match(match_path) {
trace!("Excluding from module: `{}`", match_path.user_display()); trace!("Excluding from module: `{}`", match_path.user_display());
continue; continue;
} }
let entry_path = entry_path.portable_display().to_string(); let entry_path = entry_path.portable_display().to_string();
debug!("Adding to wheel: {entry_path}"); debug!("Adding to wheel: {entry_path}");
wheel_writer.write_dir_entry(&entry, &entry_path)?; wheel_writer.write_dir_entry(&entry, &entry_path)?;
}
} }
debug!("Visited {files_visited} files for wheel build"); debug!("Visited {files_visited} files for wheel build");
@ -269,7 +276,7 @@ pub fn build_editable(
source_tree, source_tree,
&pyproject_toml, &pyproject_toml,
&settings.module_root, &settings.module_root,
settings.module_name.as_deref(), settings.module_name.as_ref(),
settings.namespace, settings.namespace,
)?; )?;

View file

@ -134,16 +134,37 @@ the project structure:
pyproject.toml pyproject.toml
src src
├── foo ├── foo
   └── __init__.py └── __init__.py
└── bar └── bar
└── __init__.py └── __init__.py
``` ```
While we do not recommend this structure (i.e., you should use a workspace with multiple packages While we do not recommend this structure (i.e., you should use a workspace with multiple packages
instead), it is supported via the `namespace` option: instead), it is supported by passing a list to the `module-name` option:
```toml title="pyproject.toml" ```toml title="pyproject.toml"
[tool.uv.build-backend] [tool.uv.build-backend]
module-name = ["foo", "bar"]
```
The `namespace = true` option offers an opt-out from enumerating all modules for complex namespace
packages:
```text
pyproject.toml
src
└── foo
├── bar
│ └── __init__.py
└── baz
└── __init__.py
```
And the configuration would be:
```toml title="pyproject.toml"
[tool.uv.build-backend]
module-name = "foo"
namespace = true namespace = true
``` ```