diff --git a/Cargo.lock b/Cargo.lock index f9e51c47a..50760c312 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4785,6 +4785,7 @@ dependencies = [ "indoc", "insta", "itertools 0.14.0", + "rustc-hash", "schemars", "serde", "sha2", diff --git a/crates/uv-build-backend/Cargo.toml b/crates/uv-build-backend/Cargo.toml index f23581662..7714423d4 100644 --- a/crates/uv-build-backend/Cargo.toml +++ b/crates/uv-build-backend/Cargo.toml @@ -31,6 +31,7 @@ flate2 = { workspace = true, default-features = false } fs-err = { workspace = true } globset = { workspace = true } itertools = { workspace = true } +rustc-hash = { workspace = true } schemars = { workspace = true, optional = true } serde = { workspace = true } sha2 = { workspace = true } diff --git a/crates/uv-build-backend/src/lib.rs b/crates/uv-build-backend/src/lib.rs index 548214c32..26e71edaf 100644 --- a/crates/uv-build-backend/src/lib.rs +++ b/crates/uv-build-backend/src/lib.rs @@ -22,6 +22,7 @@ use uv_normalize::PackageName; use uv_pypi_types::{Identifier, IdentifierParseError}; use crate::metadata::ValidationError; +use crate::settings::ModuleName; #[derive(Debug, Error)] pub enum Error { @@ -184,7 +185,7 @@ fn check_metadata_directory( Ok(()) } -/// Returns the source root and the module path with the `__init__.py[i]` below to it while +/// Returns the source root and the module path(s) with the `__init__.py[i]` below it while /// checking the project layout and names. /// /// Some target platforms have case-sensitive filesystems, while others have case-insensitive @@ -198,13 +199,15 @@ fn check_metadata_directory( /// dist-info-normalization, the rules are lowercasing, replacing `.` with `_` and /// replace `-` with `_`. Since `.` and `-` are not allowed in identifiers, we can use a string /// comparison with the module name. +/// +/// While we recommend one module per package, it is possible to declare a list of modules. 
fn find_roots( source_tree: &Path, pyproject_toml: &PyProjectToml, relative_module_root: &Path, - module_name: Option<&str>, + module_name: Option<&ModuleName>, namespace: bool, -) -> Result<(PathBuf, PathBuf), Error> { +) -> Result<(PathBuf, Vec), Error> { let relative_module_root = uv_fs::normalize_path(relative_module_root); let src_root = source_tree.join(&relative_module_root); if !src_root.starts_with(source_tree) { @@ -215,22 +218,45 @@ fn find_roots( if namespace { // `namespace = true` disables module structure checks. - let module_relative = if let Some(module_name) = module_name { - module_name.split('.').collect::() + let modules_relative = if let Some(module_name) = module_name { + match module_name { + ModuleName::Name(name) => { + vec![name.split('.').collect::()] + } + ModuleName::Names(names) => names + .iter() + .map(|name| name.split('.').collect::()) + .collect(), + } } else { - PathBuf::from(pyproject_toml.name().as_dist_info_name().to_string()) + vec![PathBuf::from( + pyproject_toml.name().as_dist_info_name().to_string(), + )] }; - debug!("Namespace module path: {}", module_relative.user_display()); - return Ok((src_root, module_relative)); + for module_relative in &modules_relative { + debug!("Namespace module path: {}", module_relative.user_display()); + } + return Ok((src_root, modules_relative)); } - let module_relative = if let Some(module_name) = module_name { - module_path_from_module_name(&src_root, module_name)? + let modules_relative = if let Some(module_name) = module_name { + match module_name { + ModuleName::Name(name) => vec![module_path_from_module_name(&src_root, name)?], + ModuleName::Names(names) => names + .iter() + .map(|name| module_path_from_module_name(&src_root, name)) + .collect::>()?, + } } else { - find_module_path_from_package_name(&src_root, pyproject_toml.name())? + vec![find_module_path_from_package_name( + &src_root, + pyproject_toml.name(), + )?] 
}; - debug!("Module path: {}", module_relative.user_display()); - Ok((src_root, module_relative)) + for module_relative in &modules_relative { + debug!("Module path: {}", module_relative.user_display()); + } + Ok((src_root, modules_relative)) } /// Infer stubs packages from package name alone. @@ -410,6 +436,15 @@ mod tests { }) } + fn build_err(source_root: &Path) -> String { + let dist = TempDir::new().unwrap(); + let build_err = build(source_root, dist.path()).unwrap_err(); + let err_message: String = format_err(&build_err) + .replace(&source_root.user_display().to_string(), "[TEMP_PATH]") + .replace('\\', "/"); + err_message + } + fn sdist_contents(source_dist_path: &Path) -> Vec { let sdist_reader = BufReader::new(File::open(source_dist_path).unwrap()); let mut source_dist = tar::Archive::new(GzDecoder::new(sdist_reader)); @@ -998,13 +1033,8 @@ mod tests { fs_err::create_dir_all(src.path().join("src").join("simple_namespace").join("part")) .unwrap(); - let dist = TempDir::new().unwrap(); - let build_err = build(src.path(), dist.path()).unwrap_err(); - let err_message = format_err(&build_err) - .replace(&src.path().user_display().to_string(), "[TEMP_PATH]") - .replace('\\', "/"); assert_snapshot!( - err_message, + build_err(src.path()), @"Expected a Python module at: `[TEMP_PATH]/src/simple_namespace/part/__init__.py`" ); @@ -1025,16 +1055,13 @@ mod tests { .join("simple_namespace") .join("__init__.py"); File::create(&bogus_init_py).unwrap(); - let build_err = build(src.path(), dist.path()).unwrap_err(); - let err_message = format_err(&build_err) - .replace(&src.path().user_display().to_string(), "[TEMP_PATH]") - .replace('\\', "/"); assert_snapshot!( - err_message, + build_err(src.path()), @"For namespace packages, `__init__.py[i]` is not allowed in parent directory: `[TEMP_PATH]/src/simple_namespace`" ); fs_err::remove_file(bogus_init_py).unwrap(); + let dist = TempDir::new().unwrap(); let build1 = build(src.path(), dist.path()).unwrap(); 
assert_snapshot!(build1.source_dist_contents.join("\n"), @r" simple_namespace_part-1.0.0/ @@ -1209,4 +1236,117 @@ mod tests { cloud_db_schema_stubs-1.0.0.dist-info/WHEEL "); } + + /// A package with multiple modules, one a regular module and one a namespace package. + #[test] + fn multiple_module_names() { + let src = TempDir::new().unwrap(); + let pyproject_toml = indoc! {r#" + [project] + name = "simple-namespace-part" + version = "1.0.0" + + [tool.uv.build-backend] + module-name = ["foo", "simple_namespace.part_a", "simple_namespace.part_b"] + + [build-system] + requires = ["uv_build>=0.5.15,<0.6"] + build-backend = "uv_build" + "# + }; + fs_err::write(src.path().join("pyproject.toml"), pyproject_toml).unwrap(); + fs_err::create_dir_all(src.path().join("src").join("foo")).unwrap(); + fs_err::create_dir_all( + src.path() + .join("src") + .join("simple_namespace") + .join("part_a"), + ) + .unwrap(); + fs_err::create_dir_all( + src.path() + .join("src") + .join("simple_namespace") + .join("part_b"), + ) + .unwrap(); + + // Most of these checks exist in other tests too, but we want to ensure that they apply + // with multiple modules too. + + // The first module is missing an `__init__.py`. + assert_snapshot!( + build_err(src.path()), + @"Expected a Python module at: `[TEMP_PATH]/src/foo/__init__.py`" + ); + + // Create the first correct `__init__.py` file + File::create(src.path().join("src").join("foo").join("__init__.py")).unwrap(); + + // The second module, a namespace, is missing an `__init__.py`. 
+ assert_snapshot!( + build_err(src.path()), + @"Expected a Python module at: `[TEMP_PATH]/src/simple_namespace/part_a/__init__.py`" + ); + + // Create the other two correct `__init__.py` files + File::create( + src.path() + .join("src") + .join("simple_namespace") + .join("part_a") + .join("__init__.py"), + ) + .unwrap(); + File::create( + src.path() + .join("src") + .join("simple_namespace") + .join("part_b") + .join("__init__.py"), + ) + .unwrap(); + + // For the second module, a namespace, there must not be an `__init__.py` here. + let bogus_init_py = src + .path() + .join("src") + .join("simple_namespace") + .join("__init__.py"); + File::create(&bogus_init_py).unwrap(); + assert_snapshot!( + build_err(src.path()), + @"For namespace packages, `__init__.py[i]` is not allowed in parent directory: `[TEMP_PATH]/src/simple_namespace`" + ); + fs_err::remove_file(bogus_init_py).unwrap(); + + let dist = TempDir::new().unwrap(); + let build = build(src.path(), dist.path()).unwrap(); + assert_snapshot!(build.source_dist_contents.join("\n"), @r" + simple_namespace_part-1.0.0/ + simple_namespace_part-1.0.0/PKG-INFO + simple_namespace_part-1.0.0/pyproject.toml + simple_namespace_part-1.0.0/src + simple_namespace_part-1.0.0/src/foo + simple_namespace_part-1.0.0/src/foo/__init__.py + simple_namespace_part-1.0.0/src/simple_namespace + simple_namespace_part-1.0.0/src/simple_namespace/part_a + simple_namespace_part-1.0.0/src/simple_namespace/part_a/__init__.py + simple_namespace_part-1.0.0/src/simple_namespace/part_b + simple_namespace_part-1.0.0/src/simple_namespace/part_b/__init__.py + "); + assert_snapshot!(build.wheel_contents.join("\n"), @r" + foo/ + foo/__init__.py + simple_namespace/ + simple_namespace/part_a/ + simple_namespace/part_a/__init__.py + simple_namespace/part_b/ + simple_namespace/part_b/__init__.py + simple_namespace_part-1.0.0.dist-info/ + simple_namespace_part-1.0.0.dist-info/METADATA + simple_namespace_part-1.0.0.dist-info/RECORD + 
simple_namespace_part-1.0.0.dist-info/WHEEL + "); + } } diff --git a/crates/uv-build-backend/src/settings.rs b/crates/uv-build-backend/src/settings.rs index 3b413e8e3..9e9e44961 100644 --- a/crates/uv-build-backend/src/settings.rs +++ b/crates/uv-build-backend/src/settings.rs @@ -34,15 +34,19 @@ pub struct BuildBackendSettings { /// For namespace packages with a single module, the path can be dotted, e.g., `foo.bar` or /// `foo-stubs.bar`. /// + /// For namespace packages with multiple modules, the path can be a list, e.g., + /// `["foo", "bar"]`. We recommend using a single module per package, splitting multiple + /// modules into separate packages in a workspace. + /// /// Note that using this option runs the risk of creating two packages with different names but /// the same module names. Installing such packages together leads to unspecified behavior, /// often with corrupted files or directory trees. #[option( default = r#"None"#, - value_type = "str", + value_type = "str | list[str]", example = r#"module-name = "sklearn""# )] - pub module_name: Option, + pub module_name: Option, /// Glob expressions which files and directories to additionally include in the source /// distribution. @@ -181,6 +185,17 @@ impl Default for BuildBackendSettings { } } +/// Whether to include a single module or multiple modules. +#[derive(Deserialize, Serialize, Debug, Clone, PartialEq, Eq)] +#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))] +#[serde(untagged)] +pub enum ModuleName { + /// A single module name. + Name(String), + /// Multiple module names, which are all included. + Names(Vec), +} + /// Data includes for wheels. /// /// See `BuildBackendSettings::data`. 
diff --git a/crates/uv-build-backend/src/source_dist.rs b/crates/uv-build-backend/src/source_dist.rs index 0a302ccf2..3b6d11ba4 100644 --- a/crates/uv-build-backend/src/source_dist.rs +++ b/crates/uv-build-backend/src/source_dist.rs @@ -68,22 +68,24 @@ fn source_dist_matcher( includes.push(globset::escape("pyproject.toml")); // Check that the source tree contains a module. - let (src_root, module_relative) = find_roots( + let (src_root, modules_relative) = find_roots( source_tree, pyproject_toml, &settings.module_root, - settings.module_name.as_deref(), + settings.module_name.as_ref(), settings.namespace, )?; - // The wheel must not include any files included by the source distribution (at least until we - // have files generated in the source dist -> wheel build step). - let import_path = uv_fs::normalize_path( - &uv_fs::relative_to(src_root.join(module_relative), source_tree) - .expect("module root is inside source tree"), - ) - .portable_display() - .to_string(); - includes.push(format!("{}/**", globset::escape(&import_path))); + for module_relative in modules_relative { + // The wheel must not include any files included by the source distribution (at least until we + // have files generated in the source dist -> wheel build step). 
+ let import_path = uv_fs::normalize_path( + &uv_fs::relative_to(src_root.join(module_relative), source_tree) + .expect("module root is inside source tree"), + ) + .portable_display() + .to_string(); + includes.push(format!("{}/**", globset::escape(&import_path))); + } for include in includes { let glob = PortableGlobParser::Uv .parse(&include) diff --git a/crates/uv-build-backend/src/wheel.rs b/crates/uv-build-backend/src/wheel.rs index 7da232941..6eeb899d0 100644 --- a/crates/uv-build-backend/src/wheel.rs +++ b/crates/uv-build-backend/src/wheel.rs @@ -1,6 +1,7 @@ use fs_err::File; use globset::{GlobSet, GlobSetBuilder}; use itertools::Itertools; +use rustc_hash::FxHashSet; use sha2::{Digest, Sha256}; use std::io::{BufReader, Read, Write}; use std::path::{Path, PathBuf}; @@ -127,55 +128,61 @@ fn write_wheel( source_tree, pyproject_toml, &settings.module_root, - settings.module_name.as_deref(), + settings.module_name.as_ref(), settings.namespace, )?; - // For convenience, have directories for the whole tree in the wheel - for ancestor in module_relative.ancestors().skip(1) { - if ancestor == Path::new("") { - continue; - } - wheel_writer.write_directory(&ancestor.portable_display().to_string())?; - } - let mut files_visited = 0; - for entry in WalkDir::new(src_root.join(module_relative)) - .sort_by_file_name() - .into_iter() - .filter_entry(|entry| !exclude_matcher.is_match(entry.path())) - { - let entry = entry.map_err(|err| Error::WalkDir { - root: source_tree.to_path_buf(), - err, - })?; + let mut prefix_directories = FxHashSet::default(); + for module_relative in module_relative { + // For convenience, have directories for the whole tree in the wheel + for ancestor in module_relative.ancestors().skip(1) { + if ancestor == Path::new("") { + continue; + } + // Avoid duplicate directories in the zip. 
+ if prefix_directories.insert(ancestor.to_path_buf()) { + wheel_writer.write_directory(&ancestor.portable_display().to_string())?; + } + } - files_visited += 1; - if files_visited > 10000 { - warn_user_once!( - "Visited more than 10,000 files for wheel build. \ + for entry in WalkDir::new(src_root.join(module_relative)) + .sort_by_file_name() + .into_iter() + .filter_entry(|entry| !exclude_matcher.is_match(entry.path())) + { + let entry = entry.map_err(|err| Error::WalkDir { + root: source_tree.to_path_buf(), + err, + })?; + + files_visited += 1; + if files_visited > 10000 { + warn_user_once!( + "Visited more than 10,000 files for wheel build. \ Consider using more constrained includes or more excludes." - ); - } + ); + } - // We only want to take the module root, but since excludes start at the source tree root, - // we strip higher than we iterate. - let match_path = entry - .path() - .strip_prefix(source_tree) - .expect("walkdir starts with root"); - let entry_path = entry - .path() - .strip_prefix(&src_root) - .expect("walkdir starts with root"); - if exclude_matcher.is_match(match_path) { - trace!("Excluding from module: `{}`", match_path.user_display()); - continue; - } + // We only want to take the module root, but since excludes start at the source tree root, + // we strip higher than we iterate. 
+ let match_path = entry + .path() + .strip_prefix(source_tree) + .expect("walkdir starts with root"); + let entry_path = entry + .path() + .strip_prefix(&src_root) + .expect("walkdir starts with root"); + if exclude_matcher.is_match(match_path) { + trace!("Excluding from module: `{}`", match_path.user_display()); + continue; + } - let entry_path = entry_path.portable_display().to_string(); - debug!("Adding to wheel: {entry_path}"); - wheel_writer.write_dir_entry(&entry, &entry_path)?; + let entry_path = entry_path.portable_display().to_string(); + debug!("Adding to wheel: {entry_path}"); + wheel_writer.write_dir_entry(&entry, &entry_path)?; + } } debug!("Visited {files_visited} files for wheel build"); @@ -269,7 +276,7 @@ pub fn build_editable( source_tree, &pyproject_toml, &settings.module_root, - settings.module_name.as_deref(), + settings.module_name.as_ref(), settings.namespace, )?; diff --git a/docs/concepts/build-backend.md b/docs/concepts/build-backend.md index e68069ddb..868fa09af 100644 --- a/docs/concepts/build-backend.md +++ b/docs/concepts/build-backend.md @@ -134,16 +134,37 @@ the project structure: pyproject.toml src ├── foo -│   └── __init__.py +│ └── __init__.py └── bar └── __init__.py ``` While we do not recommend this structure (i.e., you should use a workspace with multiple packages -instead), it is supported via the `namespace` option: +instead), it is supported by passing a list to the `module-name` option: ```toml title="pyproject.toml" [tool.uv.build-backend] +module-name = ["foo", "bar"] +``` + +The `namespace = true` option offers an opt-out from enumerating all modules for complex namespace +packages: + +```text +pyproject.toml +src +└── foo + ├── bar + │ └── __init__.py + └── baz + └── __init__.py +``` + +And the configuration would be: + +```toml title="pyproject.toml" +[tool.uv.build-backend] +module-name = "foo" namespace = true ```