mirror of
https://github.com/astral-sh/uv.git
synced 2025-11-20 11:56:03 +00:00
Enforce UTF‑8-encoded license files during uv build (#16699)
I noticed this when working on https://github.com/astral-sh/uv/pull/16697. [PEP 639](https://peps.python.org/pep-0639/#add-license-files-key) expects tools to ship license texts as UTF‑8, but previously `uv build` would quietly include any binary blob listed under `project.license-files`.

I have no clue what is going on with `rustfmt` for this file, but it seems that when I add the check, it wants to reformat a bunch of surrounding stuff. The relevant part to look at is:

```rust
for license_file in &license_files {
    let file_path = root.join(license_file);
    let bytes = fs_err::read(&file_path)?;
    if str::from_utf8(&bytes).is_err() {
        return Err(ValidationError::LicenseFileNotUtf8(license_file.clone()).into());
    }
}
```

where we validate all collected license files before proceeding.

---------

Co-authored-by: konstin <konstin@mailbox.org>
This commit is contained in:
parent
c167146f8c
commit
e28dc62358
2 changed files with 160 additions and 95 deletions
|
|
@ -3,7 +3,7 @@ use std::ffi::OsStr;
|
|||
use std::fmt::Display;
|
||||
use std::fmt::Write;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::str::FromStr;
|
||||
use std::str::{self, FromStr};
|
||||
|
||||
use itertools::Itertools;
|
||||
use serde::{Deserialize, Deserializer};
|
||||
|
|
@ -60,6 +60,8 @@ pub enum ValidationError {
|
|||
ReservedGuiScripts,
|
||||
#[error("`project.license` is not a valid SPDX expression: {0}")]
|
||||
InvalidSpdx(String, #[source] spdx::error::ParseError),
|
||||
#[error("License file `{}` must be UTF-8 encoded", _0)]
|
||||
LicenseFileNotUtf8(String),
|
||||
}
|
||||
|
||||
/// Check if the build backend is matching the currently running uv version.
|
||||
|
|
@ -339,99 +341,7 @@ impl PyProjectToml {
|
|||
"2.3"
|
||||
};
|
||||
|
||||
// TODO(konsti): Issue a warning on old license metadata once PEP 639 is universal.
|
||||
let (license, license_expression, license_files) =
|
||||
if let Some(license_globs) = &self.project.license_files {
|
||||
let license_expression = match &self.project.license {
|
||||
None => None,
|
||||
Some(License::Spdx(license_expression)) => Some(license_expression.clone()),
|
||||
Some(License::Text { .. } | License::File { .. }) => {
|
||||
return Err(ValidationError::MixedLicenseGenerations.into());
|
||||
}
|
||||
};
|
||||
|
||||
let mut license_files = Vec::new();
|
||||
let mut license_globs_parsed = Vec::new();
|
||||
for license_glob in license_globs {
|
||||
let pep639_glob =
|
||||
PortableGlobParser::Pep639
|
||||
.parse(license_glob)
|
||||
.map_err(|err| Error::PortableGlob {
|
||||
field: license_glob.to_owned(),
|
||||
source: err,
|
||||
})?;
|
||||
license_globs_parsed.push(pep639_glob);
|
||||
}
|
||||
let license_globs =
|
||||
GlobDirFilter::from_globs(&license_globs_parsed).map_err(|err| {
|
||||
Error::GlobSetTooLarge {
|
||||
field: "tool.uv.build-backend.source-include".to_string(),
|
||||
source: err,
|
||||
}
|
||||
})?;
|
||||
|
||||
for entry in WalkDir::new(root)
|
||||
.sort_by_file_name()
|
||||
.into_iter()
|
||||
.filter_entry(|entry| {
|
||||
license_globs.match_directory(
|
||||
entry
|
||||
.path()
|
||||
.strip_prefix(root)
|
||||
.expect("walkdir starts with root"),
|
||||
)
|
||||
})
|
||||
{
|
||||
let entry = entry.map_err(|err| Error::WalkDir {
|
||||
root: root.to_path_buf(),
|
||||
err,
|
||||
})?;
|
||||
let relative = entry
|
||||
.path()
|
||||
.strip_prefix(root)
|
||||
.expect("walkdir starts with root");
|
||||
if !license_globs.match_path(relative) {
|
||||
trace!("Not a license files match: {}", relative.user_display());
|
||||
continue;
|
||||
}
|
||||
if !entry.file_type().is_file() {
|
||||
trace!(
|
||||
"Not a file in license files match: {}",
|
||||
relative.user_display()
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
error_on_venv(entry.file_name(), entry.path())?;
|
||||
|
||||
debug!("License files match: {}", relative.user_display());
|
||||
license_files.push(relative.portable_display().to_string());
|
||||
}
|
||||
|
||||
// The glob order may be unstable
|
||||
license_files.sort();
|
||||
|
||||
(None, license_expression, license_files)
|
||||
} else {
|
||||
match &self.project.license {
|
||||
None => (None, None, Vec::new()),
|
||||
Some(License::Spdx(license_expression)) => {
|
||||
(None, Some(license_expression.clone()), Vec::new())
|
||||
}
|
||||
Some(License::Text { text }) => (Some(text.clone()), None, Vec::new()),
|
||||
Some(License::File { file }) => {
|
||||
let text = fs_err::read_to_string(root.join(file))?;
|
||||
(Some(text), None, Vec::new())
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Check that the license expression is a valid SPDX identifier.
|
||||
if let Some(license_expression) = &license_expression {
|
||||
if let Err(err) = spdx::Expression::parse(license_expression) {
|
||||
return Err(ValidationError::InvalidSpdx(license_expression.clone(), err).into());
|
||||
}
|
||||
}
|
||||
let (license, license_expression, license_files) = self.license_metadata(root)?;
|
||||
|
||||
// TODO(konsti): https://peps.python.org/pep-0753/#label-normalization (Draft)
|
||||
let project_urls = self
|
||||
|
|
@ -518,6 +428,118 @@ impl PyProjectToml {
|
|||
})
|
||||
}
|
||||
|
||||
/// Parse and validate the old (PEP 621) and new (PEP 639) license files.
|
||||
#[allow(clippy::type_complexity)]
|
||||
fn license_metadata(
|
||||
&self,
|
||||
root: &Path,
|
||||
) -> Result<(Option<String>, Option<String>, Vec<String>), Error> {
|
||||
// TODO(konsti): Issue a warning on old license metadata once PEP 639 is universal.
|
||||
let (license, license_expression, license_files) = if let Some(license_globs) =
|
||||
&self.project.license_files
|
||||
{
|
||||
let license_expression = match &self.project.license {
|
||||
None => None,
|
||||
Some(License::Spdx(license_expression)) => Some(license_expression.clone()),
|
||||
Some(License::Text { .. } | License::File { .. }) => {
|
||||
return Err(ValidationError::MixedLicenseGenerations.into());
|
||||
}
|
||||
};
|
||||
|
||||
let mut license_files = Vec::new();
|
||||
let mut license_globs_parsed = Vec::new();
|
||||
for license_glob in license_globs {
|
||||
let pep639_glob =
|
||||
PortableGlobParser::Pep639
|
||||
.parse(license_glob)
|
||||
.map_err(|err| Error::PortableGlob {
|
||||
field: license_glob.to_owned(),
|
||||
source: err,
|
||||
})?;
|
||||
license_globs_parsed.push(pep639_glob);
|
||||
}
|
||||
let license_globs =
|
||||
GlobDirFilter::from_globs(&license_globs_parsed).map_err(|err| {
|
||||
Error::GlobSetTooLarge {
|
||||
field: "tool.uv.build-backend.source-include".to_string(),
|
||||
source: err,
|
||||
}
|
||||
})?;
|
||||
|
||||
for entry in WalkDir::new(root)
|
||||
.sort_by_file_name()
|
||||
.into_iter()
|
||||
.filter_entry(|entry| {
|
||||
license_globs.match_directory(
|
||||
entry
|
||||
.path()
|
||||
.strip_prefix(root)
|
||||
.expect("walkdir starts with root"),
|
||||
)
|
||||
})
|
||||
{
|
||||
let entry = entry.map_err(|err| Error::WalkDir {
|
||||
root: root.to_path_buf(),
|
||||
err,
|
||||
})?;
|
||||
let relative = entry
|
||||
.path()
|
||||
.strip_prefix(root)
|
||||
.expect("walkdir starts with root");
|
||||
if !license_globs.match_path(relative) {
|
||||
trace!("Not a license files match: {}", relative.user_display());
|
||||
continue;
|
||||
}
|
||||
if !entry.file_type().is_file() {
|
||||
trace!(
|
||||
"Not a file in license files match: {}",
|
||||
relative.user_display()
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
error_on_venv(entry.file_name(), entry.path())?;
|
||||
|
||||
debug!("License files match: {}", relative.user_display());
|
||||
license_files.push(relative.portable_display().to_string());
|
||||
}
|
||||
|
||||
for license_file in &license_files {
|
||||
let file_path = root.join(license_file);
|
||||
let bytes = fs_err::read(&file_path)?;
|
||||
if str::from_utf8(&bytes).is_err() {
|
||||
return Err(ValidationError::LicenseFileNotUtf8(license_file.clone()).into());
|
||||
}
|
||||
}
|
||||
|
||||
// The glob order may be unstable
|
||||
license_files.sort();
|
||||
|
||||
(None, license_expression, license_files)
|
||||
} else {
|
||||
match &self.project.license {
|
||||
None => (None, None, Vec::new()),
|
||||
Some(License::Spdx(license_expression)) => {
|
||||
(None, Some(license_expression.clone()), Vec::new())
|
||||
}
|
||||
Some(License::Text { text }) => (Some(text.clone()), None, Vec::new()),
|
||||
Some(License::File { file }) => {
|
||||
let text = fs_err::read_to_string(root.join(file))?;
|
||||
(Some(text), None, Vec::new())
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Check that the license expression is a valid SPDX identifier.
|
||||
if let Some(license_expression) = &license_expression {
|
||||
if let Err(err) = spdx::Expression::parse(license_expression) {
|
||||
return Err(ValidationError::InvalidSpdx(license_expression.clone(), err).into());
|
||||
}
|
||||
}
|
||||
|
||||
Ok((license, license_expression, license_files))
|
||||
}
|
||||
|
||||
/// Validate and convert the entrypoints in `pyproject.toml`, including console and GUI scripts,
|
||||
/// to an `entry_points.txt`.
|
||||
///
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
use crate::common::{TestContext, uv_snapshot, venv_bin_path};
|
||||
use anyhow::Result;
|
||||
use assert_cmd::assert::OutputAssertExt;
|
||||
use assert_fs::fixture::{FileTouch, FileWriteStr, PathChild, PathCreateDir};
|
||||
use assert_fs::fixture::{FileTouch, FileWriteBin, FileWriteStr, PathChild, PathCreateDir};
|
||||
use flate2::bufread::GzDecoder;
|
||||
use fs_err::File;
|
||||
use indoc::{formatdoc, indoc};
|
||||
|
|
@ -760,6 +760,49 @@ fn complex_namespace_packages() -> Result<()> {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// Building a wheel must fail with a `must be UTF-8 encoded` error when a file listed under
/// `license-files` is not valid UTF-8.
#[test]
|
||||
fn license_file_must_be_utf8() -> Result<()> {
|
||||
let context = TestContext::new("3.12");
|
||||
|
||||
let project = context.temp_dir.child("license-utf8");
|
||||
context
|
||||
.init()
|
||||
.arg("--lib")
|
||||
.arg(project.path())
|
||||
.assert()
|
||||
.success();
|
||||
|
||||
// Overwrite the generated `pyproject.toml` so that `LICENSE.bin` is declared as a license file.
project.child("pyproject.toml").write_str(indoc! {r#"
|
||||
[project]
|
||||
name = "license-utf8"
|
||||
version = "1.0.0"
|
||||
license-files = ["LICENSE.bin"]
|
||||
|
||||
[build-system]
|
||||
requires = ["uv_build>=0.7,<10000"]
|
||||
build-backend = "uv_build"
|
||||
"#
|
||||
})?;
|
||||
|
||||
// A lone 0xff byte can never occur in valid UTF-8, so this file must be rejected.
project.child("LICENSE.bin").write_binary(&[0xff])?;
|
||||
|
||||
// Run the `build-wheel` command from the project directory; the snapshot pins the failure output.
uv_snapshot!(context
|
||||
.build_backend()
|
||||
.arg("build-wheel")
|
||||
.arg(context.temp_dir.path())
|
||||
.current_dir(project.path()), @r###"
|
||||
success: false
|
||||
exit_code: 2
|
||||
----- stdout -----
|
||||
|
||||
----- stderr -----
|
||||
error: Invalid pyproject.toml
|
||||
Caused by: License file `LICENSE.bin` must be UTF-8 encoded
|
||||
"###);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Test that a symlinked file (here: license) gets included.
|
||||
#[test]
|
||||
#[cfg(unix)]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue