mirror of
https://github.com/astral-sh/uv.git
synced 2025-11-20 11:56:03 +00:00
Enforce UTF‑8-encoded license files during uv build (#16699)
I noticed this when working on https://github.com/astral-sh/uv/pull/16697. [PEP 639](https://peps.python.org/pep-0639/#add-license-files-key) expects tools to ship license texts as UTF‑8, but previously `uv build` would quietly include any binary blob listed under `project.license-files`.

I have no clue what is going on with `rustfmt` for this file, but it seems that when I add the check, it wants to reformat a bunch of surrounding stuff. The relevant part to look at is:

```rust
for license_file in &license_files {
    let file_path = root.join(license_file);
    let bytes = fs_err::read(&file_path)?;
    if str::from_utf8(&bytes).is_err() {
        return Err(ValidationError::LicenseFileNotUtf8(license_file.clone()).into());
    }
}
```

where we validate all collected license files before proceeding.

---------

Co-authored-by: konstin <konstin@mailbox.org>
This commit is contained in:
parent
c167146f8c
commit
e28dc62358
2 changed files with 160 additions and 95 deletions
|
|
@ -3,7 +3,7 @@ use std::ffi::OsStr;
|
||||||
use std::fmt::Display;
|
use std::fmt::Display;
|
||||||
use std::fmt::Write;
|
use std::fmt::Write;
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
use std::str::FromStr;
|
use std::str::{self, FromStr};
|
||||||
|
|
||||||
use itertools::Itertools;
|
use itertools::Itertools;
|
||||||
use serde::{Deserialize, Deserializer};
|
use serde::{Deserialize, Deserializer};
|
||||||
|
|
@ -60,6 +60,8 @@ pub enum ValidationError {
|
||||||
ReservedGuiScripts,
|
ReservedGuiScripts,
|
||||||
#[error("`project.license` is not a valid SPDX expression: {0}")]
|
#[error("`project.license` is not a valid SPDX expression: {0}")]
|
||||||
InvalidSpdx(String, #[source] spdx::error::ParseError),
|
InvalidSpdx(String, #[source] spdx::error::ParseError),
|
||||||
|
#[error("License file `{}` must be UTF-8 encoded", _0)]
|
||||||
|
LicenseFileNotUtf8(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Check if the build backend is matching the currently running uv version.
|
/// Check if the build backend is matching the currently running uv version.
|
||||||
|
|
@ -339,99 +341,7 @@ impl PyProjectToml {
|
||||||
"2.3"
|
"2.3"
|
||||||
};
|
};
|
||||||
|
|
||||||
// TODO(konsti): Issue a warning on old license metadata once PEP 639 is universal.
|
let (license, license_expression, license_files) = self.license_metadata(root)?;
|
||||||
let (license, license_expression, license_files) =
|
|
||||||
if let Some(license_globs) = &self.project.license_files {
|
|
||||||
let license_expression = match &self.project.license {
|
|
||||||
None => None,
|
|
||||||
Some(License::Spdx(license_expression)) => Some(license_expression.clone()),
|
|
||||||
Some(License::Text { .. } | License::File { .. }) => {
|
|
||||||
return Err(ValidationError::MixedLicenseGenerations.into());
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut license_files = Vec::new();
|
|
||||||
let mut license_globs_parsed = Vec::new();
|
|
||||||
for license_glob in license_globs {
|
|
||||||
let pep639_glob =
|
|
||||||
PortableGlobParser::Pep639
|
|
||||||
.parse(license_glob)
|
|
||||||
.map_err(|err| Error::PortableGlob {
|
|
||||||
field: license_glob.to_owned(),
|
|
||||||
source: err,
|
|
||||||
})?;
|
|
||||||
license_globs_parsed.push(pep639_glob);
|
|
||||||
}
|
|
||||||
let license_globs =
|
|
||||||
GlobDirFilter::from_globs(&license_globs_parsed).map_err(|err| {
|
|
||||||
Error::GlobSetTooLarge {
|
|
||||||
field: "tool.uv.build-backend.source-include".to_string(),
|
|
||||||
source: err,
|
|
||||||
}
|
|
||||||
})?;
|
|
||||||
|
|
||||||
for entry in WalkDir::new(root)
|
|
||||||
.sort_by_file_name()
|
|
||||||
.into_iter()
|
|
||||||
.filter_entry(|entry| {
|
|
||||||
license_globs.match_directory(
|
|
||||||
entry
|
|
||||||
.path()
|
|
||||||
.strip_prefix(root)
|
|
||||||
.expect("walkdir starts with root"),
|
|
||||||
)
|
|
||||||
})
|
|
||||||
{
|
|
||||||
let entry = entry.map_err(|err| Error::WalkDir {
|
|
||||||
root: root.to_path_buf(),
|
|
||||||
err,
|
|
||||||
})?;
|
|
||||||
let relative = entry
|
|
||||||
.path()
|
|
||||||
.strip_prefix(root)
|
|
||||||
.expect("walkdir starts with root");
|
|
||||||
if !license_globs.match_path(relative) {
|
|
||||||
trace!("Not a license files match: {}", relative.user_display());
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if !entry.file_type().is_file() {
|
|
||||||
trace!(
|
|
||||||
"Not a file in license files match: {}",
|
|
||||||
relative.user_display()
|
|
||||||
);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
error_on_venv(entry.file_name(), entry.path())?;
|
|
||||||
|
|
||||||
debug!("License files match: {}", relative.user_display());
|
|
||||||
license_files.push(relative.portable_display().to_string());
|
|
||||||
}
|
|
||||||
|
|
||||||
// The glob order may be unstable
|
|
||||||
license_files.sort();
|
|
||||||
|
|
||||||
(None, license_expression, license_files)
|
|
||||||
} else {
|
|
||||||
match &self.project.license {
|
|
||||||
None => (None, None, Vec::new()),
|
|
||||||
Some(License::Spdx(license_expression)) => {
|
|
||||||
(None, Some(license_expression.clone()), Vec::new())
|
|
||||||
}
|
|
||||||
Some(License::Text { text }) => (Some(text.clone()), None, Vec::new()),
|
|
||||||
Some(License::File { file }) => {
|
|
||||||
let text = fs_err::read_to_string(root.join(file))?;
|
|
||||||
(Some(text), None, Vec::new())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Check that the license expression is a valid SPDX identifier.
|
|
||||||
if let Some(license_expression) = &license_expression {
|
|
||||||
if let Err(err) = spdx::Expression::parse(license_expression) {
|
|
||||||
return Err(ValidationError::InvalidSpdx(license_expression.clone(), err).into());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO(konsti): https://peps.python.org/pep-0753/#label-normalization (Draft)
|
// TODO(konsti): https://peps.python.org/pep-0753/#label-normalization (Draft)
|
||||||
let project_urls = self
|
let project_urls = self
|
||||||
|
|
@ -518,6 +428,118 @@ impl PyProjectToml {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Parse and validate the old (PEP 621) and new (PEP 639) license files.
|
||||||
|
#[allow(clippy::type_complexity)]
|
||||||
|
fn license_metadata(
|
||||||
|
&self,
|
||||||
|
root: &Path,
|
||||||
|
) -> Result<(Option<String>, Option<String>, Vec<String>), Error> {
|
||||||
|
// TODO(konsti): Issue a warning on old license metadata once PEP 639 is universal.
|
||||||
|
let (license, license_expression, license_files) = if let Some(license_globs) =
|
||||||
|
&self.project.license_files
|
||||||
|
{
|
||||||
|
let license_expression = match &self.project.license {
|
||||||
|
None => None,
|
||||||
|
Some(License::Spdx(license_expression)) => Some(license_expression.clone()),
|
||||||
|
Some(License::Text { .. } | License::File { .. }) => {
|
||||||
|
return Err(ValidationError::MixedLicenseGenerations.into());
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut license_files = Vec::new();
|
||||||
|
let mut license_globs_parsed = Vec::new();
|
||||||
|
for license_glob in license_globs {
|
||||||
|
let pep639_glob =
|
||||||
|
PortableGlobParser::Pep639
|
||||||
|
.parse(license_glob)
|
||||||
|
.map_err(|err| Error::PortableGlob {
|
||||||
|
field: license_glob.to_owned(),
|
||||||
|
source: err,
|
||||||
|
})?;
|
||||||
|
license_globs_parsed.push(pep639_glob);
|
||||||
|
}
|
||||||
|
let license_globs =
|
||||||
|
GlobDirFilter::from_globs(&license_globs_parsed).map_err(|err| {
|
||||||
|
Error::GlobSetTooLarge {
|
||||||
|
field: "tool.uv.build-backend.source-include".to_string(),
|
||||||
|
source: err,
|
||||||
|
}
|
||||||
|
})?;
|
||||||
|
|
||||||
|
for entry in WalkDir::new(root)
|
||||||
|
.sort_by_file_name()
|
||||||
|
.into_iter()
|
||||||
|
.filter_entry(|entry| {
|
||||||
|
license_globs.match_directory(
|
||||||
|
entry
|
||||||
|
.path()
|
||||||
|
.strip_prefix(root)
|
||||||
|
.expect("walkdir starts with root"),
|
||||||
|
)
|
||||||
|
})
|
||||||
|
{
|
||||||
|
let entry = entry.map_err(|err| Error::WalkDir {
|
||||||
|
root: root.to_path_buf(),
|
||||||
|
err,
|
||||||
|
})?;
|
||||||
|
let relative = entry
|
||||||
|
.path()
|
||||||
|
.strip_prefix(root)
|
||||||
|
.expect("walkdir starts with root");
|
||||||
|
if !license_globs.match_path(relative) {
|
||||||
|
trace!("Not a license files match: {}", relative.user_display());
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if !entry.file_type().is_file() {
|
||||||
|
trace!(
|
||||||
|
"Not a file in license files match: {}",
|
||||||
|
relative.user_display()
|
||||||
|
);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
error_on_venv(entry.file_name(), entry.path())?;
|
||||||
|
|
||||||
|
debug!("License files match: {}", relative.user_display());
|
||||||
|
license_files.push(relative.portable_display().to_string());
|
||||||
|
}
|
||||||
|
|
||||||
|
for license_file in &license_files {
|
||||||
|
let file_path = root.join(license_file);
|
||||||
|
let bytes = fs_err::read(&file_path)?;
|
||||||
|
if str::from_utf8(&bytes).is_err() {
|
||||||
|
return Err(ValidationError::LicenseFileNotUtf8(license_file.clone()).into());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// The glob order may be unstable
|
||||||
|
license_files.sort();
|
||||||
|
|
||||||
|
(None, license_expression, license_files)
|
||||||
|
} else {
|
||||||
|
match &self.project.license {
|
||||||
|
None => (None, None, Vec::new()),
|
||||||
|
Some(License::Spdx(license_expression)) => {
|
||||||
|
(None, Some(license_expression.clone()), Vec::new())
|
||||||
|
}
|
||||||
|
Some(License::Text { text }) => (Some(text.clone()), None, Vec::new()),
|
||||||
|
Some(License::File { file }) => {
|
||||||
|
let text = fs_err::read_to_string(root.join(file))?;
|
||||||
|
(Some(text), None, Vec::new())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Check that the license expression is a valid SPDX identifier.
|
||||||
|
if let Some(license_expression) = &license_expression {
|
||||||
|
if let Err(err) = spdx::Expression::parse(license_expression) {
|
||||||
|
return Err(ValidationError::InvalidSpdx(license_expression.clone(), err).into());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok((license, license_expression, license_files))
|
||||||
|
}
|
||||||
|
|
||||||
/// Validate and convert the entrypoints in `pyproject.toml`, including console and GUI scripts,
|
/// Validate and convert the entrypoints in `pyproject.toml`, including console and GUI scripts,
|
||||||
/// to an `entry_points.txt`.
|
/// to an `entry_points.txt`.
|
||||||
///
|
///
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
use crate::common::{TestContext, uv_snapshot, venv_bin_path};
|
use crate::common::{TestContext, uv_snapshot, venv_bin_path};
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use assert_cmd::assert::OutputAssertExt;
|
use assert_cmd::assert::OutputAssertExt;
|
||||||
use assert_fs::fixture::{FileTouch, FileWriteStr, PathChild, PathCreateDir};
|
use assert_fs::fixture::{FileTouch, FileWriteBin, FileWriteStr, PathChild, PathCreateDir};
|
||||||
use flate2::bufread::GzDecoder;
|
use flate2::bufread::GzDecoder;
|
||||||
use fs_err::File;
|
use fs_err::File;
|
||||||
use indoc::{formatdoc, indoc};
|
use indoc::{formatdoc, indoc};
|
||||||
|
|
@ -760,6 +760,49 @@ fn complex_namespace_packages() -> Result<()> {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn license_file_must_be_utf8() -> Result<()> {
|
||||||
|
let context = TestContext::new("3.12");
|
||||||
|
|
||||||
|
let project = context.temp_dir.child("license-utf8");
|
||||||
|
context
|
||||||
|
.init()
|
||||||
|
.arg("--lib")
|
||||||
|
.arg(project.path())
|
||||||
|
.assert()
|
||||||
|
.success();
|
||||||
|
|
||||||
|
project.child("pyproject.toml").write_str(indoc! {r#"
|
||||||
|
[project]
|
||||||
|
name = "license-utf8"
|
||||||
|
version = "1.0.0"
|
||||||
|
license-files = ["LICENSE.bin"]
|
||||||
|
|
||||||
|
[build-system]
|
||||||
|
requires = ["uv_build>=0.7,<10000"]
|
||||||
|
build-backend = "uv_build"
|
||||||
|
"#
|
||||||
|
})?;
|
||||||
|
|
||||||
|
project.child("LICENSE.bin").write_binary(&[0xff])?;
|
||||||
|
|
||||||
|
uv_snapshot!(context
|
||||||
|
.build_backend()
|
||||||
|
.arg("build-wheel")
|
||||||
|
.arg(context.temp_dir.path())
|
||||||
|
.current_dir(project.path()), @r###"
|
||||||
|
success: false
|
||||||
|
exit_code: 2
|
||||||
|
----- stdout -----
|
||||||
|
|
||||||
|
----- stderr -----
|
||||||
|
error: Invalid pyproject.toml
|
||||||
|
Caused by: License file `LICENSE.bin` must be UTF-8 encoded
|
||||||
|
"###);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
/// Test that a symlinked file (here: license) gets included.
|
/// Test that a symlinked file (here: license) gets included.
|
||||||
#[test]
|
#[test]
|
||||||
#[cfg(unix)]
|
#[cfg(unix)]
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue