Add type-based validation for index names (#8464)

## Summary

Also documents the normalization scheme.
This commit is contained in:
Charlie Marsh 2024-10-22 12:10:20 -04:00 committed by GitHub
parent 399d5ab50a
commit ff3ed3b797
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 189 additions and 44 deletions

View file

@ -145,20 +145,9 @@ impl Credentials {
///
/// For example, given a name of `"pytorch"`, search for `UV_INDEX_PYTORCH_USERNAME` and
/// `UV_INDEX_PYTORCH_PASSWORD`.
pub fn from_env(name: &str) -> Option<Self> {
// Convert to uppercase, and replace any non-alphanumeric characters with underscores.
let name = name
.chars()
.map(|c| {
if c.is_ascii_alphanumeric() {
c.to_ascii_uppercase()
} else {
'_'
}
})
.collect::<String>();
let username = std::env::var(EnvVars::index_username(&name)).ok();
let password = std::env::var(EnvVars::index_password(&name)).ok();
pub fn from_env(name: impl AsRef<str>) -> Option<Self> {
let username = std::env::var(EnvVars::index_username(name.as_ref())).ok();
let password = std::env::var(EnvVars::index_password(name.as_ref())).ok();
if username.is_none() && password.is_none() {
None
} else {

View file

@ -1,8 +1,11 @@
use std::str::FromStr;
use thiserror::Error;
use url::Url;
use uv_auth::Credentials;
use crate::index_name::{IndexName, IndexNameError};
use crate::origin::Origin;
use crate::{IndexUrl, IndexUrlError};
@ -22,7 +25,7 @@ pub struct Index {
/// [tool.uv.sources]
/// torch = { index = "pytorch" }
/// ```
pub name: Option<String>,
pub name: Option<IndexName>,
/// The URL of the index.
///
/// Expects to receive a URL (e.g., `https://pypi.org/simple`) or a local path.
@ -137,8 +140,8 @@ impl Index {
/// Retrieve the credentials for the index, either from the environment, or from the URL itself.
pub fn credentials(&self) -> Option<Credentials> {
// If the index is named, and credentials are provided via the environment, prefer those.
if let Some(name) = self.name.as_deref() {
if let Some(credentials) = Credentials::from_env(name) {
if let Some(name) = self.name.as_ref() {
if let Some(credentials) = Credentials::from_env(name.to_env_var()) {
return Some(credentials);
}
}
@ -154,17 +157,11 @@ impl FromStr for Index {
fn from_str(s: &str) -> Result<Self, Self::Err> {
// Determine whether the source is prefixed with a name, as in `name=https://pypi.org/simple`.
if let Some((name, url)) = s.split_once('=') {
if name.is_empty() {
return Err(IndexSourceError::EmptyName);
}
if name
.chars()
.all(|c| c.is_alphanumeric() || c == '-' || c == '_')
{
if !name.chars().any(|c| c == ':') {
let name = IndexName::from_str(name)?;
let url = IndexUrl::from_str(url)?;
return Ok(Self {
name: Some(name.to_string()),
name: Some(name),
url,
explicit: false,
default: false,
@ -190,6 +187,8 @@ impl FromStr for Index {
pub enum IndexSourceError {
/// The URL portion of the index could not be parsed; forwards the underlying [`IndexUrlError`].
#[error(transparent)]
Url(#[from] IndexUrlError),
/// The name portion of the index failed validation; forwards the underlying [`IndexNameError`].
#[error(transparent)]
IndexName(#[from] IndexNameError),
/// The index was written with a name prefix, but the name was empty.
// NOTE(review): confirm this variant is still constructed somewhere; `IndexNameError` carries
// an `EmptyName` variant with the same message.
#[error("Index included a name, but the name was empty")]
EmptyName,
}

View file

@ -0,0 +1,94 @@
use std::ops::Deref;
use std::str::FromStr;
use thiserror::Error;
/// The normalized name of an index.
///
/// Index names may contain letters, digits, hyphens, underscores, and periods, and must be ASCII.
///
/// The wrapped string is stored exactly as it was provided; validation is performed by
/// [`IndexName::new`], which the [`FromStr`] and `serde` deserialization implementations both
/// delegate to.
#[derive(Debug, Clone, Hash, Eq, PartialEq, serde::Serialize)]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
pub struct IndexName(String);
impl IndexName {
    /// Validates the given index name and returns [`IndexName`] if it's valid, or an error
    /// otherwise.
    ///
    /// A valid name is non-empty and consists solely of ASCII letters, ASCII digits, hyphens
    /// (`-`), underscores (`_`), and periods (`.`). The name is stored exactly as given.
    ///
    /// # Errors
    ///
    /// - [`IndexNameError::EmptyName`] if `name` is the empty string.
    /// - [`IndexNameError::UnsupportedCharacter`] if the first invalid character is ASCII.
    /// - [`IndexNameError::NonAsciiName`] if the first invalid character is not ASCII.
    pub fn new(name: String) -> Result<Self, IndexNameError> {
        // An empty name contains no character to reject, so it has to be caught explicitly;
        // otherwise it would pass validation and yield an empty environment-variable segment.
        if name.is_empty() {
            return Err(IndexNameError::EmptyName);
        }

        // Only the first offending character (in name order) is reported.
        let offender = name
            .chars()
            .find(|&c| !(c.is_ascii_alphanumeric() || matches!(c, '-' | '_' | '.')));
        match offender {
            None => Ok(Self(name)),
            Some(c) if c.is_ascii() => Err(IndexNameError::UnsupportedCharacter(c, name)),
            Some(c) => Err(IndexNameError::NonAsciiName(c, name)),
        }
    }

    /// Converts the index name to an environment variable name.
    ///
    /// ASCII letters are uppercased, ASCII digits are kept, and every other character is
    /// replaced by an underscore. For example, given `IndexName("foo-bar")`, this will return
    /// `"FOO_BAR"`.
    ///
    /// The mapping is lossy: `foo-bar`, `foo.bar`, and `foo_bar` all produce `"FOO_BAR"`.
    pub fn to_env_var(&self) -> String {
        self.0
            .chars()
            .map(|c| {
                if c.is_ascii_alphanumeric() {
                    c.to_ascii_uppercase()
                } else {
                    '_'
                }
            })
            .collect::<String>()
    }
}
impl FromStr for IndexName {
    type Err = IndexNameError;

    /// Parses an index name from a string slice by copying it into an owned `String` and
    /// validating it with [`IndexName::new`].
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let owned = String::from(s);
        Self::new(owned)
    }
}
impl<'de> serde::de::Deserialize<'de> for IndexName {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: serde::de::Deserializer<'de>,
{
IndexName::new(String::deserialize(deserializer)?).map_err(serde::de::Error::custom)
}
}
impl std::fmt::Display for IndexName {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.0.fmt(f)
}
}
impl AsRef<str> for IndexName {
    /// Borrows the index name as a string slice.
    fn as_ref(&self) -> &str {
        self.0.as_str()
    }
}
impl Deref for IndexName {
type Target = str;
fn deref(&self) -> &Self::Target {
&self.0
}
}
/// An error that can occur when parsing an [`IndexName`].
#[derive(Error, Debug)]
pub enum IndexNameError {
    /// The index name was empty.
#[error("Index included a name, but the name was empty")]
EmptyName,
    /// The name contains an ASCII character other than a letter, digit, hyphen, underscore, or
    /// period. Carries the offending character and the full name that was rejected.
#[error("Index names may only contain letters, digits, hyphens, underscores, and periods, but found unsupported character (`{0}`) in: `{1}`")]
UnsupportedCharacter(char, String),
    /// The name contains a non-ASCII character. Carries the offending character and the full
    /// name that was rejected.
#[error("Index names must be ASCII, but found non-ASCII character (`{0}`) in: `{1}`")]
NonAsciiName(char, String),
}

View file

@ -58,6 +58,7 @@ pub use crate::file::*;
pub use crate::hash::*;
pub use crate::id::*;
pub use crate::index::*;
pub use crate::index_name::*;
pub use crate::index_url::*;
pub use crate::installed::*;
pub use crate::origin::*;
@ -79,6 +80,7 @@ mod file;
mod hash;
mod id;
mod index;
mod index_name;
mod index_url;
mod installed;
mod origin;

View file

@ -6,7 +6,7 @@ use thiserror::Error;
use url::Url;
use uv_configuration::LowerBound;
use uv_distribution_filename::DistExtension;
use uv_distribution_types::{Index, IndexLocations, Origin};
use uv_distribution_types::{Index, IndexLocations, IndexName, Origin};
use uv_git::GitReference;
use uv_normalize::PackageName;
use uv_pep440::VersionSpecifiers;
@ -398,7 +398,7 @@ pub enum LoweringError {
#[error("Can only specify one of: `rev`, `tag`, or `branch`")]
MoreThanOneGitRef,
#[error("Package `{0}` references an undeclared index: `{1}`")]
MissingIndex(PackageName, String),
MissingIndex(PackageName, IndexName),
#[error("Workspace members are not allowed in non-workspace contexts")]
WorkspaceMember,
#[error(transparent)]

View file

@ -16,7 +16,7 @@ use std::str::FromStr;
use std::{collections::BTreeMap, mem};
use thiserror::Error;
use url::Url;
use uv_distribution_types::Index;
use uv_distribution_types::{Index, IndexName};
use uv_fs::{relative_to, PortablePathBuf};
use uv_git::GitReference;
use uv_macros::OptionsMetadata;
@ -644,7 +644,7 @@ pub enum Source {
},
/// A dependency pinned to a specific index, e.g., `torch` after setting `torch` to `https://download.pytorch.org/whl/cu118`.
Registry {
index: String,
index: IndexName,
#[serde(
skip_serializing_if = "uv_pep508::marker::ser::is_empty",
serialize_with = "uv_pep508::marker::ser::serialize",
@ -684,7 +684,7 @@ impl<'de> Deserialize<'de> for Source {
url: Option<Url>,
path: Option<PortablePathBuf>,
editable: Option<bool>,
index: Option<String>,
index: Option<IndexName>,
workspace: Option<bool>,
#[serde(
skip_serializing_if = "uv_pep508::marker::ser::is_empty",
@ -993,7 +993,7 @@ impl Source {
source: RequirementSource,
workspace: bool,
editable: Option<bool>,
index: Option<String>,
index: Option<IndexName>,
rev: Option<String>,
tag: Option<String>,
branch: Option<String>,

View file

@ -18,7 +18,7 @@ use uv_configuration::{
};
use uv_dispatch::BuildDispatch;
use uv_distribution::DistributionDatabase;
use uv_distribution_types::{Index, UnresolvedRequirement, VersionId};
use uv_distribution_types::{Index, IndexName, UnresolvedRequirement, VersionId};
use uv_fs::Simplified;
use uv_git::{GitReference, GIT_STORE};
use uv_normalize::PackageName;
@ -910,7 +910,7 @@ fn resolve_requirement(
requirement: uv_pypi_types::Requirement,
workspace: bool,
editable: Option<bool>,
index: Option<String>,
index: Option<IndexName>,
rev: Option<String>,
tag: Option<String>,
branch: Option<String>,

View file

@ -12448,6 +12448,57 @@ fn lock_trailing_slash() -> Result<()> {
Ok(())
}
/// Locking must fail when an index name contains an unsupported character (here, a space).
///
/// The snapshot expects the invalid name to be reported twice: once as a warning during settings
/// discovery (for the `[[tool.uv.index]]` entry) and once as the fatal parse error (for the
/// `tool.uv.sources` reference).
#[test]
fn lock_invalid_index() -> Result<()> {
let context = TestContext::new("3.12");
let pyproject_toml = context.temp_dir.child("pyproject.toml");
pyproject_toml.write_str(
r#"
[project]
name = "project"
version = "0.1.0"
requires-python = ">=3.12"
dependencies = ["anyio==3.7.0", "iniconfig==2.0.0"]
[build-system]
requires = ["setuptools>=42"]
build-backend = "setuptools.build_meta"
[tool.uv.sources]
iniconfig = { index = "internal proxy" }
[[tool.uv.index]]
name = "internal proxy"
url = "https://test.pypi.org/simple"
explicit = true
"#,
)?;
uv_snapshot!(context.filters(), context.lock(), @r###"
success: false
exit_code: 2
----- stdout -----
----- stderr -----
warning: Failed to parse `pyproject.toml` during settings discovery:
TOML parse error at line 16, column 16
|
16 | name = "internal proxy"
| ^^^^^^^^^^^^^^^^
Index names may only contain letters, digits, hyphens, underscores, and periods, but found unsupported character (` `) in: `internal proxy`
error: Failed to parse: `pyproject.toml`
Caused by: TOML parse error at line 13, column 31
|
13 | iniconfig = { index = "internal proxy" }
| ^^^^^^^^^^^^^^^^
Index names may only contain letters, digits, hyphens, underscores, and periods, but found unsupported character (` `) in: `internal proxy`
"###);
Ok(())
}
#[test]
fn lock_explicit_index() -> Result<()> {
let context = TestContext::new("3.12");

View file

@ -18,8 +18,6 @@ name = "pytorch"
url = "https://download.pytorch.org/whl/cpu"
```
Index names must only contain alphanumeric characters, dashes, or underscores.
Indexes are prioritized in the order in which they're defined, such that the first index listed in
the configuration file is the first index consulted when resolving dependencies, with indexes
provided via the command line taking precedence over those in the configuration file.
@ -38,6 +36,9 @@ default = true
The default index is always treated as lowest priority, regardless of its position in the list of
indexes.
Index names may only contain alphanumeric characters, dashes, underscores, and periods, and must be
valid ASCII.
## Pinning a package to an index
A package can be pinned to a specific index by specifying the index in its `tool.uv.sources` entry.
@ -127,17 +128,18 @@ password (or access token).
To authenticate with a provided index, either provide credentials via environment variables or embed
them in the URL.
For example, given an index named `internal` that requires a username (`public`) and password
For example, given an index named `internal-proxy` that requires a username (`public`) and password
(`koala`), define the index (without credentials) in your `pyproject.toml`:
```toml
[[tool.uv.index]]
name = "internal"
name = "internal-proxy"
url = "https://example.com/simple"
```
From there, you can set the `UV_INDEX_INTERNAL_USERNAME` and `UV_INDEX_INTERNAL_PASSWORD`
environment variables, where `INTERNAL` is the uppercase version of the index name:
From there, you can set the `UV_INDEX_INTERNAL_PROXY_USERNAME` and
`UV_INDEX_INTERNAL_PROXY_PASSWORD` environment variables, where `INTERNAL_PROXY` is the uppercase
version of the index name, with non-alphanumeric characters replaced by underscores:
```sh
export UV_INDEX_INTERNAL_PROXY_USERNAME=public

16
uv.schema.json generated
View file

@ -597,9 +597,13 @@
},
"name": {
"description": "The name of the index.\n\nIndex names can be used to reference indexes elsewhere in the configuration. For example, you can pin a package to a specific index by name:\n\n```toml [[tool.uv.index]] name = \"pytorch\" url = \"https://download.pytorch.org/whl/cu121\"\n\n[tool.uv.sources] torch = { index = \"pytorch\" } ```",
"type": [
"string",
"null"
"anyOf": [
{
"$ref": "#/definitions/IndexName"
},
{
"type": "null"
}
]
},
"url": {
@ -612,6 +616,10 @@
}
}
},
"IndexName": {
"description": "The normalized name of an index.\n\nIndex names may contain letters, digits, hyphens, underscores, and periods, and must be ASCII.",
"type": "string"
},
"IndexStrategy": {
"oneOf": [
{
@ -1399,7 +1407,7 @@
],
"properties": {
"index": {
"type": "string"
"$ref": "#/definitions/IndexName"
},
"marker": {
"$ref": "#/definitions/MarkerTree"