Use index URL instead of package URL for keyring credential lookups (#12651)

Some registries (like Azure Artifact) can require you to authenticate
separately for every package URL if you do not authenticate for the
/simple endpoint. These changes make the auth middleware aware of index
URL endpoints and attempts to fetch keyring credentials for such an
index URL when making a request to any URL it's a prefix of.

The current uv behavior is to cache credentials either at the request
URL or realm level. But with these changes, we also need to cache
credentials at the index level. Note that when uv does not detect an
index URL for a request URL, it will continue to apply the old behavior.

Addresses part of #4056
Closes #4583
Closes #11236
Closes #11391
Closes #11507
This commit is contained in:
Zanie Blue 2025-04-29 12:54:07 -05:00
parent 514a7ea6df
commit de1479c4ef
26 changed files with 571 additions and 203 deletions

View file

@ -17,8 +17,8 @@ type FxOnceMap<K, V> = OnceMap<K, V, BuildHasherDefault<FxHasher>>;
pub struct CredentialsCache {
/// A cache per realm and username
realms: RwLock<FxHashMap<(Realm, Username), Arc<Credentials>>>,
/// A cache tracking the result of fetches from external services
pub(crate) fetches: FxOnceMap<(Realm, Username), Option<Arc<Credentials>>>,
/// A cache tracking the result of realm or index URL fetches from external services
pub(crate) fetches: FxOnceMap<(String, Username), Option<Arc<Credentials>>>,
/// A cache per URL, uses a trie for efficient prefix queries.
urls: RwLock<UrlTrie>,
}

View file

@ -1,6 +1,6 @@
use std::fmt::{self, Display, Formatter};
use rustc_hash::FxHashMap;
use rustc_hash::FxHashSet;
use url::Url;
/// When to use authentication.
@ -47,27 +47,45 @@ impl Display for AuthPolicy {
}
}
}
// TODO(john): We are not using `uv_distribution_types::Index` directly
// here because it would cause circular crate dependencies. However, this
// could potentially make sense for a future refactor.
#[derive(Debug, Clone, Hash, Eq, PartialEq)]
pub struct Index {
pub url: Url,
/// The root endpoint where authentication is applied.
/// For PEP 503 endpoints, this excludes `/simple`.
pub root_url: Url,
pub auth_policy: AuthPolicy,
}
#[derive(Debug, Default, Clone, Eq, PartialEq)]
pub struct UrlAuthPolicies(FxHashMap<Url, AuthPolicy>);
pub struct Indexes(FxHashSet<Index>);
impl UrlAuthPolicies {
impl Indexes {
pub fn new() -> Self {
Self(FxHashMap::default())
Self(FxHashSet::default())
}
/// Create a new [`UrlAuthPolicies`] from a list of URL and [`AuthPolicy`]
/// tuples.
pub fn from_tuples(tuples: impl IntoIterator<Item = (Url, AuthPolicy)>) -> Self {
let mut auth_policies = Self::new();
for (url, auth_policy) in tuples {
auth_policies.add_policy(url, auth_policy);
/// Create a new [`AuthIndexUrls`] from an iterator of [`AuthIndexUrl`]s.
pub fn from_indexes(urls: impl IntoIterator<Item = Index>) -> Self {
let mut index_urls = Self::new();
for url in urls {
index_urls.0.insert(url);
}
auth_policies
index_urls
}
/// An [`AuthPolicy`] for a URL.
pub fn add_policy(&mut self, url: Url, auth_policy: AuthPolicy) {
self.0.insert(url, auth_policy);
/// Get the index URL prefix for a URL if one exists.
pub fn index_url_for(&self, url: &Url) -> Option<&Url> {
// TODO(john): There are probably not many URLs to iterate through,
// but we could use a trie instead of a HashSet here for more
// efficient search.
self.0
.iter()
.find(|index| is_url_prefix(&index.root_url, url))
.map(|index| &index.url)
}
/// Get the [`AuthPolicy`] for a URL.
@ -75,11 +93,22 @@ impl UrlAuthPolicies {
// TODO(john): There are probably not many URLs to iterate through,
// but we could use a trie instead of a HashMap here for more
// efficient search.
for (auth_url, auth_policy) in &self.0 {
if url.as_str().starts_with(auth_url.as_str()) {
return *auth_policy;
for index in &self.0 {
if is_url_prefix(&index.root_url, url) {
return index.auth_policy;
}
}
AuthPolicy::Auto
}
}
fn is_url_prefix(base: &Url, url: &Url) -> bool {
if base.scheme() != url.scheme()
|| base.host_str() != url.host_str()
|| base.port_or_known_default() != url.port_or_known_default()
{
return false;
}
url.path().starts_with(base.path())
}

View file

@ -5,16 +5,16 @@ use url::Url;
use cache::CredentialsCache;
pub use credentials::Credentials;
pub use index::{AuthPolicy, Index, Indexes};
pub use keyring::KeyringProvider;
pub use middleware::AuthMiddleware;
pub use policy::{AuthPolicy, UrlAuthPolicies};
use realm::Realm;
mod cache;
mod credentials;
mod index;
mod keyring;
mod middleware;
mod policy;
mod realm;
// TODO(zanieb): Consider passing a cache explicitly throughout

View file

@ -5,7 +5,7 @@ use url::Url;
use crate::{
credentials::{Credentials, Username},
policy::{AuthPolicy, UrlAuthPolicies},
index::{AuthPolicy, Indexes},
realm::Realm,
CredentialsCache, KeyringProvider, CREDENTIALS_CACHE,
};
@ -58,7 +58,7 @@ pub struct AuthMiddleware {
keyring: Option<KeyringProvider>,
cache: Option<CredentialsCache>,
/// Auth policies for specific URLs.
url_auth_policies: UrlAuthPolicies,
indexes: Indexes,
/// Set all endpoints as needing authentication. We never try to send an
/// unauthenticated request, avoiding cloning an uncloneable request.
only_authenticated: bool,
@ -70,7 +70,7 @@ impl AuthMiddleware {
netrc: NetrcMode::default(),
keyring: None,
cache: None,
url_auth_policies: UrlAuthPolicies::new(),
indexes: Indexes::new(),
only_authenticated: false,
}
}
@ -104,8 +104,8 @@ impl AuthMiddleware {
/// Configure the [`AuthPolicy`]s to use for URLs.
#[must_use]
pub fn with_url_auth_policies(mut self, auth_policies: UrlAuthPolicies) -> Self {
self.url_auth_policies = auth_policies;
pub fn with_indexes(mut self, indexes: Indexes) -> Self {
self.indexes = indexes;
self
}
@ -148,7 +148,7 @@ impl Middleware for AuthMiddleware {
/// We'll avoid making a request we expect to fail and look for a password.
/// The discovered credentials must have the requested username to be used.
///
/// - Check the cache (realm key) for a password
/// - Check the cache (index URL or realm key) for a password
/// - Check the netrc for a password
/// - Check the keyring for a password
/// - Perform the request
@ -162,10 +162,10 @@ impl Middleware for AuthMiddleware {
/// server tells us authorization is needed. This pattern avoids attaching credentials to
/// requests that do not need them, which can cause some servers to deny the request.
///
/// - Check the cache (url key)
/// - Check the cache (URL key)
/// - Perform the request
/// - On 401, 403, or 404 check for authentication if there was a cache miss
/// - Check the cache (realm key) for the username and password
/// - Check the cache (index URL or realm key) for the username and password
/// - Check the netrc for a username and password
/// - Perform the request again if found
/// - Add the username and password to the cache if successful
@ -181,7 +181,8 @@ impl Middleware for AuthMiddleware {
// In the middleware, existing credentials are already moved from the URL
// to the headers so for display purposes we restore some information
let url = tracing_url(&request, request_credentials.as_ref());
let auth_policy = self.url_auth_policies.policy_for(request.url());
let maybe_index_url = self.indexes.index_url_for(request.url());
let auth_policy = self.indexes.policy_for(request.url());
trace!("Handling request for {url} with authentication policy {auth_policy}");
let credentials: Option<Arc<Credentials>> = if matches!(auth_policy, AuthPolicy::Never) {
@ -195,6 +196,7 @@ impl Middleware for AuthMiddleware {
extensions,
next,
&url,
maybe_index_url,
auth_policy,
)
.await;
@ -272,17 +274,20 @@ impl Middleware for AuthMiddleware {
(request, None)
};
// Check if there are credentials in the realm-level cache
let credentials = self
.cache()
.get_realm(
Realm::from(retry_request.url()),
credentials
.as_ref()
.map(|credentials| credentials.to_username())
.unwrap_or(Username::none()),
)
.or(credentials);
let username = credentials
.as_ref()
.map(|credentials| credentials.to_username())
.unwrap_or(Username::none());
let credentials = if let Some(index_url) = maybe_index_url {
self.cache().get_url(index_url, &username)
} else {
// Since there is no known index for this URL, check if there are credentials in
// the realm-level cache.
self.cache()
.get_realm(Realm::from(retry_request.url()), username)
}
.or(credentials);
if let Some(credentials) = credentials.as_ref() {
if credentials.password().is_some() {
trace!("Retrying request for {url} with credentials from cache {credentials:?}");
@ -296,7 +301,12 @@ impl Middleware for AuthMiddleware {
// Then, fetch from external services.
// Here, we use the username from the cache if present.
if let Some(credentials) = self
.fetch_credentials(credentials.as_deref(), retry_request.url(), auth_policy)
.fetch_credentials(
credentials.as_deref(),
retry_request.url(),
maybe_index_url,
auth_policy,
)
.await
{
retry_request = credentials.authenticate(retry_request);
@ -374,6 +384,7 @@ impl AuthMiddleware {
extensions: &mut Extensions,
next: Next<'_>,
url: &str,
maybe_index_url: Option<&Url>,
auth_policy: AuthPolicy,
) -> reqwest_middleware::Result<Response> {
let credentials = Arc::new(credentials);
@ -387,15 +398,27 @@ impl AuthMiddleware {
}
trace!("Request for {url} is missing a password, looking for credentials");
// There's just a username, try to find a password
let credentials = if let Some(credentials) = self
.cache()
.get_realm(Realm::from(request.url()), credentials.to_username())
{
// There's just a username, try to find a password.
// If we have an index URL, check the cache for that URL. Otherwise,
// check for the realm.
let maybe_cached_credentials = if let Some(index_url) = maybe_index_url {
self.cache()
.get_url(index_url, credentials.as_username().as_ref())
} else {
self.cache()
.get_realm(Realm::from(request.url()), credentials.to_username())
};
if let Some(credentials) = maybe_cached_credentials {
request = credentials.authenticate(request);
// Do not insert already-cached credentials
None
} else if let Some(credentials) = self
let credentials = None;
return self
.complete_request(credentials, request, extensions, next, auth_policy)
.await;
}
let credentials = if let Some(credentials) = self
.cache()
.get_url(request.url(), credentials.as_username().as_ref())
{
@ -403,11 +426,21 @@ impl AuthMiddleware {
// Do not insert already-cached credentials
None
} else if let Some(credentials) = self
.fetch_credentials(Some(&credentials), request.url(), auth_policy)
.fetch_credentials(
Some(&credentials),
request.url(),
maybe_index_url,
auth_policy,
)
.await
{
request = credentials.authenticate(request);
Some(credentials)
} else if maybe_index_url.is_some() {
// If this is a known index, we fall back to checking for the realm.
self.cache()
.get_realm(Realm::from(request.url()), credentials.to_username())
.or(Some(credentials))
} else {
// If we don't find a password, we'll still attempt the request with the existing credentials
Some(credentials)
@ -425,18 +458,20 @@ impl AuthMiddleware {
&self,
credentials: Option<&Credentials>,
url: &Url,
maybe_index_url: Option<&Url>,
auth_policy: AuthPolicy,
) -> Option<Arc<Credentials>> {
// Fetches can be expensive, so we will only run them _once_ per realm and username combination
// All other requests for the same realm will wait until the first one completes
let key = (
Realm::from(url),
Username::from(
credentials
.map(|credentials| credentials.username().unwrap_or_default().to_string()),
),
let username = Username::from(
credentials.map(|credentials| credentials.username().unwrap_or_default().to_string()),
);
// Fetches can be expensive, so we will only run them _once_ per realm or index URL and username combination
// All other requests for the same realm or index URL will wait until the first one completes
let key = if let Some(index_url) = maybe_index_url {
(index_url.to_string(), username)
} else {
(Realm::from(url).to_string(), username)
};
if !self.cache().fetches.register(key.clone()) {
let credentials = self
.cache()
@ -446,9 +481,12 @@ impl AuthMiddleware {
.expect("The key must exist after register is called");
if credentials.is_some() {
trace!("Using credentials from previous fetch for {url}");
trace!("Using credentials from previous fetch for {}", key.0);
} else {
trace!("Skipping fetch of credentials for {url}, previous attempt failed");
trace!(
"Skipping fetch of credentials for {}, previous attempt failed",
key.0
);
}
return credentials;
@ -468,22 +506,35 @@ impl AuthMiddleware {
debug!("Found credentials in netrc file for {url}");
Some(credentials)
// N.B. The keyring provider performs lookups for the exact URL then falls back to the host,
// but we cache the result per realm so if a keyring implementation returns different
// credentials for different URLs in the same realm we will use the wrong credentials.
// N.B. The keyring provider performs lookups for the exact URL then falls back to the host.
// But, in the absence of an index URL, we cache the result per realm. So in that case,
// if a keyring implementation returns different credentials for different URLs in the
// same realm we will use the wrong credentials.
} else if let Some(credentials) = match self.keyring {
Some(ref keyring) => {
// The subprocess keyring provider is _slow_ so we do not perform fetches for all
// URLs; instead, we fetch if there's a username or if the user has requested to
// always authenticate.
if let Some(username) = credentials.and_then(|credentials| credentials.username()) {
debug!("Checking keyring for credentials for {username}@{url}");
keyring.fetch(url, Some(username)).await
if let Some(index_url) = maybe_index_url {
debug!("Checking keyring for credentials for index URL {}@{}", username, index_url);
keyring.fetch(index_url, Some(username)).await
} else {
debug!("Checking keyring for credentials for full URL {}@{}", username, *url);
keyring.fetch(url, Some(username)).await
}
} else if matches!(auth_policy, AuthPolicy::Always) {
debug!(
"Checking keyring for credentials for {url} without username due to `authenticate = always`"
);
keyring.fetch(url, None).await
if let Some(index_url) = maybe_index_url {
debug!(
"Checking keyring for credentials for index URL {index_url} without username due to `authenticate = always`"
);
keyring.fetch(index_url, None).await
} else {
debug!(
"Checking keyring for credentials for full URL {url} without username due to `authenticate = always`"
);
keyring.fetch(url, None).await
}
} else {
debug!("Skipping keyring fetch for {url} without username; use `authenticate = always` to force");
None
@ -499,7 +550,7 @@ impl AuthMiddleware {
.map(Arc::new);
// Register the fetch for this key
self.cache().fetches.done(key.clone(), credentials.clone());
self.cache().fetches.done(key, credentials.clone());
credentials
}
@ -539,6 +590,7 @@ mod tests {
use wiremock::{Mock, MockServer, ResponseTemplate};
use crate::credentials::Password;
use crate::Index;
use super::*;
@ -999,7 +1051,7 @@ mod tests {
let server = start_test_server(username, password).await;
let base_url = Url::parse(&server.uri())?;
let auth_policies = auth_policies_for(&base_url, AuthPolicy::Always);
let indexes = indexes_for(&base_url, AuthPolicy::Always);
let client = test_client_builder()
.with(
AuthMiddleware::new()
@ -1013,7 +1065,7 @@ mod tests {
username,
password,
)])))
.with_url_auth_policies(auth_policies),
.with_indexes(indexes),
)
.build();
@ -1656,13 +1708,213 @@ mod tests {
Ok(())
}
fn auth_policies_for(url: &Url, policy: AuthPolicy) -> UrlAuthPolicies {
/// Demonstrates that when an index URL is provided, we avoid "incorrect" behavior
/// where multiple URLs with the same username and realm share the same realm-level
/// credentials cache entry.
#[test(tokio::test)]
async fn test_credentials_from_keyring_mixed_authentication_different_indexes_same_realm(
) -> Result<(), Error> {
let username = "user";
let password_1 = "password1";
let password_2 = "password2";
let server = MockServer::start().await;
Mock::given(method("GET"))
.and(path_regex("/prefix_1.*"))
.and(basic_auth(username, password_1))
.respond_with(ResponseTemplate::new(200))
.mount(&server)
.await;
Mock::given(method("GET"))
.and(path_regex("/prefix_2.*"))
.and(basic_auth(username, password_2))
.respond_with(ResponseTemplate::new(200))
.mount(&server)
.await;
Mock::given(method("GET"))
.respond_with(ResponseTemplate::new(401))
.mount(&server)
.await;
let base_url = Url::parse(&server.uri())?;
let base_url_1 = base_url.join("prefix_1")?;
let base_url_2 = base_url.join("prefix_2")?;
let indexes = Indexes::from_indexes(vec![
Index {
url: base_url_1.clone(),
root_url: base_url_1.clone(),
auth_policy: AuthPolicy::Auto,
},
Index {
url: base_url_2.clone(),
root_url: base_url_2.clone(),
auth_policy: AuthPolicy::Auto,
},
]);
let client = test_client_builder()
.with(
AuthMiddleware::new()
.with_cache(CredentialsCache::new())
.with_keyring(Some(KeyringProvider::dummy([
(base_url_1.clone(), username, password_1),
(base_url_2.clone(), username, password_2),
])))
.with_indexes(indexes),
)
.build();
// Both servers do not work without a username
assert_eq!(
client.get(base_url_1.clone()).send().await?.status(),
401,
"Requests should require a username"
);
assert_eq!(
client.get(base_url_2.clone()).send().await?.status(),
401,
"Requests should require a username"
);
let mut url_1 = base_url_1.clone();
url_1.set_username(username).unwrap();
assert_eq!(
client.get(url_1.clone()).send().await?.status(),
200,
"The first request with a username will succeed"
);
assert_eq!(
client.get(base_url_2.clone()).send().await?.status(),
401,
"Credentials should not be re-used for the second prefix"
);
assert_eq!(
client
.get(base_url.join("prefix_1/foo")?)
.send()
.await?
.status(),
200,
"Subsequent requests can be to different paths in the same prefix"
);
let mut url_2 = base_url_2.clone();
url_2.set_username(username).unwrap();
assert_eq!(
client.get(url_2.clone()).send().await?.status(),
200,
"A request with the same username and realm for a URL will use index-specific password"
);
assert_eq!(
client
.get(base_url.join("prefix_2/foo")?)
.send()
.await?
.status(),
200,
"Requests to other paths with that prefix will also succeed"
);
Ok(())
}
/// Demonstrates that when an index' credentials are cached for its realm, we
/// find those credentials if they're not present in the keyring.
#[test(tokio::test)]
async fn test_credentials_from_keyring_shared_authentication_different_indexes_same_realm(
) -> Result<(), Error> {
let username = "user";
let password = "password";
let server = MockServer::start().await;
Mock::given(method("GET"))
.and(basic_auth(username, password))
.respond_with(ResponseTemplate::new(200))
.mount(&server)
.await;
Mock::given(method("GET"))
.and(path_regex("/prefix_1.*"))
.and(basic_auth(username, password))
.respond_with(ResponseTemplate::new(200))
.mount(&server)
.await;
Mock::given(method("GET"))
.respond_with(ResponseTemplate::new(401))
.mount(&server)
.await;
let base_url = Url::parse(&server.uri())?;
let index_url = base_url.join("prefix_1")?;
let indexes = Indexes::from_indexes(vec![Index {
url: index_url.clone(),
root_url: index_url.clone(),
auth_policy: AuthPolicy::Auto,
}]);
let client = test_client_builder()
.with(
AuthMiddleware::new()
.with_cache(CredentialsCache::new())
.with_keyring(Some(KeyringProvider::dummy([(
base_url.clone(),
username,
password,
)])))
.with_indexes(indexes),
)
.build();
// Index server does not work without a username
assert_eq!(
client.get(index_url.clone()).send().await?.status(),
401,
"Requests should require a username"
);
// Send a request that will cache realm credentials.
let mut realm_url = base_url.clone();
realm_url.set_username(username).unwrap();
assert_eq!(
client.get(realm_url.clone()).send().await?.status(),
200,
"The first realm request with a username will succeed"
);
let mut url = index_url.clone();
url.set_username(username).unwrap();
assert_eq!(
client.get(url.clone()).send().await?.status(),
200,
"A request with the same username and realm for a URL will use the realm if there is no index-specific password"
);
assert_eq!(
client
.get(base_url.join("prefix_1/foo")?)
.send()
.await?
.status(),
200,
"Requests to other paths with that prefix will also succeed"
);
Ok(())
}
fn indexes_for(url: &Url, policy: AuthPolicy) -> Indexes {
let mut url = url.clone();
let mut policies = UrlAuthPolicies::new();
url.set_password(None).ok();
url.set_username("").ok();
policies.add_policy(url, policy);
policies
Indexes::from_indexes(vec![Index {
url: url.clone(),
root_url: url.clone(),
auth_policy: policy,
}])
}
/// With the "always" auth policy, requests should succeed on
@ -1676,12 +1928,12 @@ mod tests {
let base_url = Url::parse(&server.uri())?;
let auth_policies = auth_policies_for(&base_url, AuthPolicy::Always);
let indexes = indexes_for(&base_url, AuthPolicy::Always);
let client = test_client_builder()
.with(
AuthMiddleware::new()
.with_cache(CredentialsCache::new())
.with_url_auth_policies(auth_policies),
.with_indexes(indexes),
)
.build();
@ -1743,12 +1995,12 @@ mod tests {
let base_url = Url::parse(&server.uri())?;
let auth_policies = auth_policies_for(&base_url, AuthPolicy::Always);
let indexes = indexes_for(&base_url, AuthPolicy::Always);
let client = test_client_builder()
.with(
AuthMiddleware::new()
.with_cache(CredentialsCache::new())
.with_url_auth_policies(auth_policies),
.with_indexes(indexes),
)
.build();
@ -1783,12 +2035,12 @@ mod tests {
.mount(&server)
.await;
let auth_policies = auth_policies_for(&base_url, AuthPolicy::Never);
let indexes = indexes_for(&base_url, AuthPolicy::Never);
let client = test_client_builder()
.with(
AuthMiddleware::new()
.with_cache(CredentialsCache::new())
.with_url_auth_policies(auth_policies),
.with_indexes(indexes),
)
.build();
@ -1828,12 +2080,12 @@ mod tests {
let base_url = Url::parse(&server.uri())?;
let auth_policies = auth_policies_for(&base_url, AuthPolicy::Never);
let indexes = indexes_for(&base_url, AuthPolicy::Never);
let client = test_client_builder()
.with(
AuthMiddleware::new()
.with_cache(CredentialsCache::new())
.with_url_auth_policies(auth_policies),
.with_indexes(indexes),
)
.build();