Remove KeyringProvider.cache (#3243)

This is handled by `CredentialsCache.fetches` instead since #3237 

Moves the test demonstrating the flaw in the cache to the middleware
level.
This commit is contained in:
Zanie Blue 2024-04-24 10:39:24 -05:00 committed by GitHub
parent a5abb8eb1e
commit 84bb6e1976
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 103 additions and 52 deletions

View file

@ -1,8 +1,5 @@
use std::collections::HashSet;
use std::sync::RwLock;
use tokio::process::Command;
use tracing::{debug, instrument, trace, warn};
use tracing::{instrument, trace, warn};
use url::Url;
use crate::credentials::Credentials;
@ -13,8 +10,6 @@ use crate::credentials::Credentials;
/// <https://github.com/pypa/pip/blob/ae5fff36b0aad6e5e0037884927eaa29163c0611/src/pip/_internal/network/auth.py#L102>
#[derive(Debug)]
pub struct KeyringProvider {
/// Tracks host and username pairs with no credentials to avoid repeated queries.
cache: RwLock<HashSet<(String, String)>>,
backend: KeyringProviderBackend,
}
@ -30,7 +25,6 @@ impl KeyringProvider {
/// Create a new [`KeyringProvider::Subprocess`].
pub fn subprocess() -> Self {
Self {
cache: RwLock::new(HashSet::new()),
backend: KeyringProviderBackend::Subprocess,
}
}
@ -55,27 +49,6 @@ impl KeyringProvider {
"Should only use keyring with a username"
);
let host = url.host_str()?;
// Avoid expensive lookups by tracking previous attempts with no credentials.
// N.B. We cache missing credentials per host so no credentials are found for
// a host but would return credentials for some other URL in the same realm
// we may not find the credentials depending on which URL we see first.
// This behavior avoids adding ~80ms to every request when the subprocess keyring
// provider is being used, but makes assumptions about the typical keyring
// use-cases.
let key = (host.to_string(), username.to_string());
{
let cache = self.cache.read().unwrap();
if cache.contains(&key) {
debug!(
"Skipping keyring lookup for {username} at {host}, already attempted and found no credentials."
);
return None;
}
}
// Check the full URL first
// <https://github.com/pypa/pip/blob/ae5fff36b0aad6e5e0037884927eaa29163c0611/src/pip/_internal/network/auth.py#L376C1-L379C14>
trace!("Checking keyring for URL {url}");
@ -90,6 +63,7 @@ impl KeyringProvider {
};
// And fallback to a check for the host
if password.is_none() {
let host = url.host_str()?;
trace!("Checking keyring for host {host}");
password = match self.backend {
KeyringProviderBackend::Subprocess => self.fetch_subprocess(host, username).await,
@ -98,11 +72,6 @@ impl KeyringProvider {
};
}
if password.is_none() {
let mut cache = self.cache.write().unwrap();
cache.insert(key);
}
password.map(|password| Credentials::new(Some(username.to_string()), Some(password)))
}
@ -149,7 +118,6 @@ impl KeyringProvider {
use std::collections::HashMap;
Self {
cache: RwLock::new(HashSet::new()),
backend: KeyringProviderBackend::Dummy(HashMap::from_iter(
iter.into_iter()
.map(|((service, username), password)| ((service.into(), username), password)),
@ -163,7 +131,6 @@ impl KeyringProvider {
use std::collections::HashMap;
Self {
cache: RwLock::new(HashSet::new()),
backend: KeyringProviderBackend::Dummy(HashMap::new()),
}
}
@ -273,22 +240,6 @@ mod test {
);
}
/// Demonstrates "incorrect" behavior in our cache which avoids an expensive fetch of
/// credentials for _every_ request URL at the cost of inconsistent behavior when
/// credentials are not scoped to a realm.
#[tokio::test]
async fn fetch_url_caches_based_on_host() {
let url = Url::parse("https://example.com/").unwrap();
let keyring =
KeyringProvider::dummy([((url.join("foo").unwrap().as_str(), "user"), "password")]);
// If we attempt an unmatching URL first...
assert_eq!(keyring.fetch(&url.join("bar").unwrap(), "user").await, None);
// ... we will cache the missing credentials on subsequent attempts
assert_eq!(keyring.fetch(&url.join("foo").unwrap(), "user").await, None);
}
#[tokio::test]
async fn fetch_url_username() {
let url = Url::parse("https://example.com").unwrap();

View file

@ -342,7 +342,7 @@ impl AuthMiddleware {
debug!("Found credentials in netrc file for {url}");
Some(credentials)
// N.B. The keyring provider performs lookups for the exact URL then
// falls back to the host, but we cache the result per host so if a keyring
// falls back to the host, but we cache the result per realm so if a keyring
// implementation returns different credentials for different URLs in the
// same realm we will use the wrong credentials.
} else if let Some(credentials) = match self.keyring {
@ -1262,4 +1262,104 @@ mod tests {
Ok(())
}
/// Demonstrates "incorrect" behavior in our cache which avoids an expensive fetch of
/// credentials for _every_ request URL at the cost of inconsistent behavior when
/// credentials are not scoped to a realm.
#[test(tokio::test)]
async fn test_credentials_from_keyring_mixed_authentication_in_realm_same_username(
) -> Result<(), Error> {
let username = "user";
let password_1 = "password1";
let password_2 = "password2";
let server = MockServer::start().await;
Mock::given(method("GET"))
.and(path_regex("/prefix_1.*"))
.and(basic_auth(username, password_1))
.respond_with(ResponseTemplate::new(200))
.mount(&server)
.await;
Mock::given(method("GET"))
.and(path_regex("/prefix_2.*"))
.and(basic_auth(username, password_2))
.respond_with(ResponseTemplate::new(200))
.mount(&server)
.await;
Mock::given(method("GET"))
.respond_with(ResponseTemplate::new(401))
.mount(&server)
.await;
let base_url = Url::parse(&server.uri())?;
let base_url_1 = base_url.join("prefix_1")?;
let base_url_2 = base_url.join("prefix_2")?;
let client = test_client_builder()
.with(
AuthMiddleware::new()
.with_cache(CredentialsCache::new())
.with_keyring(Some(KeyringProvider::dummy([
((base_url_1.clone(), username), password_1),
((base_url_2.clone(), username), password_2),
]))),
)
.build();
// Both servers do not work without a username
assert_eq!(
client.get(base_url_1.clone()).send().await?.status(),
401,
"Requests should require a username"
);
assert_eq!(
client.get(base_url_2.clone()).send().await?.status(),
401,
"Requests should require a username"
);
let mut url_1 = base_url_1.clone();
url_1.set_username(username).unwrap();
assert_eq!(
client.get(url_1.clone()).send().await?.status(),
200,
"The first request with a username will succeed"
);
assert_eq!(
client.get(base_url_2.clone()).send().await?.status(),
401,
"Credentials should not be re-used for the second prefix"
);
assert_eq!(
client
.get(base_url.join("prefix_1/foo")?)
.send()
.await?
.status(),
200,
"Subsequent requests can be to different paths in the same prefix"
);
let mut url_2 = base_url_2.clone();
url_2.set_username(username).unwrap();
assert_eq!(
client.get(url_2.clone()).send().await?.status(),
401, // INCORRECT BEHAVIOR
"A request with the same username and realm for a URL that needs a different password will fail"
);
assert_eq!(
client
.get(base_url.join("prefix_2/foo")?)
.send()
.await?
.status(),
401, // INCORRECT BEHAVIOR
"Requests to other paths in the failing prefix will also fail"
);
Ok(())
}
}