mirror of
https://github.com/astral-sh/uv.git
synced 2025-12-02 16:16:47 +00:00
Share flat index across resolutions (#930)
## Summary

This PR restructures the flat index fetching in a few ways:

1. It now lives in its own `FlatIndexClient`, since it felt a bit awkward (in my opinion) for it to live in `RegistryClient`.
2. We now fetch the `FlatIndex` outside of the resolver. This has a few benefits: (1) the resolver constructor is no longer `async` and no longer returns `Result`, which feels better for a resolver; and (2) we can share the `FlatIndex` across resolutions rather than re-fetching it for every source distribution build.
This commit is contained in:
parent
e6d7124147
commit
42888a9609
16 changed files with 336 additions and 192 deletions
|
|
@ -1,19 +1,172 @@
|
|||
use std::collections::btree_map::Entry;
use std::collections::BTreeMap;
use std::path::{Path, PathBuf};

use reqwest::Response;
use rustc_hash::FxHashMap;
use tracing::{debug, info_span, instrument, warn, Instrument};
use url::Url;

use distribution_filename::DistFilename;
use distribution_types::{
    BuiltDist, Dist, File, FileLocation, FlatIndexLocation, IndexUrl, PrioritizedDistribution,
    RegistryBuiltDist, RegistrySourceDist, SourceDist,
};
use pep440_rs::Version;
use pep508_rs::VerbatimUrl;
use platform_tags::Tags;
use puffin_cache::{Cache, CacheBucket};
use puffin_normalize::PackageName;
use pypi_types::Hashes;

use crate::html::SimpleHtml;
use crate::{Error, RegistryClient};

/// A single distribution discovered in a `--find-links` source: the parsed
/// filename, the file metadata, and the index it came from.
pub type FlatIndexEntry = (DistFilename, File, IndexUrl);
||||
/// A client for reading distributions from `--find-links` entries (either local directories or
/// remote HTML indexes).
#[derive(Debug, Clone)]
pub struct FlatIndexClient<'a> {
    // Underlying registry client used for remote (HTTP) flat-index fetches.
    client: &'a RegistryClient,
    // Cache into which fetched flat-index pages are persisted.
    cache: &'a Cache,
}
|
||||
|
||||
impl<'a> FlatIndexClient<'a> {
|
||||
/// Create a new [`FlatIndexClient`].
|
||||
pub fn new(client: &'a RegistryClient, cache: &'a Cache) -> Self {
|
||||
Self { client, cache }
|
||||
}
|
||||
|
||||
/// Read the directories and flat remote indexes from `--find-links`.
|
||||
#[allow(clippy::result_large_err)]
|
||||
pub async fn fetch(
|
||||
&self,
|
||||
indexes: impl Iterator<Item = &FlatIndexLocation>,
|
||||
) -> Result<Vec<FlatIndexEntry>, Error> {
|
||||
let mut dists = Vec::new();
|
||||
// TODO(konstin): Parallelize reads over flat indexes.
|
||||
for flat_index in indexes {
|
||||
let index_dists = match flat_index {
|
||||
FlatIndexLocation::Path(path) => {
|
||||
Self::read_from_directory(path).map_err(Error::FindLinks)?
|
||||
}
|
||||
FlatIndexLocation::Url(url) => self.read_from_url(url).await?,
|
||||
};
|
||||
if index_dists.is_empty() {
|
||||
warn!("No packages found in `--find-links` entry: {}", flat_index);
|
||||
} else {
|
||||
debug!(
|
||||
"Found {} package{} in `--find-links` entry: {}",
|
||||
index_dists.len(),
|
||||
if index_dists.len() == 1 { "" } else { "s" },
|
||||
flat_index
|
||||
);
|
||||
}
|
||||
dists.extend(index_dists);
|
||||
}
|
||||
Ok(dists)
|
||||
}
|
||||
|
||||
/// Read a flat remote index from a `--find-links` URL.
|
||||
async fn read_from_url(&self, url: &Url) -> Result<Vec<FlatIndexEntry>, Error> {
|
||||
let cache_entry = self.cache.entry(
|
||||
CacheBucket::FlatIndex,
|
||||
"html",
|
||||
format!("{}.msgpack", cache_key::digest(&url.to_string())),
|
||||
);
|
||||
let cached_client = self.client.cached_client();
|
||||
|
||||
let flat_index_request = cached_client
|
||||
.uncached()
|
||||
.get(url.clone())
|
||||
.header("Accept-Encoding", "gzip")
|
||||
.header("Accept", "text/html")
|
||||
.build()?;
|
||||
let parse_simple_response = |response: Response| {
|
||||
async {
|
||||
let text = response.text().await?;
|
||||
let SimpleHtml { base, files } = SimpleHtml::parse(&text, url)
|
||||
.map_err(|err| Error::from_html_err(err, url.clone()))?;
|
||||
|
||||
let files: Vec<File> = files
|
||||
.into_iter()
|
||||
.filter_map(|file| {
|
||||
match File::try_from(file, &base) {
|
||||
Ok(file) => Some(file),
|
||||
Err(err) => {
|
||||
// Ignore files with unparseable version specifiers.
|
||||
warn!("Skipping file in {url}: {err}");
|
||||
None
|
||||
}
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
Ok(files)
|
||||
}
|
||||
.instrument(info_span!("parse_flat_index_html", url = % url))
|
||||
};
|
||||
let files = cached_client
|
||||
.get_cached_with_callback(flat_index_request, &cache_entry, parse_simple_response)
|
||||
.await?;
|
||||
Ok(files
|
||||
.into_iter()
|
||||
.filter_map(|file| {
|
||||
Some((
|
||||
DistFilename::try_from_normalized_filename(&file.filename)?,
|
||||
file,
|
||||
IndexUrl::Url(url.clone()),
|
||||
))
|
||||
})
|
||||
.collect())
|
||||
}
|
||||
|
||||
/// Read a flat remote index from a `--find-links` directory.
|
||||
fn read_from_directory(path: &PathBuf) -> Result<Vec<FlatIndexEntry>, std::io::Error> {
|
||||
// Absolute paths are required for the URL conversion.
|
||||
let path = fs_err::canonicalize(path)?;
|
||||
let url = Url::from_directory_path(&path).expect("URL is already absolute");
|
||||
let url = VerbatimUrl::unknown(url);
|
||||
|
||||
let mut dists = Vec::new();
|
||||
for entry in fs_err::read_dir(&path)? {
|
||||
let entry = entry?;
|
||||
let metadata = entry.metadata()?;
|
||||
if !metadata.is_file() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let Ok(filename) = entry.file_name().into_string() else {
|
||||
warn!(
|
||||
"Skipping non-UTF-8 filename in `--find-links` directory: {}",
|
||||
entry.file_name().to_string_lossy()
|
||||
);
|
||||
continue;
|
||||
};
|
||||
|
||||
let file = File {
|
||||
dist_info_metadata: None,
|
||||
filename: filename.to_string(),
|
||||
hashes: Hashes { sha256: None },
|
||||
requires_python: None,
|
||||
size: None,
|
||||
upload_time: None,
|
||||
url: FileLocation::Path(entry.path().to_path_buf(), url.clone()),
|
||||
yanked: None,
|
||||
};
|
||||
|
||||
let Some(filename) = DistFilename::try_from_normalized_filename(&filename) else {
|
||||
debug!(
|
||||
"Ignoring `--find-links` entry (expected a wheel or source distribution filename): {}",
|
||||
entry.path().display()
|
||||
);
|
||||
continue;
|
||||
};
|
||||
dists.push((filename, file, IndexUrl::Pypi));
|
||||
}
|
||||
Ok(dists)
|
||||
}
|
||||
}
|
||||
|
||||
/// A set of [`PrioritizedDistribution`] from a `--find-links` entry, indexed by [`PackageName`]
|
||||
/// and [`Version`].
|
||||
|
|
@ -23,11 +176,11 @@ pub struct FlatIndex(FxHashMap<PackageName, FlatDistributions>);
|
|||
impl FlatIndex {
|
||||
/// Collect all files from a `--find-links` target into a [`FlatIndex`].
|
||||
#[instrument(skip_all)]
|
||||
pub fn from_files(dists: Vec<FlatIndexEntry>, tags: &Tags) -> Self {
|
||||
pub fn from_entries(entries: Vec<FlatIndexEntry>, tags: &Tags) -> Self {
|
||||
let mut flat_index = FxHashMap::default();
|
||||
|
||||
// Collect compatible distributions.
|
||||
for (filename, file, index) in dists {
|
||||
for (filename, file, index) in entries {
|
||||
let distributions = flat_index.entry(filename.name().clone()).or_default();
|
||||
Self::add_file(distributions, file, filename, tags, index);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue