mirror of
https://github.com/astral-sh/uv.git
synced 2025-08-15 08:10:15 +00:00
Store unzipped wheels in a cache (#49)
This PR massively speeds up the case in which you need to install wheels that already exist in the global cache. The new strategy is as follows:

- Download the wheel into the content-addressed cache.
- Unzip the wheel into the cache, but ignore content-addressing. It turns out that writing to `cacache` for every file in the zip added a ton of overhead, and I don't see any actual advantages to doing so. Instead, we just unzip the contents into a directory at, e.g., `~/.cache/puffin/django-4.1.5`.
- (The unzip itself is now parallelized with Rayon.)
- When installing the wheel, we now support unzipping from a directory instead of a zip archive. This required duplicating and tweaking a few functions.
- When installing the wheel, we now use reflinks (or copy-on-write links). These have a few fantastic properties: (1) they're extremely cheap to create (on macOS, they are allegedly faster than hard links); (2) they minimize disk space, since we avoid copying files entirely in the vast majority of cases; and (3) if the user then edits a file locally, the cache doesn't get polluted. Orogene, Bun, and soon pnpm all use reflinks.

Puffin is now ~15x faster than `pip` for the common case of installing cached data into a fresh environment.

Closes https://github.com/astral-sh/puffin/issues/21.
Closes https://github.com/astral-sh/puffin/issues/39.
This commit is contained in:
parent
a46887d34b
commit
2a846e76b7
14 changed files with 723 additions and 175 deletions
78
crates/install-wheel-rs/src/script.rs
Normal file
78
crates/install-wheel-rs/src/script.rs
Normal file
|
@ -0,0 +1,78 @@
|
|||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
use regex::Regex;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::Error;
|
||||
|
||||
/// Minimal `direct_url.json` schema
|
||||
///
|
||||
/// <https://packaging.python.org/en/latest/specifications/direct-url/>
|
||||
/// <https://www.python.org/dev/peps/pep-0610/>
|
||||
#[derive(Serialize)]
|
||||
struct DirectUrl {
|
||||
#[allow(clippy::zero_sized_map_values)]
|
||||
archive_info: HashMap<(), ()>,
|
||||
url: String,
|
||||
}
|
||||
|
||||
/// An entry-point script: the name of the runnable entrypoint plus the module
/// and function that should be run when it is invoked.
///
/// This definition is compiled only when the `python_bindings` feature is
/// enabled; it is additionally exposed as a pyo3 class whose fields each have
/// a generated getter.
#[cfg(feature = "python_bindings")]
#[derive(Clone, Debug, Eq, PartialEq, Serialize)]
#[pyo3::pyclass(dict)]
pub struct Script {
    /// Name of the runnable entrypoint.
    #[pyo3(get)]
    pub script_name: String,
    /// Module that contains the function to run.
    #[pyo3(get)]
    pub module: String,
    /// Function inside `module` that should be run.
    #[pyo3(get)]
    pub function: String,
}
|
||||
|
||||
/// A script defining the name of the runnable entrypoint and the module and function that should be
|
||||
/// run.
|
||||
#[cfg(not(feature = "python_bindings"))]
|
||||
#[derive(Clone, Debug, Eq, PartialEq, Serialize)]
|
||||
pub struct Script {
|
||||
pub script_name: String,
|
||||
pub module: String,
|
||||
pub function: String,
|
||||
}
|
||||
|
||||
impl Script {
|
||||
/// Parses a script definition like `foo.bar:main` or `foomod:main_bar [bar,baz]`
|
||||
///
|
||||
/// <https://packaging.python.org/en/latest/specifications/entry-points/>
|
||||
///
|
||||
/// Extras are supposed to be ignored, which happens if you pass None for extras
|
||||
pub fn from_value(
|
||||
script_name: &str,
|
||||
value: &str,
|
||||
extras: Option<&[String]>,
|
||||
) -> Result<Option<Script>, Error> {
|
||||
let script_regex = Regex::new(r"^(?P<module>[\w\d_\-.]+):(?P<function>[\w\d_\-.]+)(?:\s+\[(?P<extras>(?:[^,]+,?\s*)+)\])?$").unwrap();
|
||||
|
||||
let captures = script_regex
|
||||
.captures(value)
|
||||
.ok_or_else(|| Error::InvalidWheel(format!("invalid console script: '{value}'")))?;
|
||||
if let Some(script_extras) = captures.name("extras") {
|
||||
let script_extras = script_extras
|
||||
.as_str()
|
||||
.split(',')
|
||||
.map(|extra| extra.trim().to_string())
|
||||
.collect::<HashSet<String>>();
|
||||
if let Some(extras) = extras {
|
||||
if !script_extras.is_subset(&extras.iter().cloned().collect()) {
|
||||
return Ok(None);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Some(Script {
|
||||
script_name: script_name.to_string(),
|
||||
module: captures.name("module").unwrap().as_str().to_string(),
|
||||
function: captures.name("function").unwrap().as_str().to_string(),
|
||||
}))
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue