Add an editable index to the site-packages registry (#671)

This PR modifies `SitePackages` to store all distributions in a flat
vector, and maintain two indexes (hash maps) from "per-element data for
an element in the vector" to "index of that element". This enables us to
maintain a map on both package name and editable URL.
This commit is contained in:
Charlie Marsh 2023-12-16 22:44:36 -05:00 committed by GitHub
parent 08edd173db
commit 00e1c33af4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 129 additions and 54 deletions

View file

@ -3,6 +3,7 @@ use std::str::FromStr;
use anyhow::{anyhow, Context, Result}; use anyhow::{anyhow, Context, Result};
use fs_err as fs; use fs_err as fs;
use url::Url;
use pep440_rs::Version; use pep440_rs::Version;
use puffin_normalize::PackageName; use puffin_normalize::PackageName;
@ -140,4 +141,15 @@ impl InstalledDist {
Metadata21::parse(&contents) Metadata21::parse(&contents)
.with_context(|| format!("Failed to parse METADATA file at: {}", path.display())) .with_context(|| format!("Failed to parse METADATA file at: {}", path.display()))
} }
/// Return the [`Url`] of the distribution, if it is editable.
pub fn editable(&self) -> Option<&Url> {
match self {
Self::Url(InstalledDirectUrlDist {
url: DirectUrl::LocalDirectory { url, dir_info },
..
}) if dir_info.editable == Some(true) => Some(url),
_ => None,
}
}
} }

View file

@ -1,4 +1,6 @@
use anyhow::Result; use anyhow::Result;
use distribution_types::Metadata;
use itertools::Itertools;
use tracing::debug; use tracing::debug;
use platform_host::Platform; use platform_host::Platform;
@ -21,7 +23,10 @@ pub(crate) fn freeze(cache: &Cache, _printer: Printer) -> Result<ExitStatus> {
// Build the installed index. // Build the installed index.
let site_packages = SitePackages::from_executable(&python)?; let site_packages = SitePackages::from_executable(&python)?;
for dist in site_packages.distributions() { for dist in site_packages
.iter()
.sorted_unstable_by(|a, b| a.name().cmp(b.name()))
{
#[allow(clippy::print_stdout)] #[allow(clippy::print_stdout)]
{ {
println!("{dist}"); println!("{dist}");

View file

@ -78,11 +78,7 @@ pub(crate) async fn pip_uninstall(
// Identify all editables that are installed. // Identify all editables that are installed.
for editable in &editables { for editable in &editables {
if let Some(distribution) = site_packages if let Some(distribution) = site_packages.get_editable(editable) {
.editables()
.find(|(_dist, url, _dir_info)| url == editable)
.map(|(dist, _url, _dir_info)| dist)
{
distributions.push(distribution); distributions.push(distribution);
} else { } else {
writeln!( writeln!(

View file

@ -70,15 +70,10 @@ impl InstallPlan {
let mut seen = let mut seen =
FxHashMap::with_capacity_and_hasher(requirements.len(), BuildHasherDefault::default()); FxHashMap::with_capacity_and_hasher(requirements.len(), BuildHasherDefault::default());
// TODO(charlie): This is quadratic. We should index the editable requirements by URL. // Remove any editable requirements.
for editable in editable_requirements { for editable in editable_requirements {
let editable_dist = site_packages if site_packages.remove_editable(editable.raw()).is_some() {
.editables()
.find(|(_dist, url, _dir_info)| url == &editable.url().raw())
.map(|(dist, _url, _dir_info)| dist.clone());
if let Some(dist) = editable_dist {
debug!("Treating editable requirement as immutable: {editable}"); debug!("Treating editable requirement as immutable: {editable}");
site_packages.remove(dist.name());
} else { } else {
editables.push(editable.clone()); editables.push(editable.clone());
} }
@ -285,8 +280,10 @@ impl InstallPlan {
// If Puffin created the virtual environment, then remove all packages, regardless of // If Puffin created the virtual environment, then remove all packages, regardless of
// whether they're considered "seed" packages. // whether they're considered "seed" packages.
let seed_packages = !venv.cfg().is_ok_and(|cfg| cfg.is_gourgeist()); let seed_packages = !venv.cfg().is_ok_and(|cfg| cfg.is_gourgeist());
for (package, dist_info) in site_packages { for dist_info in site_packages {
if seed_packages && matches!(package.as_ref(), "pip" | "setuptools" | "wheel") { if seed_packages
&& matches!(dist_info.name().as_ref(), "pip" | "setuptools" | "wheel")
{
debug!("Preserving seed package: {dist_info}"); debug!("Preserving seed package: {dist_info}");
continue; continue;
} }

View file

@ -1,38 +1,54 @@
use std::collections::BTreeMap;
use std::hash::BuildHasherDefault; use std::hash::BuildHasherDefault;
use anyhow::{Context, Result}; use anyhow::{Context, Result};
use fs_err as fs; use fs_err as fs;
use rustc_hash::FxHashSet; use rustc_hash::{FxHashMap, FxHashSet};
use url::Url; use url::Url;
use distribution_types::{InstalledDirectUrlDist, InstalledDist, Metadata, VersionOrUrl}; use distribution_types::{InstalledDist, Metadata, VersionOrUrl};
use pep440_rs::{Version, VersionSpecifiers}; use pep440_rs::{Version, VersionSpecifiers};
use pep508_rs::Requirement; use pep508_rs::Requirement;
use puffin_interpreter::Virtualenv; use puffin_interpreter::Virtualenv;
use puffin_normalize::PackageName; use puffin_normalize::PackageName;
use pypi_types::{DirInfo, DirectUrl};
/// An index over the packages installed in an environment.
///
/// Packages are indexed by both name and (for editable installs) URL.
#[derive(Debug)] #[derive(Debug)]
pub struct SitePackages<'a> { pub struct SitePackages<'a> {
venv: &'a Virtualenv, venv: &'a Virtualenv,
index: BTreeMap<PackageName, InstalledDist>, /// The vector of all installed distributions.
distributions: Vec<InstalledDist>,
/// The installed distributions, keyed by name.
by_name: FxHashMap<PackageName, usize>,
/// The installed editable distributions, keyed by URL.
by_url: FxHashMap<Url, usize>,
} }
impl<'a> SitePackages<'a> { impl<'a> SitePackages<'a> {
/// Build an index of installed packages from the given Python executable. /// Build an index of installed packages from the given Python executable.
pub fn from_executable(venv: &'a Virtualenv) -> Result<SitePackages<'a>> { pub fn from_executable(venv: &'a Virtualenv) -> Result<SitePackages<'a>> {
let mut index = BTreeMap::new(); let mut distributions: Vec<InstalledDist> = Vec::new();
let mut by_name = FxHashMap::default();
let mut by_url = FxHashMap::default();
// Index all installed packages by name.
for entry in fs::read_dir(venv.site_packages())? { for entry in fs::read_dir(venv.site_packages())? {
let entry = entry?; let entry = entry?;
if entry.file_type()?.is_dir() { if entry.file_type()?.is_dir() {
if let Some(dist_info) = let Some(dist_info) =
InstalledDist::try_from_path(&entry.path()).with_context(|| { InstalledDist::try_from_path(&entry.path()).with_context(|| {
format!("Failed to read metadata: from {}", entry.path().display()) format!("Failed to read metadata: from {}", entry.path().display())
})? })?
{ else {
if let Some(existing) = index.insert(dist_info.name().clone(), dist_info) { continue;
};
let idx = distributions.len();
// Index the distribution by name.
if let Some(existing) = by_name.insert(dist_info.name().clone(), idx) {
let existing = &distributions[existing];
anyhow::bail!( anyhow::bail!(
"Found duplicate package in environment: {} ({} vs. {})", "Found duplicate package in environment: {} ({} vs. {})",
existing.name(), existing.name(),
@ -40,21 +56,41 @@ impl<'a> SitePackages<'a> {
entry.path().display() entry.path().display()
); );
} }
}
// Index the distribution by URL.
if let Some(url) = dist_info.editable() {
if let Some(existing) = by_url.insert(url.clone(), idx) {
let existing = &distributions[existing];
anyhow::bail!(
"Found duplicate editable in environment: {} ({} vs. {})",
existing.name(),
existing.path().display(),
entry.path().display()
);
} }
} }
Ok(Self { venv, index }) // Add the distribution to the database.
distributions.push(dist_info);
}
}
Ok(Self {
venv,
distributions,
by_name,
by_url,
})
} }
/// Returns an iterator over the installed distributions. /// Returns an iterator over the installed distributions.
pub fn distributions(&self) -> impl Iterator<Item = &InstalledDist> { pub fn iter(&self) -> impl Iterator<Item = &InstalledDist> {
self.index.values() self.distributions.iter()
} }
/// Returns an iterator over the the installed distributions, represented as requirements. /// Returns an iterator over the the installed distributions, represented as requirements.
pub fn requirements(&self) -> impl Iterator<Item = pep508_rs::Requirement> + '_ { pub fn requirements(&self) -> impl Iterator<Item = Requirement> + '_ {
self.distributions().map(|dist| pep508_rs::Requirement { self.iter().map(|dist| Requirement {
name: dist.name().clone(), name: dist.name().clone(),
extras: None, extras: None,
version_or_url: Some(match dist.version_or_url() { version_or_url: Some(match dist.version_or_url() {
@ -69,45 +105,66 @@ impl<'a> SitePackages<'a> {
}) })
} }
/// Returns an iterator over the installed editables.
pub fn editables(&self) -> impl Iterator<Item = (&InstalledDist, &Url, &DirInfo)> {
self.distributions().filter_map(|dist| {
// Editable installs are recorded through this type only
match dist {
InstalledDist::Url(InstalledDirectUrlDist {
url: DirectUrl::LocalDirectory { url, dir_info },
..
}) if dir_info.editable == Some(true) => Some((dist, url, dir_info)),
_ => None,
}
})
}
/// Returns the version of the given package, if it is installed. /// Returns the version of the given package, if it is installed.
pub fn get(&self, name: &PackageName) -> Option<&InstalledDist> { pub fn get(&self, name: &PackageName) -> Option<&InstalledDist> {
self.index.get(name) self.by_name.get(name).map(|idx| &self.distributions[*idx])
} }
/// Remove the given package from the index, returning its version if it was installed. /// Remove the given package from the index, returning its version if it was installed.
pub fn remove(&mut self, name: &PackageName) -> Option<InstalledDist> { pub fn remove(&mut self, name: &PackageName) -> Option<InstalledDist> {
self.index.remove(name) let idx = self.by_name.get(name)?;
Some(self.swap_remove(*idx))
}
/// Returns the editable distribution installed from the given URL, if any.
pub fn get_editable(&self, url: &Url) -> Option<&InstalledDist> {
self.by_url.get(url).map(|idx| &self.distributions[*idx])
}
/// Remove the editable distribution installed from the given URL, if any.
pub fn remove_editable(&mut self, url: &Url) -> Option<InstalledDist> {
let idx = self.by_url.get(url)?;
Some(self.swap_remove(*idx))
}
/// Remove the distribution at the given index.
fn swap_remove(&mut self, idx: usize) -> InstalledDist {
// Remove from the existing index.
let dist = self.distributions.swap_remove(idx);
// If the distribution wasn't at the end, rewrite the entries for the moved distribution.
if idx < self.distributions.len() {
let moved = &self.distributions[idx];
if let Some(prev) = self.by_name.get_mut(moved.name()) {
*prev = idx;
}
if let Some(url) = moved.editable() {
if let Some(prev) = self.by_url.get_mut(url) {
*prev = idx;
}
}
}
dist
} }
/// Returns `true` if there are no installed packages. /// Returns `true` if there are no installed packages.
pub fn is_empty(&self) -> bool { pub fn is_empty(&self) -> bool {
self.index.is_empty() self.distributions.is_empty()
} }
/// Returns the number of installed packages. /// Returns the number of installed packages.
pub fn len(&self) -> usize { pub fn len(&self) -> usize {
self.index.len() self.distributions.len()
} }
/// Validate the installed packages in the virtual environment. /// Validate the installed packages in the virtual environment.
pub fn diagnostics(&self) -> Result<Vec<Diagnostic>> { pub fn diagnostics(&self) -> Result<Vec<Diagnostic>> {
let mut diagnostics = Vec::new(); let mut diagnostics = Vec::new();
for (package, distribution) in &self.index { for (package, index) in &self.by_name {
let distribution = &self.distributions[*index];
// Determine the dependencies for the given package. // Determine the dependencies for the given package.
let metadata = distribution.metadata()?; let metadata = distribution.metadata()?;
@ -128,7 +185,11 @@ impl<'a> SitePackages<'a> {
continue; continue;
} }
let Some(installed) = self.index.get(&requirement.name) else { let Some(installed) = self
.by_name
.get(&requirement.name)
.map(|idx| &self.distributions[*idx])
else {
diagnostics.push(Diagnostic::MissingDependency { diagnostics.push(Diagnostic::MissingDependency {
package: package.clone(), package: package.clone(),
requirement: requirement.clone(), requirement: requirement.clone(),
@ -171,7 +232,11 @@ impl<'a> SitePackages<'a> {
continue; continue;
} }
let Some(distribution) = self.index.get(&requirement.name) else { let Some(distribution) = self
.by_name
.get(&requirement.name)
.map(|idx| &self.distributions[*idx])
else {
// The package isn't installed. // The package isn't installed.
return Ok(false); return Ok(false);
}; };
@ -215,11 +280,11 @@ impl<'a> SitePackages<'a> {
} }
impl IntoIterator for SitePackages<'_> { impl IntoIterator for SitePackages<'_> {
type Item = (PackageName, InstalledDist); type Item = InstalledDist;
type IntoIter = std::collections::btree_map::IntoIter<PackageName, InstalledDist>; type IntoIter = std::vec::IntoIter<Self::Item>;
fn into_iter(self) -> Self::IntoIter { fn into_iter(self) -> Self::IntoIter {
self.index.into_iter() self.distributions.into_iter()
} }
} }