Add script to check the top 8k pypi packages (#198)

To check the top 1k (current state):

```bash
scripts/resolve/get_pypi_top_8k.sh
cargo run --bin puffin-dev -- resolve-many scripts/resolve/pypi_top_8k_flat.txt --limit 1000
```

Results:
```
Errors: pywin32, geoip2, maxminddb, pypika, dirac
Success: 995, Error: 5
```
pywin32 has no solution for the build environment, three packages (geoip2, maxminddb, pypika) have no
`[build-system]` entry in pyproject.toml, and `dirac` is missing cmake.
This commit is contained in:
konsti 2023-10-26 14:03:59 +02:00 committed by GitHub
parent 216b6c41c2
commit 5ad58474ca
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 209 additions and 26 deletions

55
Cargo.lock generated
View file

@ -136,6 +136,12 @@ version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bddcadddf5e9015d310179a59bb28c4d4b9920ad0f11e8e14dbadf654890c9a6"
[[package]]
name = "arrayvec"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711"
[[package]]
name = "assert_cmd"
version = "2.0.12"
@ -1286,6 +1292,7 @@ dependencies = [
"number_prefix",
"portable-atomic",
"unicode-width",
"vt100",
]
[[package]]
@ -2112,6 +2119,7 @@ dependencies = [
"fs-err",
"futures",
"gourgeist",
"indicatif",
"itertools",
"pep508_rs",
"platform-host",
@ -2121,9 +2129,11 @@ dependencies = [
"puffin-dispatch",
"puffin-interpreter",
"puffin-package",
"puffin-traits",
"tempfile",
"tokio",
"tracing",
"tracing-indicatif",
"tracing-subscriber",
"which",
]
@ -3336,6 +3346,18 @@ dependencies = [
"valuable",
]
[[package]]
name = "tracing-indicatif"
version = "0.3.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57e05fe4a1c906d94b275d8aeb8ff8b9deaca502aeb59ae8ab500a92b8032ac8"
dependencies = [
"indicatif",
"tracing",
"tracing-core",
"tracing-subscriber",
]
[[package]]
name = "tracing-log"
version = "0.1.3"
@ -3479,6 +3501,39 @@ version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "vt100"
version = "0.15.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "84cd863bf0db7e392ba3bd04994be3473491b31e66340672af5d11943c6274de"
dependencies = [
"itoa",
"log",
"unicode-width",
"vte",
]
[[package]]
name = "vte"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f5022b5fbf9407086c180e9557be968742d839e68346af7792b8592489732197"
dependencies = [
"arrayvec",
"utf8parse",
"vte_generate_state_changes",
]
[[package]]
name = "vte_generate_state_changes"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d257817081c7dffcdbab24b9e62d2def62e2ff7d00b1c20062551e6cccc145ff"
dependencies = [
"proc-macro2",
"quote",
]
[[package]]
name = "wait-timeout"
version = "0.2.0"

View file

@ -63,6 +63,7 @@ tokio-util = { version = "0.7.9", features = ["compat"] }
toml = { version = "0.8.2" }
toml_edit = { version = "0.20.2" }
tracing = { version = "0.1.37" }
tracing-indicatif = { version = "0.3.5" }
tracing-subscriber = { version = "0.3.17", features = ["env-filter"] }
tracing-tree = { version = "0.2.5" }
unicode-width = { version = "0.1.8" }

View file

@ -163,7 +163,7 @@ fn install_hardlink() -> Result<()> {
insta::with_settings!({
filters => vec![
(r"\d+ms", "[TIME]"),
(r"\d+ms|\d+\.\d+s", "[TIME]"),
]
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
@ -210,7 +210,7 @@ fn install_many() -> Result<()> {
insta::with_settings!({
filters => vec![
(r"\d+ms", "[TIME]"),
(r"\d+ms|\d+\.\d+s", "[TIME]"),
]
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
@ -265,7 +265,7 @@ fn noop() -> Result<()> {
insta::with_settings!({
filters => vec![
(r"\d+ms", "[TIME]"),
(r"\d+ms|\d+\.\d+s", "[TIME]"),
]
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
@ -332,7 +332,7 @@ fn link() -> Result<()> {
insta::with_settings!({
filters => vec![
(r"\d+ms", "[TIME]"),
(r"\d+ms|\d+\.\d+s", "[TIME]"),
]
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
@ -392,7 +392,7 @@ fn add_remove() -> Result<()> {
insta::with_settings!({
filters => vec![
(r"\d+ms", "[TIME]"),
(r"\d+ms|\d+\.\d+s", "[TIME]"),
]
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
@ -459,7 +459,7 @@ fn install_sequential() -> Result<()> {
insta::with_settings!({
filters => vec![
(r"\d+ms", "[TIME]"),
(r"\d+ms|\d+\.\d+s", "[TIME]"),
]
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
@ -519,7 +519,7 @@ fn upgrade() -> Result<()> {
insta::with_settings!({
filters => vec![
(r"\d+ms", "[TIME]"),
(r"\d+ms|\d+\.\d+s", "[TIME]"),
]
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))

View file

@ -20,6 +20,7 @@ puffin-client = { path = "../puffin-client" }
puffin-dispatch = { path = "../puffin-dispatch" }
puffin-interpreter = { path = "../puffin-interpreter" }
puffin-package = { path = "../puffin-package" }
puffin-traits = { path = "../puffin-traits" }
anyhow = { workspace = true }
clap = { workspace = true, features = ["derive"] }
@ -27,9 +28,11 @@ colored = { workspace = true }
directories = { workspace = true }
fs-err = { workspace = true }
futures = { workspace = true }
indicatif = { workspace = true }
itertools = { workspace = true }
tempfile = { workspace = true }
tokio = { workspace = true }
tracing = { workspace = true }
tracing-indicatif = { workspace = true }
tracing-subscriber = { workspace = true }
which = { workspace = true }

View file

@ -7,17 +7,27 @@ use anyhow::Result;
use clap::Parser;
use colored::Colorize;
use tracing::debug;
use tracing_subscriber::fmt::format::FmtSpan;
use tracing_indicatif::IndicatifLayer;
use tracing_subscriber::layer::SubscriberExt;
use tracing_subscriber::util::SubscriberInitExt;
use tracing_subscriber::{fmt, EnvFilter};
use tracing_subscriber::EnvFilter;
use puffin_dev::{build, BuildArgs};
use resolve_many::ResolveManyArgs;
mod resolve_many;
// Subcommands of the `puffin-dev` binary. The `///` comments on the variants are picked up by
// clap's derive and shown as the subcommand help text, so they are user-facing strings.
#[derive(Parser)]
enum Cli {
    /// Build a source distribution into a wheel
    Build(BuildArgs),
    /// Resolve many requirements independently in parallel and report failures and successes.
    ///
    /// Run `scripts/resolve/get_pypi_top_8k.sh` once, then
    /// ```bash
    /// cargo run --bin puffin-dev -- resolve-many scripts/resolve/pypi_top_8k_flat.txt
    /// ```
    ResolveMany(ResolveManyArgs),
}
async fn run() -> Result<()> {
@ -27,15 +37,29 @@ async fn run() -> Result<()> {
let target = build(args).await?;
println!("Wheel built to {}", target.display());
}
Cli::ResolveMany(args) => {
resolve_many::resolve_many(args).await?;
}
}
Ok(())
}
#[tokio::main]
async fn main() -> ExitCode {
let indicatif_layer = IndicatifLayer::new();
let indicitif_compatible_writer_layer = tracing_subscriber::fmt::layer()
.with_writer(indicatif_layer.get_stderr_writer())
.with_target(false);
let filter_layer = EnvFilter::try_from_default_env().unwrap_or_else(|_| {
EnvFilter::builder()
// Show only the important spans
.parse("puffin_dev=info,puffin_dispatch=info")
.unwrap()
});
tracing_subscriber::registry()
.with(fmt::layer().with_span_events(FmtSpan::CLOSE))
.with(EnvFilter::from_default_env())
.with(filter_layer)
.with(indicitif_compatible_writer_layer)
.with(indicatif_layer)
.init();
let start = Instant::now();

View file

@ -0,0 +1,92 @@
use clap::Parser;
use directories::ProjectDirs;
use futures::stream::FuturesUnordered;
use futures::StreamExt;
use indicatif::ProgressStyle;
use pep508_rs::Requirement;
use platform_host::Platform;
use puffin_client::RegistryClientBuilder;
use puffin_dispatch::BuildDispatch;
use puffin_interpreter::Virtualenv;
use puffin_traits::BuildContext;
use std::fs;
use std::path::{Path, PathBuf};
use std::str::FromStr;
use std::sync::Arc;
use tokio::sync::Semaphore;
use tracing::{info, info_span, span, Level, Span};
use tracing_indicatif::span_ext::IndicatifSpanExt;
// Command line arguments of the `resolve-many` subcommand. The `///` comments on the fields are
// surfaced by clap as the `--help` text of the respective argument.
#[derive(Parser)]
pub(crate) struct ResolveManyArgs {
    /// Path to a text file containing one requirement per line
    list: PathBuf,
    /// Only resolve the first N requirements of the list
    #[clap(long)]
    limit: Option<usize>,
}
pub(crate) async fn resolve_many(args: ResolveManyArgs) -> anyhow::Result<()> {
let data = fs::read_to_string(&args.list)?;
let lines = data.lines().map(Requirement::from_str);
let requirements: Vec<Requirement> = if let Some(limit) = args.limit {
lines.take(limit).collect::<anyhow::Result<_, _>>()?
} else {
lines.collect::<anyhow::Result<_, _>>()?
};
let project_dirs = ProjectDirs::from("", "", "puffin");
let cache = project_dirs.as_ref().map(ProjectDirs::cache_dir);
let platform = Platform::current()?;
let venv = Virtualenv::from_env(platform, cache)?;
let build_dispatch = BuildDispatch::new(
RegistryClientBuilder::default().cache(cache).build(),
cache.map(Path::to_path_buf),
venv.interpreter_info().clone(),
fs::canonicalize(venv.python_executable())?,
);
let build_dispatch_arc = Arc::new(build_dispatch);
let mut tasks = FuturesUnordered::new();
let semaphore = Arc::new(Semaphore::new(50));
let header_span = info_span!("resolve many");
header_span.pb_set_style(&ProgressStyle::default_bar());
header_span.pb_set_length(requirements.len() as u64);
let _header_span_enter = header_span.enter();
for requirement in requirements {
let build_dispatch_arc = build_dispatch_arc.clone();
let semaphore = semaphore.clone();
tasks.push(tokio::spawn(async move {
let span = span!(Level::TRACE, "resolving");
let _enter = span.enter();
let permit = semaphore.clone().acquire_owned().await.unwrap();
let result = build_dispatch_arc.resolve(&[requirement.clone()]).await;
drop(permit);
(requirement.to_string(), result)
}));
}
let mut success = 0usize;
let mut errors = Vec::new();
while let Some(result) = tasks.next().await {
let (package, result) = result.unwrap();
match result {
Ok(resolution) => {
info!("Success: {} ({} package(s))", package, resolution.len());
success += 1;
}
Err(err) => {
info!("Error for {}: {:?}", package, err);
errors.push(package);
}
}
Span::current().pb_inc(1);
}
info!("Errors: {}", errors.join(", "));
info!("Success: {}, Error: {}", success, errors.len());
Ok(())
}

View file

@ -9,6 +9,7 @@ use std::pin::Pin;
use anyhow::Context;
use itertools::Itertools;
use tempfile::tempdir;
use tracing::{debug, instrument};
use pep508_rs::Requirement;
use platform_tags::Tags;
@ -20,7 +21,6 @@ use puffin_installer::{
use puffin_interpreter::{InterpreterInfo, Virtualenv};
use puffin_resolver::{Manifest, ResolutionMode, Resolver, WheelFinder};
use puffin_traits::BuildContext;
use tracing::debug;
/// The main implementation of [`BuildContext`], used by the CLI, see [`BuildContext`]
/// documentation.
@ -60,10 +60,11 @@ impl BuildContext for BuildDispatch {
&self.base_python
}
#[instrument(skip(self))]
fn resolve<'a>(
&'a self,
requirements: &'a [Requirement],
) -> Pin<Box<dyn Future<Output = anyhow::Result<Vec<Requirement>>> + 'a>> {
) -> Pin<Box<dyn Future<Output = anyhow::Result<Vec<Requirement>>> + Send + 'a>> {
Box::pin(async {
let tags = Tags::from_env(
self.interpreter_info.platform(),
@ -88,11 +89,12 @@ impl BuildContext for BuildDispatch {
})
}
#[instrument(skip(self))]
fn install<'a>(
&'a self,
requirements: &'a [Requirement],
venv: &'a Virtualenv,
) -> Pin<Box<dyn Future<Output = anyhow::Result<()>> + 'a>> {
) -> Pin<Box<dyn Future<Output = anyhow::Result<()>> + Send + 'a>> {
Box::pin(async move {
debug!(
"Install in {} requirements {}",
@ -160,11 +162,12 @@ impl BuildContext for BuildDispatch {
})
}
#[instrument(skip(self))]
fn build_source_distribution<'a>(
&'a self,
sdist: &'a Path,
wheel_dir: &'a Path,
) -> Pin<Box<dyn Future<Output = anyhow::Result<String>> + 'a>> {
) -> Pin<Box<dyn Future<Output = anyhow::Result<String>> + Send + 'a>> {
Box::pin(async move {
let builder =
SourceDistributionBuilder::setup(sdist, &self.interpreter_info, self).await?;

View file

@ -1,5 +1,5 @@
pub use error::ResolveError;
pub use resolution::PinnedPackage;
pub use resolution::{Graph, PinnedPackage};
pub use resolver::{Manifest, Resolver};
pub use selector::ResolutionMode;
pub use source_distribution::BuiltSourceDistributionCache;

View file

@ -64,7 +64,7 @@ impl Manifest {
}
}
pub struct Resolver<'a, Context: BuildContext> {
pub struct Resolver<'a, Context: BuildContext + Sync> {
requirements: Vec<Requirement>,
constraints: Vec<Requirement>,
markers: &'a MarkerEnvironment,
@ -75,7 +75,7 @@ pub struct Resolver<'a, Context: BuildContext> {
build_context: &'a Context,
}
impl<'a, Context: BuildContext> Resolver<'a, Context> {
impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
/// Initialize a new resolver.
pub fn new(
manifest: Manifest,
@ -587,7 +587,7 @@ impl<'a, Context: BuildContext> Resolver<'a, Context> {
fn process_request(
&'a self,
request: Request,
) -> Pin<Box<dyn Future<Output = Result<Response, ResolveError>> + 'a>> {
) -> Pin<Box<dyn Future<Output = Result<Response, ResolveError>> + Send + 'a>> {
match request {
Request::Package(package_name) => Box::pin(
self.client

View file

@ -37,7 +37,7 @@ impl BuildContext for DummyContext {
fn resolve<'a>(
&'a self,
_requirements: &'a [Requirement],
) -> Pin<Box<dyn Future<Output = Result<Vec<Requirement>>> + 'a>> {
) -> Pin<Box<dyn Future<Output = Result<Vec<Requirement>>> + Send + 'a>> {
panic!("The test should not need to build source distributions")
}
@ -45,7 +45,7 @@ impl BuildContext for DummyContext {
&'a self,
_requirements: &'a [Requirement],
_venv: &'a Virtualenv,
) -> Pin<Box<dyn Future<Output = Result<()>> + 'a>> {
) -> Pin<Box<dyn Future<Output = Result<()>> + Send + 'a>> {
panic!("The test should not need to build source distributions")
}
@ -53,7 +53,7 @@ impl BuildContext for DummyContext {
&'a self,
_sdist: &'a Path,
_wheel_dir: &'a Path,
) -> Pin<Box<dyn Future<Output = Result<String>> + 'a>> {
) -> Pin<Box<dyn Future<Output = Result<String>> + Send + 'a>> {
panic!("The test should not need to build source distributions")
}
}

View file

@ -60,14 +60,14 @@ pub trait BuildContext {
fn resolve<'a>(
&'a self,
requirements: &'a [Requirement],
) -> Pin<Box<dyn Future<Output = anyhow::Result<Vec<Requirement>>> + 'a>>;
) -> Pin<Box<dyn Future<Output = anyhow::Result<Vec<Requirement>>> + Send + 'a>>;
/// Install the given set of package versions into the virtual environment. The environment must
/// use the same base python as [`Self::python`]
/// use the same base python as [`BuildContext::base_python`]
fn install<'a>(
&'a self,
requirements: &'a [Requirement],
venv: &'a Virtualenv,
) -> Pin<Box<dyn Future<Output = anyhow::Result<()>> + 'a>>;
) -> Pin<Box<dyn Future<Output = anyhow::Result<()>> + Send + 'a>>;
/// Build a source distribution into a wheel from an archive.
///
/// Returns the filename of the built wheel inside the given `wheel_dir`.
@ -75,5 +75,5 @@ pub trait BuildContext {
&'a self,
sdist: &'a Path,
wheel_dir: &'a Path,
) -> Pin<Box<dyn Future<Output = anyhow::Result<String>> + 'a>>;
) -> Pin<Box<dyn Future<Output = anyhow::Result<String>> + Send + 'a>>;
}

1
scripts/resolve/.gitignore vendored Normal file
View file

@ -0,0 +1 @@
pypi_top_8k_flat.txt

View file

@ -0,0 +1,4 @@
#!/usr/bin/env bash
# Download the top-pypi-packages JSON dump and write the `project` field of every row, one per
# line, to `pypi_top_8k_flat.txt` next to this script (the file is listed in the .gitignore of
# this directory).
#
# Requires `curl` and `jq`.

# Abort on the first error, on unset variables and when any stage of a pipeline fails, so a
# failed download does not look like a successful run.
set -euo pipefail

# Resolve the output path relative to the script instead of the current working directory, so
# that `scripts/resolve/get_pypi_top_8k.sh` invoked from the repository root produces
# `scripts/resolve/pypi_top_8k_flat.txt`.
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# --fail: exit non-zero on HTTP errors instead of piping an error page into jq.
curl --fail --location https://hugovk.github.io/top-pypi-packages/top-pypi-packages-30-days.min.json \
  | jq -r ".rows | .[].project" > "${script_dir}/pypi_top_8k_flat.txt"