Log versions tried from batch prefetch (#3090)

This is required for evaluating #3087.

This also removes tracking of virtual packages from extras from the
batch prefetcher (we only track real packages).

Let's look at some stats:
* jupyter: Tried 100 versions: anyio 1, argon2-cffi 1,
argon2-cffi-bindings 1, arrow 1, asttokens 1, async-lru 1, attrs 1,
babel 1, beautifulsoup4 1, bleach 1, certifi 1, cffi 1,
charset-normalizer 1, comm 1, debugpy 1, decorator 1, defusedxml 1,
exceptiongroup 1, executing 1, fastjsonschema 1, fqdn 1, h11 1, httpcore
1, httpx 1, idna 1, ipykernel 1, ipython 1, ipywidgets 1, isoduration 1,
jedi 1, jinja2 1, json5 1, jsonpointer 1, jsonschema 1,
jsonschema-specifications 1, jupyter 1, jupyter-client 1,
jupyter-console 1, jupyter-core 1, jupyter-events 1, jupyter-lsp 1,
jupyter-server 1, jupyter-server-terminals 1, jupyterlab 1,
jupyterlab-pygments 1, jupyterlab-server 1, jupyterlab-widgets 1,
markupsafe 1, matplotlib-inline 1, mistune 1, nbclient 1, nbconvert 1,
nbformat 1, nest-asyncio 1, notebook 1, notebook-shim 1, overrides 1,
packaging 1, pandocfilters 1, parso 1, pexpect 1, platformdirs 1,
prometheus-client 1, prompt-toolkit 1, psutil 1, ptyprocess 1, pure-eval
1, pycparser 1, pygments 1, python-dateutil 1, python-json-logger 1,
pyyaml 1, pyzmq 1, qtconsole 1, qtpy 1, referencing 1, requests 1,
rfc3339-validator 1, rfc3986-validator 1, root 1, rpds-py 1, send2trash
1, six 1, sniffio 1, soupsieve 1, stack-data 1, terminado 1, tinycss2 1,
tomli 1, tornado 1, traitlets 1, types-python-dateutil 1,
typing-extensions 1, uri-template 1, urllib3 1, wcwidth 1, webcolors 1,
webencodings 1, websocket-client 1, widgetsnbextension 1
* boto3: botocore 1697, boto3 849, urllib3 2, jmespath 1,
python-dateutil 1, root 1, s3transfer 1, six 1
* transformers-extras: Tried 1191 versions: sagemaker 152, hypothesis
67, tensorflow 21, jsonschema 19, tensorflow-cpu 18, multiprocess 10,
pathos 10, tensorflow-text 10, chex 8, tf-keras 8, tf2onnx 8, aiohttp 6,
aiosignal 6, alembic 6, annotated-types 6, apscheduler 6, attrs 6,
backoff 6, binaryornot 6, black 6, boto3 6, click 6, coloredlogs 6,
colorlog 6, dash 6, dash-bootstrap-components 6, dlinfo 6,
exceptiongroup 6, execnet 6, fire 6, frozenlist 6, gitdb 6, google-auth
6, google-auth-oauthlib 6, hjson 6, iniconfig 6, jinja2-time 6, markdown
6, markdown-it-py 6, markupsafe 6, mpmath 6, namex 6, nbformat 6, ninja
6, nvidia-nvjitlink-cu12 6, onnxconverter-common 6, pandas 6, plac 6,
platformdirs 6, pluggy 6, portalocker 6, poyo 6, protobuf3-to-dict 6,
py-cpuinfo 6, py3nvml 6, pyarrow 6, pyarrow-hotfix 6, pydantic-core 6,
pygments 6, pynvml 6, pypng 6, python-slugify 6, responses 6,
smdebug-rulesconfig 6, soupsieve 6, sqlalchemy 6,
tensorboard-data-server 6, tensorboard-plugin-wit 6, tensorboardx 6,
threadpoolctl 6, tomli 6, wasabi 6, wcwidth 6, werkzeug 6, wheel 6,
xxhash 6, zipp 6, etils 5, tensorboard 5, beautifulsoup4 4, cffi 4,
clldutils 4, codecarbon 4, datasets 4, dill 4, evaluate 4, gitpython 4,
hf-doc-builder 4, kenlm 4, librosa 4, llvmlite 4, nest-asyncio 4, nltk
4, optuna 4, parameterized 4, phonemizer 4, psutil 4, pyctcdecode 4,
pytest 4, pytest-timeout 4, pytest-xdist 4, ray 4, rjieba 4, rouge-score
4, ruff 4, sacrebleu 4, sacremoses 4, sigopt 4, sortedcontainers 4,
tensorstore 4, timeout-decorator 4, toolz 4, torchaudio 4, accelerate 3,
audioread 3, cookiecutter 3, decorator 3, deepspeed 3, faiss-cpu 3, flax
3, fugashi 3, ipadic 3, isort 3, jax 3, jaxlib 3, joblib 3, keras-nlp 3,
lazy-loader 3, numba 3, optax 3, pooch 3, pydantic 3, pygtrie 3, rhoknp
3, scikit-learn 3, segments 3, soundfile 3, soxr 3, sudachidict-core 3,
sudachipy 3, torch 3, unidic 3, unidic-lite 3, urllib3 3, absl-py 2,
arrow 2, astunparse 2, async-timeout 2, botocore 2, cachetools 2,
certifi 2, chardet 2, charset-normalizer 2, csvw 2, dash-core-components
2, dash-html-components 2, dash-table 2, diffusers 2, dm-tree 2,
fastjsonschema 2, flask 2, flatbuffers 2, fsspec 2, ftfy 2, gast 2,
google-pasta 2, greenlet 2, grpcio 2, h5py 2, humanfriendly 2, idna 2,
importlib-metadata 2, importlib-resources 2, jinja2 2, jmespath 2,
jupyter-core 2, kagglehub 2, keras 2, keras-core 2, keras-preprocessing
2, libclang 2, mako 2, mdurl 2, ml-dtypes 2, msgpack 2, multidict 2,
mypy-extensions 2, networkx 2, nvidia-cublas-cu12 2,
nvidia-cuda-cupti-cu12 2, nvidia-cuda-nvrtc-cu12 2,
nvidia-cuda-runtime-cu12 2, nvidia-cudnn-cu12 2, nvidia-cufft-cu12 2,
nvidia-curand-cu12 2, nvidia-cusolver-cu12 2, nvidia-cusparse-cu12 2,
nvidia-nccl-cu12 2, nvidia-nvtx-cu12 2, onnx 2, onnxruntime 2,
onnxruntime-tools 2, opencv-python 2, opt-einsum 2, orbax-checkpoint 2,
pathspec 2, plotly 2, pox 2, ppft 2, pyasn1-modules 2, pycparser 2,
pyrsistent 2, python-dateutil 2, pytz 2, requests-oauthlib 2, retrying
2, rich 2, rsa 2, s3transfer 2, scipy 2, setuptools 2, six 2, smmap 2,
sympy 2, tabulate 2, tensorflow-estimator 2, tensorflow-hub 2,
tensorflow-io-gcs-filesystem 2, termcolor 2, text-unidecode 2, traitlets
2, triton 2, typing-extensions 2, tzdata 2, tzlocal 2, wrapt 2,
xmltodict 2, yarl 2, Python 1, av 1, babel 1, bibtexparser 1, blinker 1,
colorama 1, decord 1, filelock 1, huggingface-hub 1, isodate 1,
itsdangerous 1, language-tags 1, lxml 1, numpy 1, oauthlib 1, packaging
1, pillow 1, protobuf 1, pyasn1 1, pylatexenc 1, pyparsing 1, pyyaml 1,
rdflib 1, regex 1, requests 1, rfc3986 1, root 1, safetensors 1,
sentencepiece 1, tenacity 1, timm 1, tokenizers 1, torchvision 1, tqdm
1, transformers 1, types-python-dateutil 1, uritemplate 1


You can reproduce them with python 3.10 and:
```
RUST_LOG=uv_resolver=debug cargo run pip compile -o /dev/null -q scripts/requirements/<input>.in 2>&1 | tail -n 1
```

Closes #2270 - This is less invasive compared to the other PR, we can
revisit number of network/build request tracking later.
This commit is contained in:
konsti 2024-04-17 11:08:21 +02:00 committed by GitHub
parent 64b545d954
commit d1b07a3f49
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 31 additions and 1 deletions

View file

@ -1,5 +1,6 @@
use std::cmp::min;
use itertools::Itertools;
use pubgrub::range::Range;
use rustc_hash::FxHashMap;
use tokio::sync::mpsc::Sender;
@ -158,6 +159,10 @@ impl BatchPrefetcher {
/// Each time we tried a version for a package, we register that here.
pub(crate) fn version_tried(&mut self, package: PubGrubPackage) {
// Only track base packages, no virtual packages from extras.
if matches!(package, PubGrubPackage::Package(_, Some(_), _)) {
return;
}
*self.tried_versions.entry(package).or_default() += 1;
}
@ -173,4 +178,23 @@ impl BatchPrefetcher {
|| (num_tried >= 20 && num_tried - previous_prefetch >= 20);
(num_tried, do_prefetch)
}
/// Log stats about how many versions we tried.
///
/// Note that they may be inflated when we count the same version repeatedly during
/// backtracking.
pub(crate) fn log_tried_versions(&self) {
let total_versions: usize = self.tried_versions.values().sum();
let mut tried_versions: Vec<_> = self.tried_versions.iter().collect();
tried_versions.sort_by(|(p1, c1), (p2, c2)| {
c1.cmp(c2)
.reverse()
.then(p1.to_string().cmp(&p2.to_string()))
});
let counts = tried_versions
.iter()
.map(|(package, count)| format!("{package} {count}"))
.join(", ");
debug!("Tried {total_versions} versions: {counts}");
}
}

View file

@ -14,7 +14,7 @@ use pubgrub::range::Range;
use pubgrub::solver::{Incompatibility, State};
use rustc_hash::{FxHashMap, FxHashSet};
use tokio_stream::wrappers::ReceiverStream;
use tracing::{debug, info_span, instrument, trace, warn, Instrument};
use tracing::{debug, enabled, info_span, instrument, trace, warn, Instrument, Level};
use distribution_types::{
BuiltDist, Dist, DistributionMetadata, IncompatibleDist, IncompatibleSource, IncompatibleWheel,
@ -323,6 +323,9 @@ impl<
priorities.get(package).unwrap_or_default()
})
else {
if enabled!(Level::DEBUG) {
prefetcher.log_tried_versions();
}
let selection = state.partial_solution.extract_solution();
return ResolutionGraph::from_state(
&selection,
@ -482,6 +485,9 @@ impl<
.iter()
.any(|(dependency, _)| dependency == package) =>
{
if enabled!(Level::DEBUG) {
prefetcher.log_tried_versions();
}
return Err(PubGrubError::SelfDependency {
package: package.clone(),
version: version.clone(),