Option to resolve at a fixed timestamp with pip-compile --exclude-newer YYYY-MM-DD (#434)

This works by filtering out files with a more recent upload time, so if
the index you use does not provide upload times, the results might be
inaccurate. PyPI provides upload times for all files. That is, the field
is non-nullable in the warehouse schema, but the simple API PEP does not
know this field.

If you have only PyPI dependencies, this means deterministic,
reproducible(!) resolution. We could try doing the same for git repos
but it doesn't seem worth the effort. I'd recommend pinning commits
since git histories are arbitrarily malleable, and also if you care about
reproducibility and such you should not use git dependencies but a custom
index.

Timestamps are given either as RFC 3339 timestamps such as
`2006-12-02T02:07:43Z` or as UTC dates in the same format such as
`2006-12-02`. Dates are interpreted as including this day, i.e. until
midnight UTC that day. Date only is required to make this ergonomic and
midnight seems like an ergonomic choice.

In action for `pandas`:

```console
$ target/debug/puffin pip-compile --exclude-newer 2023-11-16 target/pandas.in
Resolved 6 packages in 679ms
# This file was autogenerated by Puffin v0.0.1 via the following command:
#    target/debug/puffin pip-compile --exclude-newer 2023-11-16 target/pandas.in
numpy==1.26.2
    # via pandas
pandas==2.1.3
python-dateutil==2.8.2
    # via pandas
pytz==2023.3.post1
    # via pandas
six==1.16.0
    # via python-dateutil
tzdata==2023.3
    # via pandas
$ target/debug/puffin pip-compile --exclude-newer 2022-11-16 target/pandas.in
Resolved 5 packages in 655ms
# This file was autogenerated by Puffin v0.0.1 via the following command:
#    target/debug/puffin pip-compile --exclude-newer 2022-11-16 target/pandas.in
numpy==1.23.4
    # via pandas
pandas==1.5.1
python-dateutil==2.8.2
    # via pandas
pytz==2022.6
    # via pandas
six==1.16.0
    # via python-dateutil
$ target/debug/puffin pip-compile --exclude-newer 2021-11-16 target/pandas.in
Resolved 5 packages in 594ms
# This file was autogenerated by Puffin v0.0.1 via the following command:
#    target/debug/puffin pip-compile --exclude-newer 2021-11-16 target/pandas.in
numpy==1.21.4
    # via pandas
pandas==1.3.4
python-dateutil==2.8.2
    # via pandas
pytz==2021.3
    # via pandas
six==1.16.0
    # via python-dateutil
```
This commit is contained in:
konsti 2023-11-16 20:46:17 +01:00 committed by GitHub
parent 0d455ebd06
commit e41ec12239
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 218 additions and 38 deletions

6
Cargo.lock generated
View file

@ -504,7 +504,10 @@ checksum = "7f2c685bad3eb3d45a01354cedb7d5faa66194d1d58ba6e267a8de788f79db38"
dependencies = [
"android-tzdata",
"iana-time-zone",
"js-sys",
"num-traits",
"serde",
"wasm-bindgen",
"windows-targets 0.48.5",
]
@ -2341,6 +2344,7 @@ dependencies = [
"assert_fs",
"bitflags 2.4.1",
"cacache",
"chrono",
"clap",
"colored",
"directories",
@ -2567,6 +2571,7 @@ version = "0.0.1"
dependencies = [
"anyhow",
"bitflags 2.4.1",
"chrono",
"clap",
"colored",
"derivative",
@ -2703,6 +2708,7 @@ dependencies = [
name = "pypi-types"
version = "0.0.1"
dependencies = [
"chrono",
"indoc",
"insta",
"mailparse",

View file

@ -24,6 +24,7 @@ camino = { version = "1.1.6", features = ["serde1"] }
clap = { version = "4.4.7" }
colored = { version = "2.0.4" }
configparser = { version = "3.0.2" }
chrono = { version = "0.4.31" }
csv = { version = "1.3.0" }
data-encoding = { version = "2.4.0" }
directories = { version = "5.0.1" }

View file

@ -36,6 +36,7 @@ anstream = { workspace = true }
anyhow = { workspace = true }
bitflags = { workspace = true }
cacache = { workspace = true }
chrono = { workspace = true }
clap = { workspace = true, features = ["derive"] }
colored = { workspace = true }
directories = { workspace = true }

View file

@ -7,6 +7,7 @@ use std::str::FromStr;
use std::{env, fs};
use anyhow::{anyhow, Result};
use chrono::{DateTime, Utc};
use colored::Colorize;
use itertools::Itertools;
use tracing::debug;
@ -42,6 +43,7 @@ pub(crate) async fn pip_compile(
index_urls: Option<IndexUrls>,
no_build: bool,
python_version: Option<PythonVersion>,
exclude_newer: Option<DateTime<Utc>>,
cache: &Path,
mut printer: Printer,
) -> Result<ExitStatus> {
@ -102,6 +104,7 @@ pub(crate) async fn pip_compile(
resolution_mode,
prerelease_mode,
project,
exclude_newer,
);
// Detect the current Python interpreter.

View file

@ -1,8 +1,10 @@
use std::borrow::Cow;
use std::path::{Path, PathBuf};
use std::process::ExitCode;
use std::str::FromStr;
use anyhow::Result;
use chrono::{DateTime, Days, NaiveDate, NaiveTime, Utc};
use clap::{Args, Parser, Subcommand};
use colored::Colorize;
use directories::ProjectDirs;
@ -85,6 +87,24 @@ enum Commands {
Remove(RemoveArgs),
}
/// Clap value parser for the union of a calendar date and an RFC 3339 datetime.
///
/// * A plain date such as `2006-12-02` is read as a UTC date that *includes* the whole day, so it
///   maps to `00:00:00` UTC of the following day.
/// * Any other input is parsed as an RFC 3339 timestamp such as `2006-12-02T02:07:43Z` and
///   converted to UTC.
///
/// # Errors
///
/// Returns a message containing both underlying parse errors when `input` is neither a date nor
/// an RFC 3339 timestamp, and an out-of-range message when the day after the given date cannot be
/// represented.
fn date_or_datetime(input: &str) -> Result<DateTime<Utc>, String> {
    let date_err = match NaiveDate::from_str(input) {
        Ok(date) => {
            // Midnight that day is 00:00:00 the next day. `date + Days` panics on overflow and
            // `input` comes straight from the command line, so use the checked variant.
            return date
                .checked_add_days(Days::new(1))
                .map(|next_day| next_day.and_time(NaiveTime::MIN).and_utc())
                .ok_or_else(|| format!("Date is out of range: {input}"));
        }
        Err(err) => err,
    };
    let datetime_err = match DateTime::parse_from_rfc3339(input) {
        Ok(datetime) => return Ok(datetime.with_timezone(&Utc)),
        Err(err) => err,
    };
    // NOTE: the wording of this message is asserted verbatim by the `compile_exclude_newer`
    // CLI snapshot test, so it is intentionally left unchanged here.
    Err(format!(
        "Neither a valid date ({date_err}) not a valid datetime ({datetime_err})"
    ))
}
#[derive(Args)]
#[allow(clippy::struct_excessive_bools)]
struct PipCompileArgs {
@ -130,14 +150,28 @@ struct PipCompileArgs {
#[clap(long)]
upgrade: bool,
/// Don't build source distributions. This means resolving will not run arbitrary code. The
/// cached wheels of already built source distributions will be reused.
/// Don't build source distributions.
///
/// This means resolving will not run arbitrary code. The cached wheels of already built source
/// distributions will be reused.
#[clap(long)]
no_build: bool,
/// The minimum Python version that should be supported.
#[arg(long, short, value_enum)]
python_version: Option<PythonVersion>,
/// Try to resolve at a past time.
///
/// This works by filtering out files with a more recent upload time, so if the index you use
/// does not provide upload times, the results might be inaccurate. pypi provides upload times
/// for all files.
///
/// Timestamps are given either as RFC 3339 timestamps such as `2006-12-02T02:07:43Z` or as
/// UTC dates in the same format such as `2006-12-02`. Dates are interpreted as including this
/// day, i.e. until midnight UTC that day.
#[arg(long, value_parser = date_or_datetime)]
exclude_newer: Option<DateTime<Utc>>,
}
#[derive(Args)]
@ -272,6 +306,7 @@ async fn inner() -> Result<ExitStatus> {
index_urls,
args.no_build,
args.python_version,
args.exclude_newer,
&cache_dir,
printer,
)

View file

@ -31,6 +31,31 @@ fn make_venv_py312(temp_dir: &TempDir, cache_dir: &TempDir) -> PathBuf {
venv.to_path_buf()
}
/// Resolve a specific version of Django from a `requirements.in` file.
///
/// Runs `pip-compile requirements.in` inside a fresh temporary directory against a freshly
/// created virtual environment and snapshots the command result.
#[test]
fn compile_requirements_in() -> Result<()> {
// Isolated working directory and cache so the run does not touch the caller's environment.
let temp_dir = TempDir::new()?;
let cache_dir = TempDir::new()?;
let venv = make_venv_py312(&temp_dir, &cache_dir);
// The single pinned requirement that is fed to the resolver.
let requirements_in = temp_dir.child("requirements.in");
requirements_in.write_str("django==5.0b1")?;
// `INSTA_FILTERS` is applied to the captured output before it is compared.
// NOTE(review): no inline snapshot is given here, so the result is presumably compared against
// a stored snapshot file - confirm.
insta::with_settings!({
filters => INSTA_FILTERS.to_vec()
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
.arg("pip-compile")
.arg("requirements.in")
.arg("--cache-dir")
.arg(cache_dir.path())
.env("VIRTUAL_ENV", venv.as_os_str())
.current_dir(&temp_dir));
});
Ok(())
}
#[test]
fn missing_requirements_in() -> Result<()> {
let temp_dir = TempDir::new()?;
@ -68,31 +93,6 @@ fn missing_venv() -> Result<()> {
Ok(())
}
/// Resolve a specific version of Django from a `requirements.in` file.
///
/// Runs `pip-compile requirements.in` inside a fresh temporary directory against a freshly
/// created virtual environment and snapshots the command result.
#[test]
fn compile_requirements_in() -> Result<()> {
// Isolated working directory and cache so the run does not touch the caller's environment.
let temp_dir = TempDir::new()?;
let cache_dir = TempDir::new()?;
let venv = make_venv_py312(&temp_dir, &cache_dir);
// The single pinned requirement that is fed to the resolver.
let requirements_in = temp_dir.child("requirements.in");
requirements_in.write_str("django==5.0b1")?;
// `INSTA_FILTERS` is applied to the captured output before it is compared.
// NOTE(review): no inline snapshot is given here, so the result is presumably compared against
// a stored snapshot file - confirm.
insta::with_settings!({
filters => INSTA_FILTERS.to_vec()
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
.arg("pip-compile")
.arg("requirements.in")
.arg("--cache-dir")
.arg(cache_dir.path())
.env("VIRTUAL_ENV", venv.as_os_str())
.current_dir(&temp_dir));
});
Ok(())
}
/// Resolve a specific version of Django from a `pyproject.toml` file.
#[test]
fn compile_pyproject_toml() -> Result<()> {
@ -1206,11 +1206,7 @@ dependencies = ["django==5.0b1", "django==5.0a1"]
)?;
insta::with_settings!({
filters => vec![
(r"\d(ms|s)", "[TIME]"),
(r"# .* pip-compile", "# [BIN_PATH] pip-compile"),
(r"--cache-dir .*", "--cache-dir [CACHE_DIR]"),
]
filters => INSTA_FILTERS.to_vec()
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
.arg("pip-compile")
@ -1244,11 +1240,7 @@ dependencies = ["django==300.1.4"]
)?;
insta::with_settings!({
filters => vec![
(r"\d(ms|s)", "[TIME]"),
(r"# .* pip-compile", "# [BIN_PATH] pip-compile"),
(r"--cache-dir .*", "--cache-dir [CACHE_DIR]"),
]
filters => INSTA_FILTERS.to_vec()
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
.arg("pip-compile")
@ -1261,3 +1253,92 @@ dependencies = ["django==300.1.4"]
Ok(())
}
/// Resolve at a specific time in the past
///
/// Exercises `--exclude-newer` three ways against an unpinned `tqdm` requirement: with a full
/// RFC 3339 timestamp, with a date only, and with an input that is neither and must be rejected.
#[test]
fn compile_exclude_newer() -> Result<()> {
let temp_dir = TempDir::new()?;
let cache_dir = TempDir::new()?;
let venv = make_venv_py312(&temp_dir, &cache_dir);
let requirements_in = temp_dir.child("requirements.in");
requirements_in.write_str("tqdm")?;
// Case 1: a full timestamp that lies between the two upload times listed below, so the
// expected resolution is the older release.
insta::with_settings!({
filters => INSTA_FILTERS.to_vec()
}, {
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
.arg("pip-compile")
.arg("requirements.in")
.arg("--exclude-newer")
// 4.64.0: 2022-04-04T01:48:46.194635Z
// 4.64.1: 2022-09-03T11:10:27.148080Z
.arg("2022-04-04T12:00:00Z")
.arg("--cache-dir")
.arg(cache_dir.path())
.env("VIRTUAL_ENV", venv.as_os_str())
.current_dir(&temp_dir), @r###"
success: true
exit_code: 0
----- stdout -----
# This file was autogenerated by Puffin v0.0.1 via the following command:
# puffin pip-compile requirements.in --exclude-newer 2022-04-04T12:00:00Z --cache-dir [CACHE_DIR]
tqdm==4.64.0
----- stderr -----
Resolved 1 package in [TIME]
"###);
});
// Case 2: the same cutoff day given as a bare date; the expected resolution is unchanged.
insta::with_settings!({
filters => INSTA_FILTERS.to_vec()
}, {
// Use a date as input instead.
// We interpret a date as including this day
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
.arg("pip-compile")
.arg("requirements.in")
.arg("--exclude-newer")
.arg("2022-04-04")
.arg("--cache-dir")
.arg(cache_dir.path())
.env("VIRTUAL_ENV", venv.as_os_str())
.current_dir(&temp_dir), @r###"
success: true
exit_code: 0
----- stdout -----
# This file was autogenerated by Puffin v0.0.1 via the following command:
# puffin pip-compile requirements.in --exclude-newer 2022-04-04 --cache-dir [CACHE_DIR]
tqdm==4.64.0
----- stderr -----
Resolved 1 package in [TIME]
"###);
});
// Case 3: a date with a UTC offset is neither accepted form; argument parsing must fail with
// exit code 2 and report both parse errors.
insta::with_settings!({
filters => INSTA_FILTERS.to_vec()
}, {
// Check the error message for invalid datetime
assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME))
.arg("pip-compile")
.arg("requirements.in")
.arg("--exclude-newer")
.arg("2022-04-04+02:00")
.arg("--cache-dir")
.arg(cache_dir.path())
.env("VIRTUAL_ENV", venv.as_os_str())
.current_dir(&temp_dir), @r###"
success: false
exit_code: 2
----- stdout -----
----- stderr -----
error: invalid value '2022-04-04+02:00' for '--exclude-newer <EXCLUDE_NEWER>': Neither a valid date (trailing input) not a valid datetime (input contains invalid characters)
For more information, try '--help'.
"###);
});
Ok(())
}

View file

@ -79,6 +79,8 @@ impl BuildContext for BuildDispatch {
self.interpreter_info.simple_version(),
)?;
let resolver = Resolver::new(
// TODO(konstin): Split settings (for all resolutions) and inputs (only for this
// resolution) and attach the former to Self.
Manifest::new(
requirements.to_vec(),
Vec::default(),
@ -86,6 +88,7 @@ impl BuildContext for BuildDispatch {
ResolutionMode::default(),
PreReleaseMode::default(),
None, // TODO(zanieb): We may want to provide a project name here
None,
),
self.interpreter_info.markers(),
&tags,

View file

@ -29,6 +29,7 @@ pypi-types = { path = "../pypi-types" }
anyhow = { workspace = true }
bitflags = { workspace = true }
clap = { workspace = true, features = ["derive"], optional = true }
chrono = { workspace = true }
colored = { workspace = true }
fs-err = { workspace = true, features = ["tokio"] }
futures = { workspace = true }

View file

@ -1,3 +1,4 @@
use chrono::{DateTime, Utc};
use pep508_rs::Requirement;
use puffin_normalize::PackageName;
@ -13,6 +14,7 @@ pub struct Manifest {
pub(crate) resolution_mode: ResolutionMode,
pub(crate) prerelease_mode: PreReleaseMode,
pub(crate) project: Option<PackageName>,
pub(crate) exclude_newer: Option<DateTime<Utc>>,
}
impl Manifest {
@ -23,6 +25,7 @@ impl Manifest {
resolution_mode: ResolutionMode,
prerelease_mode: PreReleaseMode,
project: Option<PackageName>,
exclude_newer: Option<DateTime<Utc>>,
) -> Self {
Self {
requirements,
@ -31,6 +34,7 @@ impl Manifest {
resolution_mode,
prerelease_mode,
project,
exclude_newer,
}
}
}

View file

@ -3,6 +3,7 @@
use std::sync::Arc;
use anyhow::Result;
use chrono::{DateTime, Utc};
use futures::channel::mpsc::UnboundedReceiver;
use futures::{pin_mut, FutureExt, StreamExt, TryFutureExt};
use fxhash::{FxHashMap, FxHashSet};
@ -50,6 +51,7 @@ pub struct Resolver<'a, Context: BuildContext + Sync> {
client: &'a RegistryClient,
selector: CandidateSelector,
index: Arc<Index>,
exclude_newer: Option<DateTime<Utc>>,
locks: Arc<Locks>,
build_context: &'a Context,
reporter: Option<Arc<dyn Reporter>>,
@ -85,6 +87,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
constraints: manifest.constraints,
markers,
tags,
exclude_newer: manifest.exclude_newer,
client,
build_context,
reporter: None,
@ -536,6 +539,7 @@ impl<'a, Context: BuildContext + Sync> Resolver<'a, Context> {
&package_name,
self.tags,
self.build_context.interpreter_info().version(),
self.exclude_newer.as_ref(),
);
self.index.packages.insert(package_name, version_map);
}

View file

@ -2,6 +2,9 @@ use std::collections::btree_map::Entry;
use std::collections::BTreeMap;
use std::str::FromStr;
use chrono::{DateTime, Utc};
use tracing::warn;
use distribution_filename::{SourceDistFilename, WheelFilename};
use pep440_rs::Version;
use platform_tags::{TagPriority, Tags};
@ -22,6 +25,7 @@ impl VersionMap {
package_name: &PackageName,
tags: &Tags,
python_version: &Version,
exclude_newer: Option<&DateTime<Utc>>,
) -> Self {
let mut map = BTreeMap::default();
@ -42,6 +46,25 @@ impl VersionMap {
continue;
}
// Support resolving as if it were an earlier timestamp, at least as long as files have
// upload time information
if let Some(exclude_newer) = exclude_newer {
match file.upload_time.as_ref() {
Some(upload_time) if upload_time >= exclude_newer => {
continue;
}
None => {
// TODO(konstin): Implement and use `warn_once` here.
warn!(
"{} is missing an upload date, but user provided {}",
file.filename, exclude_newer,
);
continue;
}
_ => {}
}
}
// When resolving, exclude yanked files.
// TODO(konstin): When we fail resolving due to a dependency locked to yanked version,
// we should tell the user.

View file

@ -94,6 +94,7 @@ async fn black() -> Result<()> {
ResolutionMode::default(),
PreReleaseMode::default(),
None,
None,
);
let resolution = resolve(manifest, &MARKERS_311, &TAGS_311).await?;
@ -114,6 +115,7 @@ async fn black_colorama() -> Result<()> {
ResolutionMode::default(),
PreReleaseMode::default(),
None,
None,
);
let resolution = resolve(manifest, &MARKERS_311, &TAGS_311).await?;
@ -134,6 +136,7 @@ async fn black_python_310() -> Result<()> {
ResolutionMode::default(),
PreReleaseMode::default(),
None,
None,
);
let resolution = resolve(manifest, &MARKERS_310, &TAGS_310).await?;
@ -156,6 +159,7 @@ async fn black_mypy_extensions() -> Result<()> {
ResolutionMode::default(),
PreReleaseMode::default(),
None,
None,
);
let resolution = resolve(manifest, &MARKERS_311, &TAGS_311).await?;
@ -178,6 +182,7 @@ async fn black_mypy_extensions_extra() -> Result<()> {
ResolutionMode::default(),
PreReleaseMode::default(),
None,
None,
);
let resolution = resolve(manifest, &MARKERS_311, &TAGS_311).await?;
@ -200,6 +205,7 @@ async fn black_flake8() -> Result<()> {
ResolutionMode::default(),
PreReleaseMode::default(),
None,
None,
);
let resolution = resolve(manifest, &MARKERS_311, &TAGS_311).await?;
@ -220,6 +226,7 @@ async fn black_lowest() -> Result<()> {
ResolutionMode::Lowest,
PreReleaseMode::default(),
None,
None,
);
let resolution = resolve(manifest, &MARKERS_311, &TAGS_311).await?;
@ -240,6 +247,7 @@ async fn black_lowest_direct() -> Result<()> {
ResolutionMode::LowestDirect,
PreReleaseMode::default(),
None,
None,
);
let resolution = resolve(manifest, &MARKERS_311, &TAGS_311).await?;
@ -260,6 +268,7 @@ async fn black_respect_preference() -> Result<()> {
ResolutionMode::default(),
PreReleaseMode::default(),
None,
None,
);
let resolution = resolve(manifest, &MARKERS_311, &TAGS_311).await?;
@ -280,6 +289,7 @@ async fn black_ignore_preference() -> Result<()> {
ResolutionMode::default(),
PreReleaseMode::default(),
None,
None,
);
let resolution = resolve(manifest, &MARKERS_311, &TAGS_311).await?;
@ -300,6 +310,7 @@ async fn black_disallow_prerelease() -> Result<()> {
ResolutionMode::default(),
PreReleaseMode::Disallow,
None,
None,
);
let err = resolve(manifest, &MARKERS_311, &TAGS_311)
@ -322,6 +333,7 @@ async fn black_allow_prerelease_if_necessary() -> Result<()> {
ResolutionMode::default(),
PreReleaseMode::IfNecessary,
None,
None,
);
let err = resolve(manifest, &MARKERS_311, &TAGS_311)
@ -344,6 +356,7 @@ async fn pylint_disallow_prerelease() -> Result<()> {
ResolutionMode::default(),
PreReleaseMode::Disallow,
None,
None,
);
let resolution = resolve(manifest, &MARKERS_311, &TAGS_311).await?;
@ -364,6 +377,7 @@ async fn pylint_allow_prerelease() -> Result<()> {
ResolutionMode::default(),
PreReleaseMode::Allow,
None,
None,
);
let resolution = resolve(manifest, &MARKERS_311, &TAGS_311).await?;
@ -387,6 +401,7 @@ async fn pylint_allow_explicit_prerelease_without_marker() -> Result<()> {
ResolutionMode::default(),
PreReleaseMode::Explicit,
None,
None,
);
let resolution = resolve(manifest, &MARKERS_311, &TAGS_311).await?;
@ -410,6 +425,7 @@ async fn pylint_allow_explicit_prerelease_with_marker() -> Result<()> {
ResolutionMode::default(),
PreReleaseMode::Explicit,
None,
None,
);
let resolution = resolve(manifest, &MARKERS_311, &TAGS_311).await?;

View file

@ -14,6 +14,7 @@ pep440_rs = { path = "../pep440-rs", features = ["serde"] }
pep508_rs = { path = "../pep508-rs", features = ["serde"] }
puffin-normalize = { path = "../puffin-normalize" }
chrono = { workspace = true, features = ["serde"] }
mailparse = { workspace = true }
once_cell = { workspace = true }
regex = { workspace = true }

View file

@ -1,3 +1,4 @@
use chrono::{DateTime, Utc};
use pep440_rs::VersionSpecifiers;
use serde::{de, Deserialize, Deserializer, Serialize};
use std::str::FromStr;
@ -28,7 +29,7 @@ pub struct File {
#[serde(deserialize_with = "deserialize_version_specifiers_lenient")]
pub requires_python: Option<VersionSpecifiers>,
pub size: Option<usize>,
pub upload_time: String,
pub upload_time: Option<DateTime<Utc>>,
pub url: String,
pub yanked: Option<Yanked>,
}