From 2c139d6fca3073db59633cfe84b4de09de461777 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Fri, 6 Sep 2024 14:07:57 -0400 Subject: [PATCH] pep508: implement marker simplification/complexification This adds new routines to `MarkerTree` for "simplifying" and "complexifying" a tree with respect to lower and upper Python version bounds. In effect, "simplifying" a marker is what you do when you write it to a lock file. Namely, since `uv.lock` includes a `requires-python` bound at the top, one can say that it acts as a bound on the supported Python versions. That is, it establishes a context in which one can assume that bound is true. Therefore, the markers we write can be simplified using this assumption. The reverse is "complexifying" a marker, and it's what you do when you read a marker from the lock file. Namely, once a marker is read, it can be very difficult in code to keep the corresponding requires-python context from the lock file. If you lose track of it and decide to operate on the "simplified" marker, then it's trivial for that to produce an incorrect result. --- crates/pep508-rs/src/marker/algebra.rs | 198 ++++++++++++++--- crates/pep508-rs/src/marker/tree.rs | 282 +++++++++++++++++++++---- 2 files changed, 414 insertions(+), 66 deletions(-) diff --git a/crates/pep508-rs/src/marker/algebra.rs b/crates/pep508-rs/src/marker/algebra.rs index 57b707334..96e705ca8 100644 --- a/crates/pep508-rs/src/marker/algebra.rs +++ b/crates/pep508-rs/src/marker/algebra.rs @@ -371,43 +371,187 @@ impl InternerGuard<'_> { self.create_node(node.var.clone(), children) } - // Restrict the output of a given version variable in the tree. - // - // If the provided function `f` returns a `Some` range, the tree will be simplified with - // the assumption that the given variable is restricted to values in that range. If the function - // returns `None`, the variable will not be affected. - pub(crate) fn restrict_versions( + /// Simplify this tree by *assuming* that the Python version range provided + /// is true and that the complement of it is false. + /// + /// For example, with `requires-python = '>=3.8'` and a marker tree of + /// `python_full_version >= '3.8' and python_full_version <= '3.10'`, this + /// would result in a marker of `python_full_version <= '3.10'`. + pub(crate) fn simplify_python_versions( &mut self, i: NodeId, - f: &impl Fn(&Variable) -> Option>, + py_lower: Bound<&Version>, + py_upper: Bound<&Version>, ) -> NodeId { - if matches!(i, NodeId::TRUE | NodeId::FALSE) { + if matches!(i, NodeId::TRUE | NodeId::FALSE) + || matches!((py_lower, py_upper), (Bound::Unbounded, Bound::Unbounded)) + { return i; } let node = self.shared.node(i); - if let Edges::Version { edges: ref map } = node.children { - if let Some(allowed) = f(&node.var) { - // Restrict the output of this variable to the given range. - let mut simplified = SmallVec::new(); - for (range, node) in map { - let restricted = range.intersection(&allowed); - if restricted.is_empty() { - continue; - } - - simplified.push((restricted.clone(), *node)); - } - - return self - .create_node(node.var.clone(), Edges::Version { edges: simplified }) - .negate(i); + // Look for a `python_full_version` expression, otherwise + // we recursively simplify. + let Node { + var: Variable::Version(MarkerValueVersion::PythonFullVersion), + children: Edges::Version { ref edges }, + } = node + else { + // Simplify all nodes recursively. + let children = node.children.map(i, |node_id| { + self.simplify_python_versions(node_id, py_lower, py_upper) + }); + return self.create_node(node.var.clone(), children); + }; + let py_range = Range::from_range_bounds((py_lower.cloned(), py_upper.cloned())); + if py_range.is_empty() { + // Oops, the bounds imply there is nothing that can match, + // so we always evaluate to false. + return NodeId::FALSE; + } + let mut new = SmallVec::new(); + for &(ref range, node) in edges { + let overlap = range.intersection(&py_range); + if overlap.is_empty() { + continue; } + new.push((overlap.clone(), node)); } - // Restrict all nodes recursively. - let children = node.children.map(i, |node| self.restrict_versions(node, f)); - self.create_node(node.var.clone(), children) + // Now that we know the only ranges left are those that + // intersect with our lower/upper Python version bounds, we + // can "extend" out the lower/upper bounds here all the way to + // negative and positive infinity, respectively. + // + // This has the effect of producing a marker that is only + // applicable in a context where the Python lower/upper bounds + // are known to be satisfied. + let &(ref first_range, first_node_id) = new.first().unwrap(); + let first_upper = first_range.bounding_range().unwrap().1; + let clipped = Range::from_range_bounds((Bound::Unbounded, first_upper.cloned())); + *new.first_mut().unwrap() = (clipped, first_node_id); + + let &(ref last_range, last_node_id) = new.last().unwrap(); + let last_lower = last_range.bounding_range().unwrap().0; + let clipped = Range::from_range_bounds((last_lower.cloned(), Bound::Unbounded)); + *new.last_mut().unwrap() = (clipped, last_node_id); + + self.create_node(node.var.clone(), Edges::Version { edges: new }) + .negate(i) + } + + /// Complexify this tree by requiring the given Python version + /// range to be true in order for this marker tree to evaluate to + /// true in all circumstances. + /// + /// For example, with `requires-python = '>=3.8'` and a marker tree of + /// `python_full_version <= '3.10'`, this would result in a marker of + /// `python_full_version >= '3.8' and python_full_version <= '3.10'`. + pub(crate) fn complexify_python_versions( + &mut self, + i: NodeId, + py_lower: Bound<&Version>, + py_upper: Bound<&Version>, + ) -> NodeId { + if matches!(i, NodeId::FALSE) + || matches!((py_lower, py_upper), (Bound::Unbounded, Bound::Unbounded)) + { + return i; + } + + let py_range = Range::from_range_bounds((py_lower.cloned(), py_upper.cloned())); + if py_range.is_empty() { + // Oops, the bounds imply there is nothing that can match, + // so we always evaluate to false. + return NodeId::FALSE; + } + if matches!(i, NodeId::TRUE) { + let var = Variable::Version(MarkerValueVersion::PythonFullVersion); + let edges = Edges::Version { + edges: Edges::from_range(&py_range), + }; + return self.create_node(var, edges).negate(i); + } + + let node = self.shared.node(i); + let Node { + var: Variable::Version(MarkerValueVersion::PythonFullVersion), + children: Edges::Version { ref edges }, + } = node + else { + // Complexify all nodes recursively. + let children = node.children.map(i, |node_id| { + self.complexify_python_versions(node_id, py_lower, py_upper) + }); + return self.create_node(node.var.clone(), children); + }; + // The approach we take here is to filter out any range that + // has no intersection with our Python lower/upper bounds. + // These ranges will now always be false, so we can dismiss + // them entirely. + // + // Then, depending on whether we have finite lower/upper bound, + // we "fix up" the edges by clipping the existing ranges and + // adding an additional range that covers the Python versions + // outside of our bounds by always mapping them to false. + let mut new: SmallVec<_> = edges + .iter() + .filter(|(range, _)| !py_range.intersection(range).is_empty()) + .cloned() + .collect(); + // Below, we assume `new` has at least one element. It's + // subtle, but since 1) edges is a disjoint covering of the + // universe and 2) our `py_range` is non-empty at this point, + // it must intersect with at least one range. + assert!( + !new.is_empty(), + "expected at least one non-empty intersection" + ); + // This is the NodeId we map to anything that should + // always be false. We have to "negate" it in case the + // parent is negated. + let exclude_node_id = NodeId::FALSE.negate(i); + if !matches!(py_lower, Bound::Unbounded) { + let &(ref first_range, first_node_id) = new.first().unwrap(); + let first_upper = first_range.bounding_range().unwrap().1; + // When the first range is always false, then we can just + // "expand" it out to negative infinity to reflect that + // anything less than our lower bound should evaluate to + // false. If we don't do this, then we could have two + // adjacent ranges map to the same node, which would not be + // a canonical representation. + if exclude_node_id == first_node_id { + let clipped = Range::from_range_bounds((Bound::Unbounded, first_upper.cloned())); + *new.first_mut().unwrap() = (clipped, first_node_id); + } else { + let clipped = Range::from_range_bounds((py_lower.cloned(), first_upper.cloned())); + *new.first_mut().unwrap() = (clipped, first_node_id); + + let py_range_lower = + Range::from_range_bounds((py_lower.cloned(), Bound::Unbounded)); + new.insert(0, (py_range_lower.complement(), NodeId::FALSE.negate(i))); + } + } + if !matches!(py_upper, Bound::Unbounded) { + let &(ref last_range, last_node_id) = new.last().unwrap(); + let last_lower = last_range.bounding_range().unwrap().0; + // See lower bound case above for why we do this. The + // same reasoning applies here: to maintain a canonical + // representation. + if exclude_node_id == last_node_id { + let clipped = Range::from_range_bounds((last_lower.cloned(), Bound::Unbounded)); + *new.last_mut().unwrap() = (clipped, last_node_id); + } else { + let clipped = Range::from_range_bounds((last_lower.cloned(), py_upper.cloned())); + *new.last_mut().unwrap() = (clipped, last_node_id); + + let py_range_upper = + Range::from_range_bounds((Bound::Unbounded, py_upper.cloned())); + new.push((py_range_upper.complement(), exclude_node_id)); + } + } + self.create_node(node.var.clone(), Edges::Version { edges: new }) + .negate(i) } } diff --git a/crates/pep508-rs/src/marker/tree.rs b/crates/pep508-rs/src/marker/tree.rs index 6a4a6aa70..77d5f0d24 100644 --- a/crates/pep508-rs/src/marker/tree.rs +++ b/crates/pep508-rs/src/marker/tree.rs @@ -1132,25 +1132,63 @@ impl MarkerTree { extra_expression } - /// Remove the extras from a marker, returning `None` if the marker tree evaluates to `true`. + /// Simplify this marker by *assuming* that the Python version range + /// provided is true and that the complement of it is false. /// - /// Any `extra` markers that are always `true` given the provided extras will be removed. - /// Any `extra` markers that are always `false` given the provided extras will be left - /// unchanged. + /// For example, with `requires-python = '>=3.8'` and a marker tree of + /// `python_full_version >= '3.8' and python_full_version <= '3.10'`, this + /// would result in a marker of `python_full_version <= '3.10'`. /// - /// For example, if `dev` is a provided extra, given `sys_platform == 'linux' and extra == 'dev'`, - /// the marker will be simplified to `sys_platform == 'linux'`. + /// This is useful when one wants to write "simpler" markers in a + /// particular context with a bound on the supported Python versions. + /// In general, the simplified markers returned shouldn't be used for + /// evaluation. Instead, they should be turned back into their more + /// "complex" form first. + /// + /// Note that simplifying a marker and then complexifying it, even + /// with the same Python version bounds, is a lossy operation. For + /// example, simplifying `python_version < '3.7'` with `requires-python + /// = ">=3.8"` will result in a marker that always returns false (e.g., + /// `python_version < '0'`). Therefore, complexifying an always-false + /// marker will result in a marker that is still always false, despite + /// the fact that the original marker was true for `<3.7`. Therefore, + /// simplifying should only be done as a one-way transformation when it is + /// known that `requires-python` reflects an eternal lower bound on the + /// results of that simplification. (If `requires-python` changes, then one + /// should reconstitute all relevant markers from the source data.) #[must_use] #[allow(clippy::needless_pass_by_value)] - pub fn simplify_python_versions(self, python_version: Range) -> MarkerTree { - MarkerTree(INTERNER.lock().restrict_versions(self.0, &|var| match var { - // Note that `python_version` is normalized to `python_full_version` internally by the - // decision diagram. - Variable::Version(MarkerValueVersion::PythonFullVersion) => { - Some(python_version.clone()) - } - _ => None, - })) + pub fn simplify_python_versions( + self, + lower: Bound<&Version>, + upper: Bound<&Version>, + ) -> MarkerTree { + MarkerTree( + INTERNER + .lock() + .simplify_python_versions(self.0, lower, upper), + ) + } + + /// Complexify marker tree by requiring the given Python version range + /// to be true in order for this marker tree to evaluate to true in all + /// circumstances. + /// + /// For example, with `requires-python = '>=3.8'` and a marker tree of + /// `python_full_version <= '3.10'`, this would result in a marker of + /// `python_full_version >= '3.8' and python_full_version <= '3.10'`. + #[must_use] + #[allow(clippy::needless_pass_by_value)] + pub fn complexify_python_versions( + self, + lower: Bound<&Version>, + upper: Bound<&Version>, + ) -> MarkerTree { + MarkerTree( + INTERNER + .lock() + .complexify_python_versions(self.0, lower, upper), + ) } /// Remove the extras from a marker, returning `None` if the marker tree evaluates to `true`. @@ -1636,7 +1674,6 @@ mod test { use insta::assert_snapshot; use pep440_rs::Version; - use pubgrub::Range; use uv_normalize::ExtraName; use crate::marker::{MarkerEnvironment, MarkerEnvironmentBuilder}; @@ -1711,19 +1748,19 @@ mod test { assert_eq!( m("(python_version <= '3.11' and sys_platform == 'win32') or python_version > '3.11'") - .simplify_python_versions(Range::from_range_bounds(( - Bound::Excluded(Version::new([3, 12])), - Bound::Unbounded, - ))), + .simplify_python_versions( + Bound::Excluded(Version::new([3, 12])).as_ref(), + Bound::Unbounded.as_ref(), + ), MarkerTree::TRUE ); assert_eq!( m("python_version < '3.10'") - .simplify_python_versions(Range::from_range_bounds(( - Bound::Excluded(Version::new([3, 7])), - Bound::Unbounded, - ))) + .simplify_python_versions( + Bound::Excluded(Version::new([3, 7])).as_ref(), + Bound::Unbounded.as_ref(), + ) .try_to_string() .unwrap(), "python_full_version < '3.10'" @@ -1732,28 +1769,29 @@ mod test { // Note that `3.12.1` will still match. assert_eq!( m("python_version <= '3.12'") - .simplify_python_versions(Range::from_range_bounds(( - Bound::Excluded(Version::new([3, 12])), - Bound::Unbounded, - ))) + .simplify_python_versions( + Bound::Excluded(Version::new([3, 12])).as_ref(), + Bound::Unbounded.as_ref(), + ) .try_to_string() .unwrap(), "python_full_version < '3.13'" ); assert_eq!( - m("python_full_version <= '3.12'").simplify_python_versions(Range::from_range_bounds( - (Bound::Excluded(Version::new([3, 12])), Bound::Unbounded,) - )), + m("python_full_version <= '3.12'").simplify_python_versions( + Bound::Excluded(Version::new([3, 12])).as_ref(), + Bound::Unbounded.as_ref(), + ), MarkerTree::FALSE ); assert_eq!( m("python_full_version <= '3.12.1'") - .simplify_python_versions(Range::from_range_bounds(( - Bound::Excluded(Version::new([3, 12])), - Bound::Unbounded, - ))) + .simplify_python_versions( + Bound::Excluded(Version::new([3, 12])).as_ref(), + Bound::Unbounded.as_ref(), + ) .try_to_string() .unwrap(), "python_full_version <= '3.12.1'" @@ -2521,10 +2559,9 @@ mod test { #[test] fn test_requires_python() { fn simplified(marker: &str) -> MarkerTree { - m(marker).simplify_python_versions(Range::from_range_bounds(( - Bound::Included(Version::new([3, 8])), - Bound::Unbounded, - ))) + let lower = Bound::Included(Version::new([3, 8])); + let upper = Bound::Unbounded; + m(marker).simplify_python_versions(lower.as_ref(), upper.as_ref()) } assert_eq!(simplified("python_version >= '3.8'"), MarkerTree::TRUE); @@ -2786,4 +2823,171 @@ mod test { let (left, right) = (m(left.as_ref()), m(right.as_ref())); left.is_disjoint(&right) && right.is_disjoint(&left) } + + #[test] + fn complexified_markers() { + // Takes optional lower (inclusive) and upper (exclusive) + // bounds representing `requires-python` and a "simplified" + // marker, and returns the "complexified" marker. That is, a + // marker that embeds the `requires-python` constraint into it. + let complexify = + |lower: Option<[u64; 2]>, upper: Option<[u64; 2]>, marker: &str| -> MarkerTree { + let lower = lower + .map(|release| Bound::Included(Version::new(release))) + .unwrap_or(Bound::Unbounded); + let upper = upper + .map(|release| Bound::Excluded(Version::new(release))) + .unwrap_or(Bound::Unbounded); + m(marker).complexify_python_versions(lower.as_ref(), upper.as_ref()) + }; + + assert_eq!( + complexify(None, None, "python_full_version < '3.10'"), + m("python_full_version < '3.10'"), + ); + assert_eq!( + complexify(Some([3, 8]), None, "python_full_version < '3.10'"), + m("python_full_version >= '3.8' and python_full_version < '3.10'"), + ); + assert_eq!( + complexify(None, Some([3, 8]), "python_full_version < '3.10'"), + m("python_full_version < '3.8'"), + ); + assert_eq!( + complexify(Some([3, 8]), Some([3, 8]), "python_full_version < '3.10'"), + // Kinda weird, but this normalizes to `false`, just like the above. + m("python_full_version < '0' and python_full_version > '0'"), + ); + + assert_eq!( + complexify(Some([3, 11]), None, "python_full_version < '3.10'"), + // Kinda weird, but this normalizes to `false`, just like the above. + m("python_full_version < '0' and python_full_version > '0'"), + ); + assert_eq!( + complexify(Some([3, 11]), None, "python_full_version >= '3.10'"), + m("python_full_version >= '3.11'"), + ); + assert_eq!( + complexify(Some([3, 11]), None, "python_full_version >= '3.12'"), + m("python_full_version >= '3.12'"), + ); + + assert_eq!( + complexify(None, Some([3, 11]), "python_full_version > '3.12'"), + // Kinda weird, but this normalizes to `false`, just like the above. + m("python_full_version < '0' and python_full_version > '0'"), + ); + assert_eq!( + complexify(None, Some([3, 11]), "python_full_version <= '3.12'"), + m("python_full_version < '3.11'"), + ); + assert_eq!( + complexify(None, Some([3, 11]), "python_full_version <= '3.10'"), + m("python_full_version <= '3.10'"), + ); + + assert_eq!( + complexify(Some([3, 11]), None, "python_full_version == '3.8'"), + // Kinda weird, but this normalizes to `false`, just like the above. + m("python_full_version < '0' and python_full_version > '0'"), + ); + assert_eq!( + complexify( + Some([3, 11]), + None, + "python_full_version == '3.8' or python_full_version == '3.12'" + ), + m("python_full_version == '3.12'"), + ); + assert_eq!( + complexify( + Some([3, 11]), + None, + "python_full_version == '3.8' \ + or python_full_version == '3.11' \ + or python_full_version == '3.12'" + ), + m("python_full_version == '3.11' or python_full_version == '3.12'"), + ); + + // Tests a tricky case where if a marker is always true, then + // complexifying it will proceed correctly by adding the + // requires-python constraint. This is a regression test for + // an early implementation that special cased the "always + // true" case to return "always true" regardless of the + // requires-python bounds. + assert_eq!( + complexify( + Some([3, 12]), + None, + "python_full_version < '3.10' or python_full_version >= '3.10'" + ), + m("python_full_version >= '3.12'"), + ); + } + + #[test] + fn simplified_markers() { + // Takes optional lower (inclusive) and upper (exclusive) + // bounds representing `requires-python` and a "complexified" + // marker, and returns the "simplified" marker. That is, a + // marker that assumes `requires-python` is true. + let simplify = + |lower: Option<[u64; 2]>, upper: Option<[u64; 2]>, marker: &str| -> MarkerTree { + let lower = lower + .map(|release| Bound::Included(Version::new(release))) + .unwrap_or(Bound::Unbounded); + let upper = upper + .map(|release| Bound::Excluded(Version::new(release))) + .unwrap_or(Bound::Unbounded); + m(marker).simplify_python_versions(lower.as_ref(), upper.as_ref()) + }; + + assert_eq!( + simplify( + Some([3, 8]), + None, + "python_full_version >= '3.8' and python_full_version < '3.10'" + ), + m("python_full_version < '3.10'"), + ); + assert_eq!( + simplify(Some([3, 8]), None, "python_full_version < '3.7'"), + // Kinda weird, but this normalizes to `false`, just like the above. + m("python_full_version < '0' and python_full_version > '0'"), + ); + assert_eq!( + simplify( + Some([3, 8]), + Some([3, 11]), + "python_full_version == '3.7.*' \ + or python_full_version == '3.8.*' \ + or python_full_version == '3.10.*' \ + or python_full_version == '3.11.*' \ + " + ), + // Given `requires-python = '>=3.8,<3.11'`, only `3.8.*` + // and `3.10.*` can possibly be true. So this simplifies + // to `!= 3.9.*`. + m("python_full_version != '3.9.*'"), + ); + assert_eq!( + simplify( + Some([3, 8]), + None, + "python_full_version >= '3.8' and sys_platform == 'win32'" + ), + m("sys_platform == 'win32'"), + ); + assert_eq!( + simplify( + Some([3, 8]), + None, + "python_full_version >= '3.9' \ + and (sys_platform == 'win32' or python_full_version >= '3.8')", + ), + m("python_full_version >= '3.9'"), + ); + } }