pep508: implement marker simplification/complexification

This adds new routines to `MarkerTree` for "simplifying" and "complexifying" a tree with respect to lower and upper Python version bounds. In effect, "simplifying" a marker is what you do when you write it to a lock file. Namely, since `uv.lock` includes a `requires-python` bound at the top, one can say that it acts as a bound on the supported Python versions. That is, it establishes a context in which one can assume that bound is true. Therefore, the markers we write can be simplified using this assumption. The reverse is "complexifying" a marker, and it's what you do when you read a marker from the lock file. Namely, once a marker is read, it can be very difficult in code to keep the corresponding requires-python context from the lock file. If you lose track of it and decide to operate on the "simplified" marker, then it's trivial for that to produce an incorrect result.
2025-10-17 13:58:29 +00:00 · 2024-09-06 14:07:57 -04:00 · 2024-09-06 14:07:57 -04:00 · 2c139d6fca
commit 2c139d6fca
parent 8bb0a55ff5
2 changed files with 414 additions and 66 deletions
--- a/crates/pep508-rs/src/marker/algebra.rs
+++ b/crates/pep508-rs/src/marker/algebra.rs
@ -371,43 +371,187 @@ impl InternerGuard<'_> {
        self.create_node(node.var.clone(), children)
    }

-    // Restrict the output of a given version variable in the tree.
-    //
-    // If the provided function `f` returns a `Some` range, the tree will be simplified with
-    // the assumption that the given variable is restricted to values in that range. If the function
-    // returns `None`, the variable will not be affected.
-    pub(crate) fn restrict_versions(
+    /// Simplify this tree by *assuming* that the Python version range provided
+    /// is true and that the complement of it is false.
+    ///
+    /// For example, with `requires-python = '>=3.8'` and a marker tree of
+    /// `python_full_version >= '3.8' and python_full_version <= '3.10'`, this
+    /// would result in a marker of `python_full_version <= '3.10'`.
+    pub(crate) fn simplify_python_versions(
        &mut self,
        i: NodeId,
-        f: &impl Fn(&Variable) -> Option<Range<Version>>,
+        py_lower: Bound<&Version>,
+        py_upper: Bound<&Version>,
    ) -> NodeId {
-        if matches!(i, NodeId::TRUE | NodeId::FALSE) {
+        if matches!(i, NodeId::TRUE | NodeId::FALSE)
+            || matches!((py_lower, py_upper), (Bound::Unbounded, Bound::Unbounded))
+        {
            return i;
        }

        let node = self.shared.node(i);
-        if let Edges::Version { edges: ref map } = node.children {
-            if let Some(allowed) = f(&node.var) {
-                // Restrict the output of this variable to the given range.
-                let mut simplified = SmallVec::new();
-                for (range, node) in map {
-                    let restricted = range.intersection(&allowed);
-                    if restricted.is_empty() {
-                        continue;
-                    }
-
-                    simplified.push((restricted.clone(), *node));
-                }
-
-                return self
-                    .create_node(node.var.clone(), Edges::Version { edges: simplified })
-                    .negate(i);
+        // Look for a `python_full_version` expression, otherwise
+        // we recursively simplify.
+        let Node {
+            var: Variable::Version(MarkerValueVersion::PythonFullVersion),
+            children: Edges::Version { ref edges },
+        } = node
+        else {
+            // Simplify all nodes recursively.
+            let children = node.children.map(i, |node_id| {
+                self.simplify_python_versions(node_id, py_lower, py_upper)
+            });
+            return self.create_node(node.var.clone(), children);
+        };
+        let py_range = Range::from_range_bounds((py_lower.cloned(), py_upper.cloned()));
+        if py_range.is_empty() {
+            // Oops, the bounds imply there is nothing that can match,
+            // so we always evaluate to false.
+            return NodeId::FALSE;
+        }
+        let mut new = SmallVec::new();
+        for &(ref range, node) in edges {
+            let overlap = range.intersection(&py_range);
+            if overlap.is_empty() {
+                continue;
            }
+            new.push((overlap.clone(), node));
        }

-        // Restrict all nodes recursively.
-        let children = node.children.map(i, |node| self.restrict_versions(node, f));
-        self.create_node(node.var.clone(), children)
+        // Now that we know the only ranges left are those that
+        // intersect with our lower/upper Python version bounds, we
+        // can "extend" out the lower/upper bounds here all the way to
+        // negative and positive infinity, respectively.
+        //
+        // This has the effect of producing a marker that is only
+        // applicable in a context where the Python lower/upper bounds
+        // are known to be satisfied.
+        let &(ref first_range, first_node_id) = new.first().unwrap();
+        let first_upper = first_range.bounding_range().unwrap().1;
+        let clipped = Range::from_range_bounds((Bound::Unbounded, first_upper.cloned()));
+        *new.first_mut().unwrap() = (clipped, first_node_id);
+
+        let &(ref last_range, last_node_id) = new.last().unwrap();
+        let last_lower = last_range.bounding_range().unwrap().0;
+        let clipped = Range::from_range_bounds((last_lower.cloned(), Bound::Unbounded));
+        *new.last_mut().unwrap() = (clipped, last_node_id);
+
+        self.create_node(node.var.clone(), Edges::Version { edges: new })
+            .negate(i)
+    }
+
+    /// Complexify this tree by requiring the given Python version
+    /// range to be true in order for this marker tree to evaluate to
+    /// true in all circumstances.
+    ///
+    /// For example, with `requires-python = '>=3.8'` and a marker tree of
+    /// `python_full_version <= '3.10'`, this would result in a marker of
+    /// `python_full_version >= '3.8' and python_full_version <= '3.10'`.
+    pub(crate) fn complexify_python_versions(
+        &mut self,
+        i: NodeId,
+        py_lower: Bound<&Version>,
+        py_upper: Bound<&Version>,
+    ) -> NodeId {
+        if matches!(i, NodeId::FALSE)
+            || matches!((py_lower, py_upper), (Bound::Unbounded, Bound::Unbounded))
+        {
+            return i;
+        }
+
+        let py_range = Range::from_range_bounds((py_lower.cloned(), py_upper.cloned()));
+        if py_range.is_empty() {
+            // Oops, the bounds imply there is nothing that can match,
+            // so we always evaluate to false.
+            return NodeId::FALSE;
+        }
+        if matches!(i, NodeId::TRUE) {
+            let var = Variable::Version(MarkerValueVersion::PythonFullVersion);
+            let edges = Edges::Version {
+                edges: Edges::from_range(&py_range),
+            };
+            return self.create_node(var, edges).negate(i);
+        }
+
+        let node = self.shared.node(i);
+        let Node {
+            var: Variable::Version(MarkerValueVersion::PythonFullVersion),
+            children: Edges::Version { ref edges },
+        } = node
+        else {
+            // Complexify all nodes recursively.
+            let children = node.children.map(i, |node_id| {
+                self.complexify_python_versions(node_id, py_lower, py_upper)
+            });
+            return self.create_node(node.var.clone(), children);
+        };
+        // The approach we take here is to filter out any range that
+        // has no intersection with our Python lower/upper bounds.
+        // These ranges will now always be false, so we can dismiss
+        // them entirely.
+        //
+        // Then, depending on whether we have finite lower/upper bound,
+        // we "fix up" the edges by clipping the existing ranges and
+        // adding an additional range that covers the Python versions
+        // outside of our bounds by always mapping them to false.
+        let mut new: SmallVec<_> = edges
+            .iter()
+            .filter(|(range, _)| !py_range.intersection(range).is_empty())
+            .cloned()
+            .collect();
+        // Below, we assume `new` has at least one element. It's
+        // subtle, but since 1) edges is a disjoint covering of the
+        // universe and 2) our `py_range` is non-empty at this point,
+        // it must intersect with at least one range.
+        assert!(
+            !new.is_empty(),
+            "expected at least one non-empty intersection"
+        );
+        // This is the NodeId we map to anything that should
+        // always be false. We have to "negate" it in case the
+        // parent is negated.
+        let exclude_node_id = NodeId::FALSE.negate(i);
+        if !matches!(py_lower, Bound::Unbounded) {
+            let &(ref first_range, first_node_id) = new.first().unwrap();
+            let first_upper = first_range.bounding_range().unwrap().1;
+            // When the first range is always false, then we can just
+            // "expand" it out to negative infinity to reflect that
+            // anything less than our lower bound should evaluate to
+            // false. If we don't do this, then we could have two
+            // adjacent ranges map to the same node, which would not be
+            // a canonical representation.
+            if exclude_node_id == first_node_id {
+                let clipped = Range::from_range_bounds((Bound::Unbounded, first_upper.cloned()));
+                *new.first_mut().unwrap() = (clipped, first_node_id);
+            } else {
+                let clipped = Range::from_range_bounds((py_lower.cloned(), first_upper.cloned()));
+                *new.first_mut().unwrap() = (clipped, first_node_id);
+
+                let py_range_lower =
+                    Range::from_range_bounds((py_lower.cloned(), Bound::Unbounded));
+                new.insert(0, (py_range_lower.complement(), NodeId::FALSE.negate(i)));
+            }
+        }
+        if !matches!(py_upper, Bound::Unbounded) {
+            let &(ref last_range, last_node_id) = new.last().unwrap();
+            let last_lower = last_range.bounding_range().unwrap().0;
+            // See lower bound case above for why we do this. The
+            // same reasoning applies here: to maintain a canonical
+            // representation.
+            if exclude_node_id == last_node_id {
+                let clipped = Range::from_range_bounds((last_lower.cloned(), Bound::Unbounded));
+                *new.last_mut().unwrap() = (clipped, last_node_id);
+            } else {
+                let clipped = Range::from_range_bounds((last_lower.cloned(), py_upper.cloned()));
+                *new.last_mut().unwrap() = (clipped, last_node_id);
+
+                let py_range_upper =
+                    Range::from_range_bounds((Bound::Unbounded, py_upper.cloned()));
+                new.push((py_range_upper.complement(), exclude_node_id));
+            }
+        }
+        self.create_node(node.var.clone(), Edges::Version { edges: new })
+            .negate(i)
    }
 }

--- a/crates/pep508-rs/src/marker/tree.rs
+++ b/crates/pep508-rs/src/marker/tree.rs
@ -1132,25 +1132,63 @@ impl MarkerTree {
        extra_expression
    }

-    /// Remove the extras from a marker, returning `None` if the marker tree evaluates to `true`.
+    /// Simplify this marker by *assuming* that the Python version range
+    /// provided is true and that the complement of it is false.
    ///
-    /// Any `extra` markers that are always `true` given the provided extras will be removed.
-    /// Any `extra` markers that are always `false` given the provided extras will be left
-    /// unchanged.
+    /// For example, with `requires-python = '>=3.8'` and a marker tree of
+    /// `python_full_version >= '3.8' and python_full_version <= '3.10'`, this
+    /// would result in a marker of `python_full_version <= '3.10'`.
    ///
-    /// For example, if `dev` is a provided extra, given `sys_platform == 'linux' and extra == 'dev'`,
-    /// the marker will be simplified to `sys_platform == 'linux'`.
+    /// This is useful when one wants to write "simpler" markers in a
+    /// particular context with a bound on the supported Python versions.
+    /// In general, the simplified markers returned shouldn't be used for
+    /// evaluation. Instead, they should be turned back into their more
+    /// "complex" form first.
+    ///
+    /// Note that simplifying a marker and then complexifying it, even
+    /// with the same Python version bounds, is a lossy operation. For
+    /// example, simplifying `python_version < '3.7'` with `requires-python
+    /// = ">=3.8"` will result in a marker that always returns false (e.g.,
+    /// `python_version < '0'`). Therefore, complexifying an always-false
+    /// marker will result in a marker that is still always false, despite
+    /// the fact that the original marker was true for `<3.7`. Therefore,
+    /// simplifying should only be done as a one-way transformation when it is
+    /// known that `requires-python` reflects an eternal lower bound on the
+    /// results of that simplification. (If `requires-python` changes, then one
+    /// should reconstitute all relevant markers from the source data.)
    #[must_use]
    #[allow(clippy::needless_pass_by_value)]
-    pub fn simplify_python_versions(self, python_version: Range<Version>) -> MarkerTree {
-        MarkerTree(INTERNER.lock().restrict_versions(self.0, &|var| match var {
-            // Note that `python_version` is normalized to `python_full_version` internally by the
-            // decision diagram.
-            Variable::Version(MarkerValueVersion::PythonFullVersion) => {
-                Some(python_version.clone())
-            }
-            _ => None,
-        }))
+    pub fn simplify_python_versions(
+        self,
+        lower: Bound<&Version>,
+        upper: Bound<&Version>,
+    ) -> MarkerTree {
+        MarkerTree(
+            INTERNER
+                .lock()
+                .simplify_python_versions(self.0, lower, upper),
+        )
+    }
+
+    /// Complexify marker tree by requiring the given Python version range
+    /// to be true in order for this marker tree to evaluate to true in all
+    /// circumstances.
+    ///
+    /// For example, with `requires-python = '>=3.8'` and a marker tree of
+    /// `python_full_version <= '3.10'`, this would result in a marker of
+    /// `python_full_version >= '3.8' and python_full_version <= '3.10'`.
+    #[must_use]
+    #[allow(clippy::needless_pass_by_value)]
+    pub fn complexify_python_versions(
+        self,
+        lower: Bound<&Version>,
+        upper: Bound<&Version>,
+    ) -> MarkerTree {
+        MarkerTree(
+            INTERNER
+                .lock()
+                .complexify_python_versions(self.0, lower, upper),
+        )
    }

    /// Remove the extras from a marker, returning `None` if the marker tree evaluates to `true`.
@ -1636,7 +1674,6 @@ mod test {

    use insta::assert_snapshot;
    use pep440_rs::Version;
-    use pubgrub::Range;
    use uv_normalize::ExtraName;

    use crate::marker::{MarkerEnvironment, MarkerEnvironmentBuilder};
@ -1711,19 +1748,19 @@ mod test {

        assert_eq!(
            m("(python_version <= '3.11' and sys_platform == 'win32') or python_version > '3.11'")
-                .simplify_python_versions(Range::from_range_bounds((
-                    Bound::Excluded(Version::new([3, 12])),
-                    Bound::Unbounded,
-                ))),
+                .simplify_python_versions(
+                    Bound::Excluded(Version::new([3, 12])).as_ref(),
+                    Bound::Unbounded.as_ref(),
+                ),
            MarkerTree::TRUE
        );

        assert_eq!(
            m("python_version < '3.10'")
-                .simplify_python_versions(Range::from_range_bounds((
-                    Bound::Excluded(Version::new([3, 7])),
-                    Bound::Unbounded,
-                )))
+                .simplify_python_versions(
+                    Bound::Excluded(Version::new([3, 7])).as_ref(),
+                    Bound::Unbounded.as_ref(),
+                )
                .try_to_string()
                .unwrap(),
            "python_full_version < '3.10'"
@ -1732,28 +1769,29 @@ mod test {
        // Note that `3.12.1` will still match.
        assert_eq!(
            m("python_version <= '3.12'")
-                .simplify_python_versions(Range::from_range_bounds((
-                    Bound::Excluded(Version::new([3, 12])),
-                    Bound::Unbounded,
-                )))
+                .simplify_python_versions(
+                    Bound::Excluded(Version::new([3, 12])).as_ref(),
+                    Bound::Unbounded.as_ref(),
+                )
                .try_to_string()
                .unwrap(),
            "python_full_version < '3.13'"
        );

        assert_eq!(
-            m("python_full_version <= '3.12'").simplify_python_versions(Range::from_range_bounds(
-                (Bound::Excluded(Version::new([3, 12])), Bound::Unbounded,)
-            )),
+            m("python_full_version <= '3.12'").simplify_python_versions(
+                Bound::Excluded(Version::new([3, 12])).as_ref(),
+                Bound::Unbounded.as_ref(),
+            ),
            MarkerTree::FALSE
        );

        assert_eq!(
            m("python_full_version <= '3.12.1'")
-                .simplify_python_versions(Range::from_range_bounds((
-                    Bound::Excluded(Version::new([3, 12])),
-                    Bound::Unbounded,
-                )))
+                .simplify_python_versions(
+                    Bound::Excluded(Version::new([3, 12])).as_ref(),
+                    Bound::Unbounded.as_ref(),
+                )
                .try_to_string()
                .unwrap(),
            "python_full_version <= '3.12.1'"
@ -2521,10 +2559,9 @@ mod test {
    #[test]
    fn test_requires_python() {
        fn simplified(marker: &str) -> MarkerTree {
-            m(marker).simplify_python_versions(Range::from_range_bounds((
-                Bound::Included(Version::new([3, 8])),
-                Bound::Unbounded,
-            )))
+            let lower = Bound::Included(Version::new([3, 8]));
+            let upper = Bound::Unbounded;
+            m(marker).simplify_python_versions(lower.as_ref(), upper.as_ref())
        }

        assert_eq!(simplified("python_version >= '3.8'"), MarkerTree::TRUE);
@ -2786,4 +2823,171 @@ mod test {
        let (left, right) = (m(left.as_ref()), m(right.as_ref()));
        left.is_disjoint(&right) && right.is_disjoint(&left)
    }
+
+    #[test]
+    fn complexified_markers() {
+        // Takes optional lower (inclusive) and upper (exclusive)
+        // bounds representing `requires-python` and a "simplified"
+        // marker, and returns the "complexified" marker. That is, a
+        // marker that embeds the `requires-python` constraint into it.
+        let complexify =
+            |lower: Option<[u64; 2]>, upper: Option<[u64; 2]>, marker: &str| -> MarkerTree {
+                let lower = lower
+                    .map(|release| Bound::Included(Version::new(release)))
+                    .unwrap_or(Bound::Unbounded);
+                let upper = upper
+                    .map(|release| Bound::Excluded(Version::new(release)))
+                    .unwrap_or(Bound::Unbounded);
+                m(marker).complexify_python_versions(lower.as_ref(), upper.as_ref())
+            };
+
+        assert_eq!(
+            complexify(None, None, "python_full_version < '3.10'"),
+            m("python_full_version < '3.10'"),
+        );
+        assert_eq!(
+            complexify(Some([3, 8]), None, "python_full_version < '3.10'"),
+            m("python_full_version >= '3.8' and python_full_version < '3.10'"),
+        );
+        assert_eq!(
+            complexify(None, Some([3, 8]), "python_full_version < '3.10'"),
+            m("python_full_version < '3.8'"),
+        );
+        assert_eq!(
+            complexify(Some([3, 8]), Some([3, 8]), "python_full_version < '3.10'"),
+            // Kinda weird, but this normalizes to `false`, just like the above.
+            m("python_full_version < '0' and python_full_version > '0'"),
+        );
+
+        assert_eq!(
+            complexify(Some([3, 11]), None, "python_full_version < '3.10'"),
+            // Kinda weird, but this normalizes to `false`, just like the above.
+            m("python_full_version < '0' and python_full_version > '0'"),
+        );
+        assert_eq!(
+            complexify(Some([3, 11]), None, "python_full_version >= '3.10'"),
+            m("python_full_version >= '3.11'"),
+        );
+        assert_eq!(
+            complexify(Some([3, 11]), None, "python_full_version >= '3.12'"),
+            m("python_full_version >= '3.12'"),
+        );
+
+        assert_eq!(
+            complexify(None, Some([3, 11]), "python_full_version > '3.12'"),
+            // Kinda weird, but this normalizes to `false`, just like the above.
+            m("python_full_version < '0' and python_full_version > '0'"),
+        );
+        assert_eq!(
+            complexify(None, Some([3, 11]), "python_full_version <= '3.12'"),
+            m("python_full_version < '3.11'"),
+        );
+        assert_eq!(
+            complexify(None, Some([3, 11]), "python_full_version <= '3.10'"),
+            m("python_full_version <= '3.10'"),
+        );
+
+        assert_eq!(
+            complexify(Some([3, 11]), None, "python_full_version == '3.8'"),
+            // Kinda weird, but this normalizes to `false`, just like the above.
+            m("python_full_version < '0' and python_full_version > '0'"),
+        );
+        assert_eq!(
+            complexify(
+                Some([3, 11]),
+                None,
+                "python_full_version == '3.8' or python_full_version == '3.12'"
+            ),
+            m("python_full_version == '3.12'"),
+        );
+        assert_eq!(
+            complexify(
+                Some([3, 11]),
+                None,
+                "python_full_version == '3.8' \
+                 or python_full_version == '3.11' \
+                 or python_full_version == '3.12'"
+            ),
+            m("python_full_version == '3.11' or python_full_version == '3.12'"),
+        );
+
+        // Tests a tricky case where if a marker is always true, then
+        // complexifying it will proceed correctly by adding the
+        // requires-python constraint. This is a regression test for
+        // an early implementation that special cased the "always
+        // true" case to return "always true" regardless of the
+        // requires-python bounds.
+        assert_eq!(
+            complexify(
+                Some([3, 12]),
+                None,
+                "python_full_version < '3.10' or python_full_version >= '3.10'"
+            ),
+            m("python_full_version >= '3.12'"),
+        );
+    }
+
+    #[test]
+    fn simplified_markers() {
+        // Takes optional lower (inclusive) and upper (exclusive)
+        // bounds representing `requires-python` and a "complexified"
+        // marker, and returns the "simplified" marker. That is, a
+        // marker that assumes `requires-python` is true.
+        let simplify =
+            |lower: Option<[u64; 2]>, upper: Option<[u64; 2]>, marker: &str| -> MarkerTree {
+                let lower = lower
+                    .map(|release| Bound::Included(Version::new(release)))
+                    .unwrap_or(Bound::Unbounded);
+                let upper = upper
+                    .map(|release| Bound::Excluded(Version::new(release)))
+                    .unwrap_or(Bound::Unbounded);
+                m(marker).simplify_python_versions(lower.as_ref(), upper.as_ref())
+            };
+
+        assert_eq!(
+            simplify(
+                Some([3, 8]),
+                None,
+                "python_full_version >= '3.8' and python_full_version < '3.10'"
+            ),
+            m("python_full_version < '3.10'"),
+        );
+        assert_eq!(
+            simplify(Some([3, 8]), None, "python_full_version < '3.7'"),
+            // Kinda weird, but this normalizes to `false`, just like the above.
+            m("python_full_version < '0' and python_full_version > '0'"),
+        );
+        assert_eq!(
+            simplify(
+                Some([3, 8]),
+                Some([3, 11]),
+                "python_full_version == '3.7.*' \
+                 or python_full_version == '3.8.*' \
+                 or python_full_version == '3.10.*' \
+                 or python_full_version == '3.11.*' \
+                "
+            ),
+            // Given `requires-python = '>=3.8,<3.11'`, only `3.8.*`
+            // and `3.10.*` can possibly be true. So this simplifies
+            // to `!= 3.9.*`.
+            m("python_full_version != '3.9.*'"),
+        );
+        assert_eq!(
+            simplify(
+                Some([3, 8]),
+                None,
+                "python_full_version >= '3.8' and sys_platform == 'win32'"
+            ),
+            m("sys_platform == 'win32'"),
+        );
+        assert_eq!(
+            simplify(
+                Some([3, 8]),
+                None,
+                "python_full_version >= '3.9' \
+                 and (sys_platform == 'win32' or python_full_version >= '3.8')",
+            ),
+            m("python_full_version >= '3.9'"),
+        );
+    }
 }