Detect musl and error for musl pbs builds (#6643)

As described in #4242, we're currently incorrectly downloading glibc
python-build-standalone on musl target, but we also can't fix this by
using musl python-build-standalone on musl targets since the musl builds
are effectively broken.

We reintroduce the libc detection previously removed in #2381, using it
to detect which libc is the current one before we have a python
interpreter. I changed the strategy a bit to support an empty `PATH`
which we use in the tests.

For simplicity, I've decided to just filter out the musl
python-build-standalone archives from the list of available archives,
given this is temporary. This means we show the same error message as if
we don't have a build for the platform. We could also add a dedicated
error message for musl.

Fixes #4242

## Test Plan

Tested manually.

On my ubuntu host, python downloads continue to pass:
```
target/x86_64-unknown-linux-musl/debug/uv python install
```

On alpine, we fail:
```
$ docker run -it --rm -v .:/io alpine /io/target/x86_64-unknown-linux-musl/debug/uv python install
  Searching for Python installations
  error: No download found for request: cpython-any-linux-x86_64-musl
```
This commit is contained in:
konsti 2024-08-27 02:06:53 +02:00 committed by GitHub
parent 1ae2c3f142
commit ae57d85dfb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 363 additions and 30 deletions

44
Cargo.lock generated
View file

@ -1509,6 +1509,17 @@ dependencies = [
"walkdir",
]
[[package]]
name = "goblin"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b363a30c165f666402fe6a3024d3bec7ebc898f96a4a23bd1c99f8dbf3f4f47"
dependencies = [
"log",
"plain",
"scroll",
]
[[package]]
name = "h2"
version = "0.4.5"
@ -1951,7 +1962,7 @@ checksum = "8ef8bc400f8312944a9f879db116fed372c4f0859af672eba2a80f79c767dd19"
dependencies = [
"jiff-tzdb-platform",
"serde",
"windows-sys 0.52.0",
"windows-sys 0.59.0",
]
[[package]]
@ -2635,6 +2646,12 @@ version = "0.3.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec"
[[package]]
name = "plain"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6"
[[package]]
name = "platform-info"
version = "2.0.3"
@ -2794,7 +2811,7 @@ dependencies = [
"indoc",
"libc",
"memoffset 0.9.1",
"parking_lot 0.11.2",
"parking_lot 0.12.3",
"portable-atomic",
"pyo3-build-config",
"pyo3-ffi",
@ -3511,6 +3528,26 @@ version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "scroll"
version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ab8598aa408498679922eff7fa985c25d58a90771bd6be794434c5277eab1a6"
dependencies = [
"scroll_derive",
]
[[package]]
name = "scroll_derive"
version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f81c2fde025af7e69b1d1420531c8a8811ca898919db177141a85313b1cb932"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.76",
]
[[package]]
name = "seahash"
version = "4.1.0"
@ -5005,6 +5042,7 @@ dependencies = [
"distribution-filename",
"fs-err",
"futures",
"goblin",
"indoc",
"install-wheel-rs",
"itertools 0.13.0",
@ -5504,7 +5542,7 @@ version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
dependencies = [
"windows-sys 0.48.0",
"windows-sys 0.59.0",
]
[[package]]

View file

@ -90,6 +90,7 @@ fs-err = { version = "2.11.0" }
fs2 = { version = "0.4.3" }
futures = { version = "0.3.30" }
glob = { version = "0.3.1" }
goblin = { version = "0.8.2", default-features = false, features = ["std", "elf32", "elf64", "endian_fd"] }
hex = { version = "0.4.3" }
home = { version = "0.5.9" }
html-escape = { version = "0.2.13" }

View file

@ -32,6 +32,7 @@ clap = { workspace = true, optional = true }
configparser = { workspace = true }
fs-err = { workspace = true, features = ["tokio"] }
futures = { workspace = true }
goblin = { workspace = true }
itertools = { workspace = true }
owo-colors = { workspace = true }
regex = { workspace = true }

View file

@ -22,6 +22,7 @@ use crate::implementation::{
Error as ImplementationError, ImplementationName, LenientImplementationName,
};
use crate::installation::PythonInstallationKey;
use crate::libc::LibcDetectionError;
use crate::platform::{self, Arch, Libc, Os};
use crate::{Interpreter, PythonRequest, PythonVersion, VersionRequest};
@ -75,6 +76,8 @@ pub enum Error {
"A mirror was provided via `{0}`, but the URL does not match the expected format: {0}"
)]
Mirror(&'static str, &'static str),
#[error(transparent)]
LibcDetection(#[from] LibcDetectionError),
}
#[derive(Debug, PartialEq)]
@ -167,8 +170,7 @@ impl PythonDownloadRequest {
/// Fill empty entries with default values.
///
/// Platform information is pulled from the environment.
#[must_use]
pub fn fill(mut self) -> Self {
pub fn fill(mut self) -> Result<Self, Error> {
if self.implementation.is_none() {
self.implementation = Some(ImplementationName::CPython);
}
@ -179,9 +181,9 @@ impl PythonDownloadRequest {
self.os = Some(Os::from_env());
}
if self.libc.is_none() {
self.libc = Some(Libc::from_env());
self.libc = Some(Libc::from_env()?);
}
self
Ok(self)
}
/// Construct a new [`PythonDownloadRequest`] with platform information from the environment.
@ -191,7 +193,7 @@ impl PythonDownloadRequest {
None,
Some(Arch::from_env()),
Some(Os::from_env()),
Some(Libc::from_env()),
Some(Libc::from_env()?),
))
}
@ -387,7 +389,11 @@ impl ManagedPythonDownload {
/// Iterate over all [`PythonDownload`]'s.
pub fn iter_all() -> impl Iterator<Item = &'static ManagedPythonDownload> {
PYTHON_DOWNLOADS.iter()
PYTHON_DOWNLOADS
.iter()
// TODO(konsti): musl python-build-standalone builds are currently broken (statically
// linked), so we pretend they don't exist. https://github.com/astral-sh/uv/issues/4242
.filter(|download| download.key.libc != Libc::Some(target_lexicon::Environment::Musl))
}
pub fn url(&self) -> &str {

View file

@ -99,7 +99,7 @@ impl PythonInstallation {
{
if let Some(request) = PythonDownloadRequest::from_request(&request) {
debug!("Requested Python not found, checking for available download...");
match Self::fetch(request.fill(), client_builder, cache, reporter).await {
match Self::fetch(request.fill()?, client_builder, cache, reporter).await {
Ok(installation) => Ok(installation),
Err(Error::Download(downloads::Error::NoDownloadFound(_))) => {
Err(Error::MissingPython(err))

View file

@ -24,6 +24,7 @@ mod environment;
mod implementation;
mod installation;
mod interpreter;
mod libc;
pub mod managed;
pub mod platform;
mod pointer_size;

View file

@ -0,0 +1,279 @@
//! Determine the libc (glibc or musl) on linux.
//!
//! Taken from `glibc_version` (<https://github.com/delta-incubator/glibc-version-rs>),
//! which used the Apache 2.0 license (but not the MIT license)
use fs_err as fs;
use goblin::elf::Elf;
use regex::Regex;
use std::io;
use std::path::{Path, PathBuf};
use std::process::{Command, Stdio};
use std::sync::LazyLock;
use thiserror::Error;
use tracing::trace;
use uv_fs::Simplified;
/// Errors that can occur while detecting the libc flavor and version on Linux.
#[derive(Debug, Error)]
pub enum LibcDetectionError {
/// Every detection strategy tried by `detect_linux_libc` failed.
#[error("Could not detect either glibc version nor musl libc version, at least one of which is required")]
NoLibcFound,
/// The resolved symlink target had no final path component to inspect.
#[error("Failed to get base name of symbolic link path {0}")]
MissingBasePath(PathBuf),
/// The symlink target's file name did not look like `ld-<major>.<minor>.so`.
#[error("Failed to find glibc version in the filename of linker: `{0}`")]
GlibcExtractionMismatch(PathBuf),
/// Spawning the program used to probe the libc version failed.
#[error("Failed to determine {libc} version by running: `{program}`")]
FailedToRun {
// Which libc was being probed ("glibc" or "musl" at the call sites in this file).
libc: &'static str,
// Human-readable rendering of the command that was attempted.
program: String,
// The underlying I/O error from spawning the process.
#[source]
err: io::Error,
},
/// Neither stdout nor stderr of the `--version` invocation contained a glibc version.
#[error("Could not find glibc version in output of: `ldd --version`")]
InvalidLddOutputGnu,
/// Neither stdout nor stderr of the given loader contained a musl version.
#[error("Could not find musl version in output of: `{0}`")]
InvalidLddOutputMusl(PathBuf),
/// None of the candidate binaries yielded an ELF interpreter; the payload is the
/// comma-separated list of paths that were tried.
#[error("Could not read ELF interpreter from any of the following paths: {0}")]
CoreBinaryParsing(String),
/// An I/O error converted via `?` (e.g., from reading the linker symlink).
#[error("Failed to determine libc")]
Io(#[from] io::Error),
}
/// We support glibc (manylinux) and musl (musllinux) on linux.
///
/// Each variant carries the `major.minor` version of the detected libc.
#[derive(Debug, PartialEq, Eq)]
pub(crate) enum LibcVersion {
// glibc; produced by the symlink-name and `--version` output parsers in this module.
Manylinux { major: u32, minor: u32 },
// musl; produced by parsing the musl loader's `Version x.y` output.
Musllinux { major: u32, minor: u32 },
}
/// Detect which libc (glibc or musl) the host uses and its version, assuming we are on Linux.
///
/// The Python interpreter is normally the more accurate source for this information, but when
/// choosing which interpreter to download there is no interpreter yet, so the environment is
/// inspected instead.
///
/// Both musl and glibc may be installed side by side. The preferred one is taken to be whatever
/// dynamic loader the system's core binaries are linked against (see `find_ld_path`).
///
/// Strategies are attempted in this order, each failure being logged at trace level:
/// 1. Run the loader and look for a musl version banner.
/// 2. Treat the loader as a symlink and read a glibc version from the target's file name.
/// 3. Run the loader with `--version` and look for a glibc version.
///
/// # Errors
///
/// Propagates the error from `find_ld_path` if no loader can be located, and returns
/// [`LibcDetectionError::NoLibcFound`] if all three strategies fail.
pub(crate) fn detect_linux_libc() -> Result<LibcVersion, LibcDetectionError> {
    let ld_path = find_ld_path()?;
    trace!("ld path: {}", ld_path.user_display());

    detect_musl_version(&ld_path)
        .or_else(|err| {
            trace!("Tried to find musl version by running `{ld_path:?}`, but failed: {err}");
            detect_linux_libc_from_ld_symlink(&ld_path)
        })
        .or_else(|err| {
            trace!("Tried to find libc version from possible symlink at {ld_path:?}, but failed: {err}");
            detect_glibc_version_from_ldd(&ld_path)
        })
        .map_err(|err| {
            trace!("Tried to find glibc version from `ldd --version`, but failed: {err}");
            LibcDetectionError::NoLibcFound
        })
}
/// Determine the glibc version by running `<ldd> --version` and parsing what it prints.
///
/// Despite the parameter name, the caller in this module passes the ELF interpreter path
/// rather than an `ldd` binary. Stdout is inspected first and stderr second.
///
/// (The original note here states that the glibc version handling is taken from
/// `std/sys/unix/os.rs`.)
///
/// # Errors
///
/// Returns [`LibcDetectionError::FailedToRun`] if the process cannot be spawned and
/// [`LibcDetectionError::InvalidLddOutputGnu`] if neither stream contains a version.
fn detect_glibc_version_from_ldd(ldd: &Path) -> Result<LibcVersion, LibcDetectionError> {
    let output = match Command::new(ldd).arg("--version").output() {
        Ok(output) => output,
        Err(err) => {
            return Err(LibcDetectionError::FailedToRun {
                libc: "glibc",
                program: format!("{} --version", ldd.user_display()),
                err,
            });
        }
    };

    glibc_ldd_output_to_version("stdout", &output.stdout)
        .or_else(|| glibc_ldd_output_to_version("stderr", &output.stderr))
        .ok_or(LibcDetectionError::InvalidLddOutputGnu)
}
/// Extract the glibc `major.minor` version from the output of a `--version` invocation.
///
/// `kind` names the stream being parsed (`"stdout"` or `"stderr"`) and is only used in trace
/// logs. Returns `None` when the expected banner is absent or a component fails to parse.
///
/// Example: `ld.so (Ubuntu GLIBC 2.39-0ubuntu8.3) stable release version 2.39.`.
fn glibc_ldd_output_to_version(kind: &str, output: &[u8]) -> Option<LibcVersion> {
    static BANNER: LazyLock<Regex> =
        LazyLock::new(|| Regex::new(r"ld.so \(.+\) .* ([0-9]+\.[0-9]+)").unwrap());

    let text = String::from_utf8_lossy(output);
    trace!("{kind} output from `ldd --version`: {text:?}");

    let captures = BANNER.captures(&text)?;
    let (_, [version]) = captures.extract();

    // The capture group guarantees the shape "x.y" with exactly one dot; the digit runs are
    // unbounded, though, so parsing can still overflow and must stay fallible.
    let (major, minor) = version.split_once('.')?;
    let major: u32 = major.parse().ok()?;
    let minor: u32 = minor.parse().ok()?;

    trace!("Found manylinux {major}.{minor} in {kind} of `ldd --version`");
    Some(LibcVersion::Manylinux { major, minor })
}
/// Determine the glibc version from the file name the loader symlink points at.
///
/// `path` is resolved with `read_link`, and the target's file name is expected to look like
/// `ld-<major>.<minor>.so` (e.g., `ld-2.31.so`).
///
/// # Errors
///
/// Returns an I/O error if `path` cannot be read as a symlink,
/// [`LibcDetectionError::MissingBasePath`] if the target has no file name, and
/// [`LibcDetectionError::GlibcExtractionMismatch`] if the file name has a different shape.
fn detect_linux_libc_from_ld_symlink(path: &Path) -> Result<LibcVersion, LibcDetectionError> {
    static LINKER_NAME: LazyLock<Regex> =
        LazyLock::new(|| Regex::new(r"^ld-([0-9]{1,3})\.([0-9]{1,3})\.so$").unwrap());

    let target = fs::read_link(path)?;
    let Some(base_name) = target.file_name() else {
        return Err(LibcDetectionError::MissingBasePath(target.clone()));
    };
    let base_name = base_name.to_string_lossy();

    let Some(captures) = LINKER_NAME.captures(&base_name) else {
        return Err(LibcDetectionError::GlibcExtractionMismatch(target.clone()));
    };
    let (_, [major, minor]) = captures.extract();

    // Cannot fail: the regex guarantees between 1 and 3 ASCII digits per component, and the
    // largest possible value, 999, comfortably fits into the `u32` fields.
    let major = major.parse().expect("valid major version");
    let minor = minor.parse().expect("valid minor version");
    Ok(LibcVersion::Manylinux { major, minor })
}
/// Determine the musl version by executing the dynamic loader and parsing its banner.
/// Taken from maturin.
///
/// When run without arguments, the musl loader is expected to print something like this to
/// `stderr`:
///
/// ```text
/// musl libc (`x86_64`)
/// Version 1.2.2
/// Dynamic Program Loader
/// ```
///
/// # Errors
///
/// Returns [`LibcDetectionError::FailedToRun`] if the loader cannot be spawned and
/// [`LibcDetectionError::InvalidLddOutputMusl`] if no version is found in its output.
fn detect_musl_version(ld_path: impl AsRef<Path>) -> Result<LibcVersion, LibcDetectionError> {
    let loader = ld_path.as_ref();

    let mut command = Command::new(loader);
    command.stdout(Stdio::null()).stderr(Stdio::piped());
    let output = match command.output() {
        Ok(output) => output,
        Err(err) => {
            return Err(LibcDetectionError::FailedToRun {
                libc: "musl",
                program: loader.to_string_lossy().into_owned(),
                err,
            });
        }
    };

    // NOTE(review): stdout is redirected to null above, so the stdout probe presumably only
    // ever sees an empty buffer; confirm whether capturing stdout was intended.
    musl_ld_output_to_version("stdout", &output.stdout)
        .or_else(|| musl_ld_output_to_version("stderr", &output.stderr))
        .ok_or_else(|| LibcDetectionError::InvalidLddOutputMusl(loader.to_path_buf()))
}
/// Extract the musl `major.minor` version from the loader's output.
///
/// `kind` names the stream being parsed (`"stdout"` or `"stderr"`) and is only used in trace
/// logs. Anything after the minor component (patch level, suffixes) is ignored. Returns `None`
/// when no `Version x.y` marker is present.
///
/// Example: `Version 1.2.5`.
fn musl_ld_output_to_version(kind: &str, output: &[u8]) -> Option<LibcVersion> {
    static VERSION_MARKER: LazyLock<Regex> =
        LazyLock::new(|| Regex::new(r"Version ([0-9]{1,4})\.([0-9]{1,4})").unwrap());

    let text = String::from_utf8_lossy(output);
    trace!("{kind} output from `ld`: {text:?}");

    let captures = VERSION_MARKER.captures(&text)?;
    let (_, [major, minor]) = captures.extract();

    // Cannot fail: the regex guarantees between 1 and 4 ASCII digits per component, and the
    // largest possible value, 9999, comfortably fits into the `u32` fields.
    let major = major.parse().expect("valid major version");
    let minor = minor.parse().expect("valid minor version");

    trace!("Found musllinux {major}.{minor} in {kind} of `ld`");
    Some(LibcVersion::Musllinux { major, minor })
}
/// Find musl ld path from executable's ELF header.
fn find_ld_path() -> Result<PathBuf, LibcDetectionError> {
// At first, we just looked for /bin/ls. But on some Linux distros, /bin/ls
// is a shell script that just calls /usr/bin/ls. So we switched to looking
// at /bin/sh. But apparently in some environments, /bin/sh is itself just
// a shell script that calls /bin/dash. So... We just try a few different
// paths. In most cases, /bin/sh should work.
//
// See: https://github.com/astral-sh/uv/pull/1493
// See: https://github.com/astral-sh/uv/issues/1810
// See: https://github.com/astral-sh/uv/issues/4242#issuecomment-2306164449
let attempts = ["/bin/sh", "/usr/bin/env", "/bin/dash", "/bin/ls"];
for path in attempts {
if let Some(ld_path) = find_ld_path_at(path) {
return Ok(ld_path);
}
}
Err(LibcDetectionError::CoreBinaryParsing(attempts.join(", ")))
}
/// Try to read the dynamic loader path from the ELF interpreter entry of the binary at `path`.
///
/// Returns `None` if the file cannot be read, is not a parsable ELF file, or declares no
/// interpreter. The latter two cases are logged at trace level.
fn find_ld_path_at(path: impl AsRef<Path>) -> Option<PathBuf> {
    let binary = path.as_ref();

    // A missing candidate is expected: not every Linux distribution has all of these paths.
    let contents = fs::read(binary).ok()?;

    let elf = Elf::parse(&contents)
        .inspect_err(|err| {
            trace!(
                "Could not parse ELF file at `{}`: `{}`",
                binary.user_display(),
                err
            );
        })
        .ok()?;

    match elf.interpreter {
        Some(interpreter) => Some(PathBuf::from(interpreter)),
        None => {
            trace!(
                "Couldn't find ELF interpreter path from {}",
                binary.user_display()
            );
            None
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use indoc::indoc;

    /// The glibc banner on the first line yields the trailing `major.minor` release version.
    #[test]
    fn parse_ldd_output() {
        let stdout = indoc! {br"ld.so (Ubuntu GLIBC 2.39-0ubuntu8.3) stable release version 2.39.
Copyright (C) 2024 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.
There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE.
"};
        let expected = LibcVersion::Manylinux {
            major: 2,
            minor: 39,
        };
        assert_eq!(glibc_ldd_output_to_version("stdout", stdout), Some(expected));
    }

    /// The musl banner yields `major.minor`; the patch level and git suffix are ignored.
    #[test]
    fn parse_musl_ld_output() {
        // This output was generated by running `/lib/ld-musl-x86_64.so.1`
        // in an Alpine Docker image. The Alpine version:
        //
        // # cat /etc/alpine-release
        // 3.19.1
        let stderr = b"\
musl libc (x86_64)
Version 1.2.4_git20230717
Dynamic Program Loader
Usage: /lib/ld-musl-x86_64.so.1 [options] [--] pathname [args]\
";
        let expected = LibcVersion::Musllinux { major: 1, minor: 2 };
        assert_eq!(musl_ld_output_to_version("stderr", stderr), Some(expected));
    }
}

View file

@ -16,6 +16,7 @@ use crate::implementation::{
Error as ImplementationError, ImplementationName, LenientImplementationName,
};
use crate::installation::{self, PythonInstallationKey};
use crate::libc::LibcDetectionError;
use crate::platform::Error as PlatformError;
use crate::platform::{Arch, Libc, Os};
use crate::python_version::PythonVersion;
@ -52,6 +53,8 @@ pub enum Error {
NameError(String),
#[error(transparent)]
NameParseError(#[from] installation::PythonInstallationKeyError),
#[error(transparent)]
LibcDetection(#[from] LibcDetectionError),
}
/// A collection of uv-managed Python installations installed on the current system.
#[derive(Debug, Clone)]
@ -193,7 +196,7 @@ impl ManagedPythonInstallations {
pub fn find_matching_current_platform(
&self,
) -> Result<impl DoubleEndedIterator<Item = ManagedPythonInstallation>, Error> {
let platform_key = platform_key_from_env();
let platform_key = platform_key_from_env()?;
let iter = ManagedPythonInstallations::from_settings()?
.find_all()?
@ -347,11 +350,11 @@ impl ManagedPythonInstallation {
}
/// Generate a platform portion of a key from the environment.
fn platform_key_from_env() -> String {
fn platform_key_from_env() -> Result<String, Error> {
let os = Os::from_env();
let arch = Arch::from_env();
let libc = Libc::from_env();
format!("{os}-{arch}-{libc}").to_lowercase()
let libc = Libc::from_env()?;
Ok(format!("{os}-{arch}-{libc}").to_lowercase())
}
impl fmt::Display for ManagedPythonInstallation {

View file

@ -1,3 +1,4 @@
use crate::libc::{detect_linux_libc, LibcDetectionError, LibcVersion};
use std::fmt::Display;
use std::ops::Deref;
use std::{fmt, str::FromStr};
@ -26,15 +27,15 @@ pub enum Libc {
}
impl Libc {
pub(crate) fn from_env() -> Self {
pub(crate) fn from_env() -> Result<Self, LibcDetectionError> {
match std::env::consts::OS {
// TODO(zanieb): On Linux, we use the uv target host to determine the libc variant
// but we should only use this as a fallback and should instead inspect the
// machine's `/bin/sh` (or similar).
"linux" => Self::Some(target_lexicon::Environment::Gnu),
"windows" | "macos" => Self::None,
"linux" => Ok(Self::Some(match detect_linux_libc()? {
LibcVersion::Manylinux { .. } => target_lexicon::Environment::Gnu,
LibcVersion::Musllinux { .. } => target_lexicon::Environment::Musl,
})),
"windows" | "macos" => Ok(Self::None),
// Use `None` on platforms without explicit support.
_ => Self::None,
_ => Ok(Self::None),
}
}
}

View file

@ -118,8 +118,7 @@ pub(crate) async fn install(
let downloads = unfilled_requests
.into_iter()
// Populate the download requests with defaults
.map(PythonDownloadRequest::fill)
.map(|request| ManagedPythonDownload::from_request(&request))
.map(|request| ManagedPythonDownload::from_request(&PythonDownloadRequest::fill(request)?))
.collect::<Result<Vec<_>, uv_python::downloads::Error>>()?;
// Ensure we only download each version once

View file

@ -249,16 +249,20 @@ uv supports downloading and installing CPython and PyPy distributions.
### CPython distributions
Python does not publish official distributable CPython binaries, uv uses third-party standalone
distributions from the
[`python-build-standalone`](https://github.com/indygreg/python-build-standalone) project. The
project is partially maintained by the uv maintainers and is used by many other Python projects.
As Python does not publish official distributable CPython binaries, uv instead uses pre-built
third-party distributions from the
[`python-build-standalone`](https://github.com/indygreg/python-build-standalone) project.
`python-build-standalone` is partially maintained by the uv maintainers and is used in many other
Python projects, like [Rye](https://github.com/astral-sh/rye) and
[bazelbuild/rules_python](https://github.com/bazelbuild/rules_python).
The uv Python distributions are self-contained, highly-portable, and performant. While Python can be
built from source, as in tools like `pyenv`, it requires preinstalled system dependencies and
creating optimized, performant builds is very slow.
built from source, as in tools like `pyenv`, doing so requires preinstalled system dependencies, and
creating optimized, performant builds (e.g., with PGO and LTO enabled) is very slow.
These distributions have some behavior quirks, generally as a consequence of portability. See the
These distributions have some behavior quirks, generally as a consequence of portability; and, at
present, uv does not support installing them on musl-based Linux distributions, like Alpine Linux.
See the
[`python-build-standalone` quirks](https://gregoryszorc.com/docs/python-build-standalone/main/quirks.html)
documentation for details.