Merge pull request #8329 from drinkcat/printf-7209-update

printf: accept non-UTF-8 input in FORMAT and ARGUMENT arguments
2025-12-23 08:47:37 +00:00 · 2025-07-16 00:28:44 +02:00 · 2025-07-16 00:28:44 +02:00 · 1bb7930a08
commit 1bb7930a08
parent 50704da6e8 ccad817415
11 changed files with 512 additions and 292 deletions
--- a/src/uu/echo/src/echo.rs
+++ b/src/uu/echo/src/echo.rs
@ -8,10 +8,9 @@ use clap::{Arg, ArgAction, Command};
 use std::env;
 use std::ffi::{OsStr, OsString};
 use std::io::{self, StdoutLock, Write};
-use uucore::error::{UResult, USimpleError};
+use uucore::error::UResult;
 use uucore::format::{FormatChar, OctalParsing, parse_escape_only};
-use uucore::format_usage;
-use uucore::os_str_as_bytes;
+use uucore::{format_usage, os_str_as_bytes};

 use uucore::locale::get_message;

@ -223,9 +222,9 @@ pub fn uu_app() -> Command {

 fn execute(stdout: &mut StdoutLock, args: Vec<OsString>, options: Options) -> UResult<()> {
    for (i, arg) in args.into_iter().enumerate() {
-        let bytes = os_str_as_bytes(arg.as_os_str())
-            .map_err(|_| USimpleError::new(1, get_message("echo-error-non-utf8")))?;
+        let bytes = os_str_as_bytes(&arg)?;

+        // Don't print a space before the first argument
        if i > 0 {
            stdout.write_all(b" ")?;
        }
--- a/src/uu/printf/src/printf.rs
+++ b/src/uu/printf/src/printf.rs
@ -4,6 +4,7 @@
 // file that was distributed with this source code.
 use clap::{Arg, ArgAction, Command};
 use std::collections::HashMap;
+use std::ffi::OsString;
 use std::io::stdout;
 use std::ops::ControlFlow;
 use uucore::error::{UResult, UUsageError};
@ -18,21 +19,19 @@ mod options {
    pub const FORMAT: &str = "FORMAT";
    pub const ARGUMENT: &str = "ARGUMENT";
 }
+
 #[uucore::main]
 pub fn uumain(args: impl uucore::Args) -> UResult<()> {
    let matches = uu_app().get_matches_from(args);

    let format = matches
-        .get_one::<std::ffi::OsString>(options::FORMAT)
+        .get_one::<OsString>(options::FORMAT)
        .ok_or_else(|| UUsageError::new(1, get_message("printf-error-missing-operand")))?;
    let format = os_str_as_bytes(format)?;

-    let values: Vec<_> = match matches.get_many::<std::ffi::OsString>(options::ARGUMENT) {
-        // FIXME: use os_str_as_bytes once FormatArgument supports Vec<u8>
+    let values: Vec<_> = match matches.get_many::<OsString>(options::ARGUMENT) {
        Some(s) => s
-            .map(|os_string| {
-                FormatArgument::Unparsed(std::ffi::OsStr::to_string_lossy(os_string).to_string())
-            })
+            .map(|os_string| FormatArgument::Unparsed(os_string.to_owned()))
            .collect(),
        None => vec![],
    };
@ -62,7 +61,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
                "{}",
                get_message_with_args(
                    "printf-warning-ignoring-excess-arguments",
-                    HashMap::from([("arg".to_string(), arg_str.to_string())])
+                    HashMap::from([("arg".to_string(), arg_str.to_string_lossy().to_string())])
                )
            );
        }
@ -103,10 +102,10 @@ pub fn uu_app() -> Command {
                .help(get_message("printf-help-version"))
                .action(ArgAction::Version),
        )
-        .arg(Arg::new(options::FORMAT).value_parser(clap::value_parser!(std::ffi::OsString)))
+        .arg(Arg::new(options::FORMAT).value_parser(clap::value_parser!(OsString)))
        .arg(
            Arg::new(options::ARGUMENT)
                .action(ArgAction::Append)
-                .value_parser(clap::value_parser!(std::ffi::OsString)),
+                .value_parser(clap::value_parser!(OsString)),
        )
 }
--- a/src/uucore/src/lib/features/checksum.rs
+++ b/src/uucore/src/lib/features/checksum.rs
@ -968,7 +968,7 @@ fn process_checksum_line(
    cached_line_format: &mut Option<LineFormat>,
    last_algo: &mut Option<String>,
 ) -> Result<(), LineCheckError> {
-    let line_bytes = os_str_as_bytes(line)?;
+    let line_bytes = os_str_as_bytes(line).map_err(|e| LineCheckError::UError(Box::new(e)))?;

    // Early return on empty or commented lines.
    if line.is_empty() || line_bytes.starts_with(b"#") {
--- a/src/uucore/src/lib/features/extendedbigdecimal.rs
+++ b/src/uucore/src/lib/features/extendedbigdecimal.rs
@ -101,6 +101,18 @@ impl From<f64> for ExtendedBigDecimal {
    }
 }

+impl From<u8> for ExtendedBigDecimal {
+    fn from(val: u8) -> Self {
+        Self::BigDecimal(val.into())
+    }
+}
+
+impl From<u32> for ExtendedBigDecimal {
+    fn from(val: u32) -> Self {
+        Self::BigDecimal(val.into())
+    }
+}
+
 impl ExtendedBigDecimal {
    pub fn zero() -> Self {
        Self::BigDecimal(0.into())
--- a/src/uucore/src/lib/features/format/argument.rs
+++ b/src/uucore/src/lib/features/format/argument.rs
@ -7,12 +7,16 @@ use super::ExtendedBigDecimal;
 use crate::format::spec::ArgumentLocation;
 use crate::{
    error::set_exit_code,
+    os_str_as_bytes,
    parser::num_parser::{ExtendedParser, ExtendedParserError},
    quoting_style::{QuotingStyle, locale_aware_escape_name},
    show_error, show_warning,
 };
 use os_display::Quotable;
-use std::{ffi::OsStr, num::NonZero};
+use std::{
+    ffi::{OsStr, OsString},
+    num::NonZero,
+};

 /// An argument for formatting
 ///
@ -24,12 +28,12 @@ use std::{ffi::OsStr, num::NonZero};
 #[derive(Clone, Debug, PartialEq)]
 pub enum FormatArgument {
    Char(char),
-    String(String),
+    String(OsString),
    UnsignedInt(u64),
    SignedInt(i64),
    Float(ExtendedBigDecimal),
    /// Special argument that gets coerced into the other variants
-    Unparsed(String),
+    Unparsed(OsString),
 }

 /// A struct that holds a slice of format arguments and provides methods to access them
@ -72,22 +76,25 @@ impl<'a> FormatArguments<'a> {
    pub fn next_char(&mut self, position: &ArgumentLocation) -> u8 {
        match self.next_arg(position) {
            Some(FormatArgument::Char(c)) => *c as u8,
-            Some(FormatArgument::Unparsed(s)) => s.bytes().next().unwrap_or(b'\0'),
+            Some(FormatArgument::Unparsed(os)) => match os_str_as_bytes(os) {
+                Ok(bytes) => bytes.first().copied().unwrap_or(b'\0'),
+                Err(_) => b'\0',
+            },
            _ => b'\0',
        }
    }

-    pub fn next_string(&mut self, position: &ArgumentLocation) -> &'a str {
+    pub fn next_string(&mut self, position: &ArgumentLocation) -> &'a OsStr {
        match self.next_arg(position) {
-            Some(FormatArgument::Unparsed(s) | FormatArgument::String(s)) => s,
-            _ => "",
+            Some(FormatArgument::Unparsed(os) | FormatArgument::String(os)) => os,
+            _ => "".as_ref(),
        }
    }

    pub fn next_i64(&mut self, position: &ArgumentLocation) -> i64 {
        match self.next_arg(position) {
            Some(FormatArgument::SignedInt(n)) => *n,
-            Some(FormatArgument::Unparsed(s)) => extract_value(i64::extended_parse(s), s),
+            Some(FormatArgument::Unparsed(os)) => Self::get_num::<i64>(os),
            _ => 0,
        }
    }
@ -95,25 +102,7 @@ impl<'a> FormatArguments<'a> {
    pub fn next_u64(&mut self, position: &ArgumentLocation) -> u64 {
        match self.next_arg(position) {
            Some(FormatArgument::UnsignedInt(n)) => *n,
-            Some(FormatArgument::Unparsed(s)) => {
-                // Check if the string is a character literal enclosed in quotes
-                if s.starts_with(['"', '\'']) {
-                    // Extract the content between the quotes safely using chars
-                    let mut chars = s.trim_matches(|c| c == '"' || c == '\'').chars();
-                    if let Some(first_char) = chars.next() {
-                        if chars.clone().count() > 0 {
-                            // Emit a warning if there are additional characters
-                            let remaining: String = chars.collect();
-                            show_warning!(
-                                "{remaining}: character(s) following character constant have been ignored"
-                            );
-                        }
-                        return first_char as u64; // Use only the first character
-                    }
-                    return 0; // Empty quotes
-                }
-                extract_value(u64::extended_parse(s), s)
-            }
+            Some(FormatArgument::Unparsed(os)) => Self::get_num::<u64>(os),
            _ => 0,
        }
    }
@ -121,13 +110,81 @@ impl<'a> FormatArguments<'a> {
    pub fn next_extended_big_decimal(&mut self, position: &ArgumentLocation) -> ExtendedBigDecimal {
        match self.next_arg(position) {
            Some(FormatArgument::Float(n)) => n.clone(),
-            Some(FormatArgument::Unparsed(s)) => {
-                extract_value(ExtendedBigDecimal::extended_parse(s), s)
-            }
+            Some(FormatArgument::Unparsed(os)) => Self::get_num::<ExtendedBigDecimal>(os),
            _ => ExtendedBigDecimal::zero(),
        }
    }

+    // Parse an OsStr that we know to start with a quote character (' or ")
+    fn parse_quote_start<T>(os: &OsStr) -> Result<T, ExtendedParserError<T>>
+    where
+        T: ExtendedParser + From<u8> + From<u32> + Default,
+    {
+        // If this fails (this can only happen on Windows), then just
+        // return NotNumeric.
+        let s = match os_str_as_bytes(os) {
+            Ok(s) => s,
+            Err(_) => return Err(ExtendedParserError::NotNumeric),
+        };
+
+        let bytes = match s.split_first() {
+            Some((b'"', bytes)) | Some((b'\'', bytes)) => bytes,
+            _ => {
+                // This really can't happen: the string we are given must start with ' or ".
+                debug_assert!(false);
+                return Err(ExtendedParserError::NotNumeric);
+            }
+        };
+
+        if bytes.is_empty() {
+            return Err(ExtendedParserError::NotNumeric);
+        }
+
+        let (val, len) = if let Some(c) = bytes
+            .utf8_chunks()
+            .next()
+            .expect("bytes should not be empty")
+            .valid()
+            .chars()
+            .next()
+        {
+            // Valid UTF-8 character, cast the codepoint to u32 then T
+            // (the largest Unicode code point, U+10FFFF, fits in 21 bits, so this is lossless)
+            ((c as u32).into(), c.len_utf8())
+        } else {
+            // Not a valid UTF-8 character, use the first byte
+            (bytes[0].into(), 1)
+        };
+        // Report any additional characters as a partial match, so that a warning can be emitted
+        if bytes.len() > len {
+            return Err(ExtendedParserError::PartialMatch(
+                val,
+                String::from_utf8_lossy(&bytes[len..]).into_owned(),
+            ));
+        }
+
+        Ok(val)
+    }
+
+    fn get_num<T>(os: &OsStr) -> T
+    where
+        T: ExtendedParser + From<u8> + From<u32> + Default,
+    {
+        let s = os.to_string_lossy();
+        let first = s.as_bytes().first().copied();
+
+        let quote_start = first == Some(b'"') || first == Some(b'\'');
+        let parsed = if quote_start {
+            // The string begins with a quote
+            Self::parse_quote_start(os)
+        } else {
+            T::extended_parse(&s)
+        };
+
+        // Get the best possible value, even if parsed was an error.
+        extract_value(parsed, &s, quote_start)
+    }
+
    fn get_at_relative_position(&mut self, pos: NonZero<usize>) -> Option<&'a FormatArgument> {
        let pos: usize = pos.into();
        let pos = (pos - 1).saturating_add(self.current_offset);
@ -147,7 +204,11 @@ impl<'a> FormatArguments<'a> {
    }
 }

-fn extract_value<T: Default>(p: Result<T, ExtendedParserError<'_, T>>, input: &str) -> T {
+fn extract_value<T: Default>(
+    p: Result<T, ExtendedParserError<T>>,
+    input: &str,
+    quote_start: bool,
+) -> T {
    match p {
        Ok(v) => v,
        Err(e) => {
@ -167,14 +228,15 @@ fn extract_value<T: Default>(p: Result<T, ExtendedParserError<'_, T>>, input: &s
                    Default::default()
                }
                ExtendedParserError::PartialMatch(v, rest) => {
-                    let bytes = input.as_encoded_bytes();
-                    if !bytes.is_empty() && (bytes[0] == b'\'' || bytes[0] == b'"') {
+                    if quote_start {
+                        set_exit_code(0);
                        show_warning!(
                            "{rest}: character(s) following character constant have been ignored"
                        );
                    } else {
                        show_error!("{}: value not completely converted", input.quote());
                    }
+
                    v
                }
            }
@ -249,11 +311,11 @@ mod tests {
        // Test with different method types in sequence
        let args = [
            FormatArgument::Char('a'),
-            FormatArgument::String("hello".to_string()),
-            FormatArgument::Unparsed("123".to_string()),
-            FormatArgument::String("world".to_string()),
+            FormatArgument::String("hello".into()),
+            FormatArgument::Unparsed("123".into()),
+            FormatArgument::String("world".into()),
            FormatArgument::Char('z'),
-            FormatArgument::String("test".to_string()),
+            FormatArgument::String("test".into()),
        ];
        let mut args = FormatArguments::new(&args);

@ -384,10 +446,10 @@ mod tests {
    fn test_unparsed_arguments() {
        // Test with unparsed arguments that get coerced
        let args = [
-            FormatArgument::Unparsed("hello".to_string()),
-            FormatArgument::Unparsed("123".to_string()),
-            FormatArgument::Unparsed("hello".to_string()),
-            FormatArgument::Unparsed("456".to_string()),
+            FormatArgument::Unparsed("hello".into()),
+            FormatArgument::Unparsed("123".into()),
+            FormatArgument::Unparsed("hello".into()),
+            FormatArgument::Unparsed("456".into()),
        ];
        let mut args = FormatArguments::new(&args);

@ -409,10 +471,10 @@ mod tests {
        // Test with mixed types and positional access
        let args = [
            FormatArgument::Char('a'),
-            FormatArgument::String("test".to_string()),
+            FormatArgument::String("test".into()),
            FormatArgument::UnsignedInt(42),
            FormatArgument::Char('b'),
-            FormatArgument::String("more".to_string()),
+            FormatArgument::String("more".into()),
            FormatArgument::UnsignedInt(99),
        ];
        let mut args = FormatArguments::new(&args);
--- a/src/uucore/src/lib/features/format/mod.rs
+++ b/src/uucore/src/lib/features/format/mod.rs
@ -37,8 +37,12 @@ pub mod human;
 pub mod num_format;
 mod spec;

+pub use self::escape::{EscapedChar, OctalParsing};
 use crate::extendedbigdecimal::ExtendedBigDecimal;
-pub use argument::*;
+pub use argument::{FormatArgument, FormatArguments};
+
+use self::{escape::parse_escape_code, num_format::Formatter};
+use crate::{NonUtf8OsStrError, error::UError};
 pub use spec::Spec;
 use std::{
    error::Error,
@ -50,13 +54,6 @@ use std::{

 use os_display::Quotable;

-use crate::error::UError;
-
-pub use self::{
-    escape::{EscapedChar, OctalParsing, parse_escape_code},
-    num_format::Formatter,
-};
-
 #[derive(Debug)]
 pub enum FormatError {
    SpecError(Vec<u8>),
@ -74,6 +71,7 @@ pub enum FormatError {
    /// The hexadecimal characters represent a code point that cannot represent a
    /// Unicode character (e.g., a surrogate code point)
    InvalidCharacter(char, Vec<u8>),
+    InvalidEncoding(NonUtf8OsStrError),
 }

 impl Error for FormatError {}
@ -85,6 +83,12 @@ impl From<std::io::Error> for FormatError {
    }
 }

+impl From<NonUtf8OsStrError> for FormatError {
+    fn from(value: NonUtf8OsStrError) -> FormatError {
+        FormatError::InvalidEncoding(value)
+    }
+}
+
 impl Display for FormatError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
@ -118,6 +122,7 @@ impl Display for FormatError {
                "invalid universal character name \\{escape_char}{}",
                String::from_utf8_lossy(digits)
            ),
+            Self::InvalidEncoding(no) => no.fmt(f),
        }
    }
 }
--- a/src/uucore/src/lib/features/format/spec.rs
+++ b/src/uucore/src/lib/features/format/spec.rs
@ -5,8 +5,6 @@

 // spell-checker:ignore (vars) intmax ptrdiff padlen

-use crate::quoting_style::{QuotingStyle, locale_aware_escape_name};
-
 use super::{
    ExtendedBigDecimal, FormatChar, FormatError, OctalParsing,
    num_format::{
@ -15,7 +13,11 @@ use super::{
    },
    parse_escape_only,
 };
-use crate::format::FormatArguments;
+use crate::{
+    format::FormatArguments,
+    os_str_as_bytes,
+    quoting_style::{QuotingStyle, locale_aware_escape_name},
+};
 use std::{io::Write, num::NonZero, ops::ControlFlow};

 /// A parsed specification for formatting a value
@ -375,22 +377,21 @@ impl Spec {
                // TODO: We need to not use Rust's formatting for aligning the output,
                // so that we can just write bytes to stdout without panicking.
                let precision = resolve_asterisk_precision(*precision, args);
-                let s = args.next_string(position);
+                let os_str = args.next_string(position);
+                let bytes = os_str_as_bytes(os_str)?;
+
                let truncated = match precision {
-                    Some(p) if p < s.len() => &s[..p],
-                    _ => s,
+                    Some(p) if p < os_str.len() => &bytes[..p],
+                    _ => bytes,
                };
-                write_padded(
-                    writer,
-                    truncated.as_bytes(),
-                    width,
-                    *align_left || neg_width,
-                )
+                write_padded(writer, truncated, width, *align_left || neg_width)
            }
            Self::EscapedString { position } => {
-                let s = args.next_string(position);
-                let mut parsed = Vec::new();
-                for c in parse_escape_only(s.as_bytes(), OctalParsing::ThreeDigits) {
+                let os_str = args.next_string(position);
+                let bytes = os_str_as_bytes(os_str)?;
+                let mut parsed = Vec::<u8>::new();
+
+                for c in parse_escape_only(bytes, OctalParsing::ThreeDigits) {
                    match c.write(&mut parsed)? {
                        ControlFlow::Continue(()) => {}
                        ControlFlow::Break(()) => {
@ -403,15 +404,11 @@ impl Spec {
            }
            Self::QuotedString { position } => {
                let s = locale_aware_escape_name(
-                    args.next_string(position).as_ref(),
+                    args.next_string(position),
                    QuotingStyle::SHELL_ESCAPE,
                );
-                #[cfg(unix)]
-                let bytes = std::os::unix::ffi::OsStringExt::into_vec(s);
-                #[cfg(not(unix))]
-                let bytes = s.to_string_lossy().as_bytes().to_owned();
-
-                writer.write_all(&bytes).map_err(FormatError::IoError)
+                let bytes = os_str_as_bytes(&s)?;
+                writer.write_all(bytes).map_err(FormatError::IoError)
            }
            Self::SignedInt {
                width,
@ -646,7 +643,7 @@ mod tests {
                Some((42, false)),
                resolve_asterisk_width(
                    Some(CanAsterisk::Asterisk(ArgumentLocation::NextArgument)),
-                    &mut FormatArguments::new(&[FormatArgument::Unparsed("42".to_string())]),
+                    &mut FormatArguments::new(&[FormatArgument::Unparsed("42".into())]),
                )
            );

@ -661,7 +658,7 @@ mod tests {
                Some((42, true)),
                resolve_asterisk_width(
                    Some(CanAsterisk::Asterisk(ArgumentLocation::NextArgument)),
-                    &mut FormatArguments::new(&[FormatArgument::Unparsed("-42".to_string())]),
+                    &mut FormatArguments::new(&[FormatArgument::Unparsed("-42".into())]),
                )
            );

@ -672,9 +669,9 @@ mod tests {
                        NonZero::new(2).unwrap()
                    ))),
                    &mut FormatArguments::new(&[
-                        FormatArgument::Unparsed("1".to_string()),
-                        FormatArgument::Unparsed("2".to_string()),
-                        FormatArgument::Unparsed("3".to_string())
+                        FormatArgument::Unparsed("1".into()),
+                        FormatArgument::Unparsed("2".into()),
+                        FormatArgument::Unparsed("3".into())
                    ]),
                )
            );
@ -717,7 +714,7 @@ mod tests {
                Some(42),
                resolve_asterisk_precision(
                    Some(CanAsterisk::Asterisk(ArgumentLocation::NextArgument)),
-                    &mut FormatArguments::new(&[FormatArgument::Unparsed("42".to_string())]),
+                    &mut FormatArguments::new(&[FormatArgument::Unparsed("42".into())]),
                )
            );

@ -732,7 +729,7 @@ mod tests {
                Some(0),
                resolve_asterisk_precision(
                    Some(CanAsterisk::Asterisk(ArgumentLocation::NextArgument)),
-                    &mut FormatArguments::new(&[FormatArgument::Unparsed("-42".to_string())]),
+                    &mut FormatArguments::new(&[FormatArgument::Unparsed("-42".into())]),
                )
            );
            assert_eq!(
@ -742,9 +739,9 @@ mod tests {
                        NonZero::new(2).unwrap()
                    ))),
                    &mut FormatArguments::new(&[
-                        FormatArgument::Unparsed("1".to_string()),
-                        FormatArgument::Unparsed("2".to_string()),
-                        FormatArgument::Unparsed("3".to_string())
+                        FormatArgument::Unparsed("1".into()),
+                        FormatArgument::Unparsed("2".into()),
+                        FormatArgument::Unparsed("3".into())
                    ]),
                )
            );
--- a/src/uucore/src/lib/features/parser/num_parser.rs
+++ b/src/uucore/src/lib/features/parser/num_parser.rs
@ -109,12 +109,12 @@ impl Base {

 /// Type returned if a number could not be parsed in its entirety
 #[derive(Debug, PartialEq)]
-pub enum ExtendedParserError<'a, T> {
+pub enum ExtendedParserError<T> {
    /// The input as a whole makes no sense
    NotNumeric,
    /// The beginning of the input made sense and has been parsed,
    /// while the remaining doesn't.
-    PartialMatch(T, &'a str),
+    PartialMatch(T, String),
    /// The value has overflowed the type storage. The returned value
    /// is saturated (e.g. positive or negative infinity, or min/max
    /// value for the integer type).
@ -124,7 +124,7 @@ pub enum ExtendedParserError<'a, T> {
    Underflow(T),
 }

-impl<'a, T> ExtendedParserError<'a, T>
+impl<T> ExtendedParserError<T>
 where
    T: Zero,
 {
@ -143,12 +143,12 @@ where
    /// conversion.
    fn map<U>(
        self,
-        f: impl FnOnce(T) -> Result<U, ExtendedParserError<'a, U>>,
-    ) -> ExtendedParserError<'a, U>
+        f: impl FnOnce(T) -> Result<U, ExtendedParserError<U>>,
+    ) -> ExtendedParserError<U>
    where
        U: Zero,
    {
-        fn extract<U>(v: Result<U, ExtendedParserError<'_, U>>) -> U
+        fn extract<U>(v: Result<U, ExtendedParserError<U>>) -> U
        where
            U: Zero,
        {
@ -172,15 +172,15 @@ where
 /// and `f64` float, where octal and binary formats are not allowed.
 pub trait ExtendedParser {
    // We pick a hopefully different name for our parser, to avoid clash with standard traits.
-    fn extended_parse(input: &str) -> Result<Self, ExtendedParserError<'_, Self>>
+    fn extended_parse(input: &str) -> Result<Self, ExtendedParserError<Self>>
    where
        Self: Sized;
 }

 impl ExtendedParser for i64 {
    /// Parse a number as i64. No fractional part is allowed.
-    fn extended_parse(input: &str) -> Result<i64, ExtendedParserError<'_, i64>> {
-        fn into_i64<'a>(ebd: ExtendedBigDecimal) -> Result<i64, ExtendedParserError<'a, i64>> {
+    fn extended_parse(input: &str) -> Result<i64, ExtendedParserError<i64>> {
+        fn into_i64(ebd: ExtendedBigDecimal) -> Result<i64, ExtendedParserError<i64>> {
            match ebd {
                ExtendedBigDecimal::BigDecimal(bd) => {
                    let (digits, scale) = bd.into_bigint_and_scale();
@ -214,8 +214,8 @@ impl ExtendedParser for i64 {

 impl ExtendedParser for u64 {
    /// Parse a number as u64. No fractional part is allowed.
-    fn extended_parse(input: &str) -> Result<u64, ExtendedParserError<'_, u64>> {
-        fn into_u64<'a>(ebd: ExtendedBigDecimal) -> Result<u64, ExtendedParserError<'a, u64>> {
+    fn extended_parse(input: &str) -> Result<u64, ExtendedParserError<u64>> {
+        fn into_u64(ebd: ExtendedBigDecimal) -> Result<u64, ExtendedParserError<u64>> {
            match ebd {
                ExtendedBigDecimal::BigDecimal(bd) => {
                    let (digits, scale) = bd.into_bigint_and_scale();
@ -251,8 +251,8 @@ impl ExtendedParser for u64 {

 impl ExtendedParser for f64 {
    /// Parse a number as f64
-    fn extended_parse(input: &str) -> Result<f64, ExtendedParserError<'_, f64>> {
-        fn into_f64<'a>(ebd: ExtendedBigDecimal) -> Result<f64, ExtendedParserError<'a, f64>> {
+    fn extended_parse(input: &str) -> Result<f64, ExtendedParserError<f64>> {
+        fn into_f64(ebd: ExtendedBigDecimal) -> Result<f64, ExtendedParserError<f64>> {
            // TODO: _Some_ of this is generic, so this should probably be implemented as an ExtendedBigDecimal trait (ToPrimitive).
            let v = match ebd {
                ExtendedBigDecimal::BigDecimal(bd) => {
@ -285,7 +285,7 @@ impl ExtendedParser for ExtendedBigDecimal {
    /// Parse a number as an ExtendedBigDecimal
    fn extended_parse(
        input: &str,
-    ) -> Result<ExtendedBigDecimal, ExtendedParserError<'_, ExtendedBigDecimal>> {
+    ) -> Result<ExtendedBigDecimal, ExtendedParserError<ExtendedBigDecimal>> {
        parse(input, ParseTarget::Decimal, &[])
    }
 }
@ -349,11 +349,11 @@ fn parse_suffix_multiplier<'a>(str: &'a str, allowed_suffixes: &[(char, u32)]) -
    (1, str)
 }

-fn parse_special_value<'a>(
-    input: &'a str,
+fn parse_special_value(
+    input: &str,
    negative: bool,
    allowed_suffixes: &[(char, u32)],
-) -> Result<ExtendedBigDecimal, ExtendedParserError<'a, ExtendedBigDecimal>> {
+) -> Result<ExtendedBigDecimal, ExtendedParserError<ExtendedBigDecimal>> {
    let input_lc = input.to_ascii_lowercase();

    // Array of ("String to match", return value when sign positive, when sign negative)
@ -376,7 +376,7 @@ fn parse_special_value<'a>(
            return if rest.is_empty() {
                Ok(special)
            } else {
-                Err(ExtendedParserError::PartialMatch(special, rest))
+                Err(ExtendedParserError::PartialMatch(special, rest.to_string()))
            };
        }
    }
@ -386,7 +386,7 @@ fn parse_special_value<'a>(

 /// Underflow/Overflow errors always contain 0 or infinity.
 /// overflow: true for overflow, false for underflow.
-fn make_error<'a>(overflow: bool, negative: bool) -> ExtendedParserError<'a, ExtendedBigDecimal> {
+fn make_error(overflow: bool, negative: bool) -> ExtendedParserError<ExtendedBigDecimal> {
    let mut v = if overflow {
        ExtendedBigDecimal::Infinity
    } else {
@ -468,13 +468,13 @@ fn pow_with_context(bd: &BigDecimal, exp: i64, ctx: &Context) -> BigDecimal {
 }

 /// Construct an [`ExtendedBigDecimal`] based on parsed data
-fn construct_extended_big_decimal<'a>(
+fn construct_extended_big_decimal(
    digits: BigUint,
    negative: bool,
    base: Base,
    scale: i64,
    exponent: BigInt,
-) -> Result<ExtendedBigDecimal, ExtendedParserError<'a, ExtendedBigDecimal>> {
+) -> Result<ExtendedBigDecimal, ExtendedParserError<ExtendedBigDecimal>> {
    if digits == BigUint::zero() {
        // Return 0 if the digits are zero. In particular, we do not ever
        // return Overflow/Underflow errors in that case.
@ -541,25 +541,13 @@ pub(crate) enum ParseTarget {
    Duration,
 }

-pub(crate) fn parse<'a>(
-    input: &'a str,
+pub(crate) fn parse(
+    input: &str,
    target: ParseTarget,
    allowed_suffixes: &[(char, u32)],
-) -> Result<ExtendedBigDecimal, ExtendedParserError<'a, ExtendedBigDecimal>> {
-    // Parse the " and ' prefixes separately
-    if target != ParseTarget::Duration {
-        if let Some(rest) = input.strip_prefix(['\'', '"']) {
-            let mut chars = rest.char_indices().fuse();
-            let v = chars
-                .next()
-                .map(|(_, c)| ExtendedBigDecimal::BigDecimal(u32::from(c).into()));
-            return match (v, chars.next()) {
-                (Some(v), None) => Ok(v),
-                (Some(v), Some((i, _))) => Err(ExtendedParserError::PartialMatch(v, &rest[i..])),
-                (None, _) => Err(ExtendedParserError::NotNumeric),
-            };
-        }
-    }
+) -> Result<ExtendedBigDecimal, ExtendedParserError<ExtendedBigDecimal>> {
+    // Note: literals with ' and " prefixes are parsed earlier on in argument parsing,
+    // before UTF-8 conversion.

    let trimmed_input = input.trim_ascii_start();

@ -616,7 +604,7 @@ pub(crate) fn parse<'a>(
            } else {
                ExtendedBigDecimal::zero()
            };
-            return Err(ExtendedParserError::PartialMatch(ebd, partial));
+            return Err(ExtendedParserError::PartialMatch(ebd, partial.to_string()));
        }

        return if target == ParseTarget::Integral {
@ -640,7 +628,7 @@ pub(crate) fn parse<'a>(
    } else {
        Err(ExtendedParserError::PartialMatch(
            ebd_result.unwrap_or_else(|e| e.extract()),
-            rest,
+            rest.to_string(),
        ))
    }
 }
@ -686,14 +674,14 @@ mod tests {
            u64::extended_parse(""),
            Err(ExtendedParserError::NotNumeric)
        ));
-        assert!(matches!(
+        assert_eq!(
            u64::extended_parse("123.15"),
-            Err(ExtendedParserError::PartialMatch(123, ".15"))
-        ));
-        assert!(matches!(
+            Err(ExtendedParserError::PartialMatch(123, ".15".to_string()))
+        );
+        assert_eq!(
            u64::extended_parse("123e10"),
-            Err(ExtendedParserError::PartialMatch(123, "e10"))
-        ));
+            Err(ExtendedParserError::PartialMatch(123, "e10".to_string()))
+        );
    }

    #[test]
@ -707,18 +695,18 @@ mod tests {
        ));
        assert_eq!(Ok(i64::MAX), i64::extended_parse(&format!("{}", i64::MAX)));
        assert_eq!(Ok(i64::MIN), i64::extended_parse(&format!("{}", i64::MIN)));
-        assert!(matches!(
+        assert_eq!(
            i64::extended_parse(&format!("{}", u64::MAX)),
            Err(ExtendedParserError::Overflow(i64::MAX))
-        ));
+        );
        assert!(matches!(
            i64::extended_parse(&format!("{}", i64::MAX as u64 + 1)),
            Err(ExtendedParserError::Overflow(i64::MAX))
        ));
-        assert!(matches!(
+        assert_eq!(
            i64::extended_parse("-123e10"),
-            Err(ExtendedParserError::PartialMatch(-123, "e10"))
-        ));
+            Err(ExtendedParserError::PartialMatch(-123, "e10".to_string()))
+        );
        assert!(matches!(
            i64::extended_parse(&format!("{}", -(u64::MAX as i128))),
            Err(ExtendedParserError::Overflow(i64::MIN))
@ -770,20 +758,34 @@ mod tests {
            Ok(0.15),
            f64::extended_parse(".150000000000000000000000000231313")
        );
-        assert!(matches!(f64::extended_parse("123.15e"),
-                         Err(ExtendedParserError::PartialMatch(f, "e")) if f == 123.15));
-        assert!(matches!(f64::extended_parse("123.15E"),
-                         Err(ExtendedParserError::PartialMatch(f, "E")) if f == 123.15));
-        assert!(matches!(f64::extended_parse("123.15e-"),
-                         Err(ExtendedParserError::PartialMatch(f, "e-")) if f == 123.15));
-        assert!(matches!(f64::extended_parse("123.15e+"),
-                         Err(ExtendedParserError::PartialMatch(f, "e+")) if f == 123.15));
-        assert!(matches!(f64::extended_parse("123.15e."),
-                         Err(ExtendedParserError::PartialMatch(f, "e.")) if f == 123.15));
-        assert!(matches!(f64::extended_parse("1.2.3"),
-                         Err(ExtendedParserError::PartialMatch(f, ".3")) if f == 1.2));
-        assert!(matches!(f64::extended_parse("123.15p5"),
-                        Err(ExtendedParserError::PartialMatch(f, "p5")) if f == 123.15));
+        assert_eq!(
+            f64::extended_parse("123.15e"),
+            Err(ExtendedParserError::PartialMatch(123.15, "e".to_string()))
+        );
+        assert_eq!(
+            f64::extended_parse("123.15E"),
+            Err(ExtendedParserError::PartialMatch(123.15, "E".to_string()))
+        );
+        assert_eq!(
+            f64::extended_parse("123.15e-"),
+            Err(ExtendedParserError::PartialMatch(123.15, "e-".to_string()))
+        );
+        assert_eq!(
+            f64::extended_parse("123.15e+"),
+            Err(ExtendedParserError::PartialMatch(123.15, "e+".to_string()))
+        );
+        assert_eq!(
+            f64::extended_parse("123.15e."),
+            Err(ExtendedParserError::PartialMatch(123.15, "e.".to_string()))
+        );
+        assert_eq!(
+            f64::extended_parse("1.2.3"),
+            Err(ExtendedParserError::PartialMatch(1.2, ".3".to_string()))
+        );
+        assert_eq!(
+            f64::extended_parse("123.15p5"),
+            Err(ExtendedParserError::PartialMatch(123.15, "p5".to_string()))
+        );
        // Minus zero. 0.0 == -0.0 so we explicitly check the sign.
        assert_eq!(Ok(0.0), f64::extended_parse("-0.0"));
        assert!(f64::extended_parse("-0.0").unwrap().is_sign_negative());
@ -806,10 +808,20 @@ mod tests {
        assert!(f64::extended_parse("nan").unwrap().is_sign_positive());
        assert!(f64::extended_parse("NAN").unwrap().is_nan());
        assert!(f64::extended_parse("NAN").unwrap().is_sign_positive());
-        assert!(matches!(f64::extended_parse("-infinit"),
-                         Err(ExtendedParserError::PartialMatch(f, "init")) if f == f64::NEG_INFINITY));
-        assert!(matches!(f64::extended_parse("-infinity00"),
-                         Err(ExtendedParserError::PartialMatch(f, "00")) if f == f64::NEG_INFINITY));
+        assert_eq!(
+            f64::extended_parse("-infinit"),
+            Err(ExtendedParserError::PartialMatch(
+                f64::NEG_INFINITY,
+                "init".to_string()
+            ))
+        );
+        assert_eq!(
+            f64::extended_parse("-infinity00"),
+            Err(ExtendedParserError::PartialMatch(
+                f64::NEG_INFINITY,
+                "00".to_string()
+            ))
+        );
        assert!(f64::extended_parse(&format!("{}", u64::MAX)).is_ok());
        assert!(f64::extended_parse(&format!("{}", i64::MIN)).is_ok());

@ -994,14 +1006,22 @@ mod tests {
        // but we can check that the number still gets parsed properly: 0x0.8e5 is 0x8e5 / 16**3
        assert_eq!(Ok(0.555908203125), f64::extended_parse("0x0.8e5"));

-        assert!(matches!(f64::extended_parse("0x0.1p"),
-                        Err(ExtendedParserError::PartialMatch(f, "p")) if f == 0.0625));
-        assert!(matches!(f64::extended_parse("0x0.1p-"),
-                        Err(ExtendedParserError::PartialMatch(f, "p-")) if f == 0.0625));
-        assert!(matches!(f64::extended_parse("0x.1p+"),
-                        Err(ExtendedParserError::PartialMatch(f, "p+")) if f == 0.0625));
-        assert!(matches!(f64::extended_parse("0x.1p."),
-                        Err(ExtendedParserError::PartialMatch(f, "p.")) if f == 0.0625));
+        assert_eq!(
+            f64::extended_parse("0x0.1p"),
+            Err(ExtendedParserError::PartialMatch(0.0625, "p".to_string()))
+        );
+        assert_eq!(
+            f64::extended_parse("0x0.1p-"),
+            Err(ExtendedParserError::PartialMatch(0.0625, "p-".to_string()))
+        );
+        assert_eq!(
+            f64::extended_parse("0x.1p+"),
+            Err(ExtendedParserError::PartialMatch(0.0625, "p+".to_string()))
+        );
+        assert_eq!(
+            f64::extended_parse("0x.1p."),
+            Err(ExtendedParserError::PartialMatch(0.0625, "p.".to_string()))
+        );

        assert_eq!(
            Ok(ExtendedBigDecimal::BigDecimal(
@ -1061,40 +1081,58 @@ mod tests {
        ));

        // Not actually hex numbers, but the prefixes look like it.
-        assert!(matches!(f64::extended_parse("0x"),
-            Err(ExtendedParserError::PartialMatch(f, "x")) if f == 0.0));
-        assert!(matches!(f64::extended_parse("0x."),
-            Err(ExtendedParserError::PartialMatch(f, "x.")) if f == 0.0));
-        assert!(matches!(f64::extended_parse("0xp"),
-            Err(ExtendedParserError::PartialMatch(f, "xp")) if f == 0.0));
-        assert!(matches!(f64::extended_parse("0xp-2"),
-            Err(ExtendedParserError::PartialMatch(f, "xp-2")) if f == 0.0));
-        assert!(matches!(f64::extended_parse("0x.p-2"),
-            Err(ExtendedParserError::PartialMatch(f, "x.p-2")) if f == 0.0));
-        assert!(matches!(f64::extended_parse("0X"),
-            Err(ExtendedParserError::PartialMatch(f, "X")) if f == 0.0));
-        assert!(matches!(f64::extended_parse("-0x"),
-            Err(ExtendedParserError::PartialMatch(f, "x")) if f == -0.0));
-        assert!(matches!(f64::extended_parse("+0x"),
-            Err(ExtendedParserError::PartialMatch(f, "x")) if f == 0.0));
-        assert!(matches!(f64::extended_parse("-0x."),
-            Err(ExtendedParserError::PartialMatch(f, "x.")) if f == -0.0));
-        assert!(matches!(
+        assert_eq!(
+            f64::extended_parse("0x"),
+            Err(ExtendedParserError::PartialMatch(0.0, "x".to_string()))
+        );
+        assert_eq!(
+            f64::extended_parse("0x."),
+            Err(ExtendedParserError::PartialMatch(0.0, "x.".to_string()))
+        );
+        assert_eq!(
+            f64::extended_parse("0xp"),
+            Err(ExtendedParserError::PartialMatch(0.0, "xp".to_string()))
+        );
+        assert_eq!(
+            f64::extended_parse("0xp-2"),
+            Err(ExtendedParserError::PartialMatch(0.0, "xp-2".to_string()))
+        );
+        assert_eq!(
+            f64::extended_parse("0x.p-2"),
+            Err(ExtendedParserError::PartialMatch(0.0, "x.p-2".to_string()))
+        );
+        assert_eq!(
+            f64::extended_parse("0X"),
+            Err(ExtendedParserError::PartialMatch(0.0, "X".to_string()))
+        );
+        // Keep the sign in the expected literal, like the `-0x.` case below. Note that
+        // `0.0 == -0.0`, so `assert_eq!` alone does not verify the sign of the result.
+        assert_eq!(
+            f64::extended_parse("-0x"),
+            Err(ExtendedParserError::PartialMatch(-0.0, "x".to_string()))
+        );
+        assert_eq!(
+            f64::extended_parse("+0x"),
+            Err(ExtendedParserError::PartialMatch(0.0, "x".to_string()))
+        );
+        assert_eq!(
+            f64::extended_parse("-0x."),
+            Err(ExtendedParserError::PartialMatch(-0.0, "x.".to_string()))
+        );
+        assert_eq!(
            u64::extended_parse("0x"),
-            Err(ExtendedParserError::PartialMatch(0, "x"))
-        ));
-        assert!(matches!(
+            Err(ExtendedParserError::PartialMatch(0, "x".to_string()))
+        );
+        assert_eq!(
            u64::extended_parse("-0x"),
-            Err(ExtendedParserError::PartialMatch(0, "x"))
-        ));
-        assert!(matches!(
+            Err(ExtendedParserError::PartialMatch(0, "x".to_string()))
+        );
+        assert_eq!(
            i64::extended_parse("0x"),
-            Err(ExtendedParserError::PartialMatch(0, "x"))
-        ));
-        assert!(matches!(
+            Err(ExtendedParserError::PartialMatch(0, "x".to_string()))
+        );
+        assert_eq!(
            i64::extended_parse("-0x"),
-            Err(ExtendedParserError::PartialMatch(0, "x"))
-        ));
+            Err(ExtendedParserError::PartialMatch(0, "x".to_string()))
+        );
    }

    #[test]
@ -1105,18 +1143,18 @@ mod tests {
        assert_eq!(Ok(-0o123), i64::extended_parse("-0123"));
        assert_eq!(Ok(0o123), u64::extended_parse("00123"));
        assert_eq!(Ok(0), u64::extended_parse("00"));
-        assert!(matches!(
+        assert_eq!(
            u64::extended_parse("008"),
-            Err(ExtendedParserError::PartialMatch(0, "8"))
-        ));
-        assert!(matches!(
+            Err(ExtendedParserError::PartialMatch(0, "8".to_string()))
+        );
+        assert_eq!(
            u64::extended_parse("08"),
-            Err(ExtendedParserError::PartialMatch(0, "8"))
-        ));
-        assert!(matches!(
+            Err(ExtendedParserError::PartialMatch(0, "8".to_string()))
+        );
+        assert_eq!(
            u64::extended_parse("0."),
-            Err(ExtendedParserError::PartialMatch(0, "."))
-        ));
+            Err(ExtendedParserError::PartialMatch(0, ".".to_string()))
+        );

        // No float tests, leading zeros get parsed as decimal anyway.
    }
@ -1128,51 +1166,62 @@ mod tests {
        assert_eq!(Ok(0b1011), u64::extended_parse("+0b1011"));
        assert_eq!(Ok(-0b1011), i64::extended_parse("-0b1011"));

-        assert!(matches!(
+        assert_eq!(
            u64::extended_parse("0b"),
-            Err(ExtendedParserError::PartialMatch(0, "b"))
-        ));
-        assert!(matches!(
+            Err(ExtendedParserError::PartialMatch(0, "b".to_string()))
+        );
+        assert_eq!(
            u64::extended_parse("0b."),
-            Err(ExtendedParserError::PartialMatch(0, "b."))
-        ));
-        assert!(matches!(
+            Err(ExtendedParserError::PartialMatch(0, "b.".to_string()))
+        );
+        assert_eq!(
            u64::extended_parse("-0b"),
-            Err(ExtendedParserError::PartialMatch(0, "b"))
-        ));
-        assert!(matches!(
+            Err(ExtendedParserError::PartialMatch(0, "b".to_string()))
+        );
+        assert_eq!(
            i64::extended_parse("0b"),
-            Err(ExtendedParserError::PartialMatch(0, "b"))
-        ));
-        assert!(matches!(
+            Err(ExtendedParserError::PartialMatch(0, "b".to_string()))
+        );
+        assert_eq!(
            i64::extended_parse("-0b"),
-            Err(ExtendedParserError::PartialMatch(0, "b"))
-        ));
+            Err(ExtendedParserError::PartialMatch(0, "b".to_string()))
+        );

        // Binary not allowed for floats
-        assert!(matches!(
+        assert_eq!(
            f64::extended_parse("0b100"),
-            Err(ExtendedParserError::PartialMatch(0f64, "b100"))
-        ));
-        assert!(matches!(
+            Err(ExtendedParserError::PartialMatch(0f64, "b100".to_string()))
+        );
+        assert_eq!(
            f64::extended_parse("0b100.1"),
-            Err(ExtendedParserError::PartialMatch(0f64, "b100.1"))
-        ));
+            Err(ExtendedParserError::PartialMatch(
+                0f64,
+                "b100.1".to_string()
+            ))
+        );

-        assert!(match ExtendedBigDecimal::extended_parse("0b100.1") {
-            Err(ExtendedParserError::PartialMatch(ebd, "b100.1")) =>
-                ebd == ExtendedBigDecimal::zero(),
-            _ => false,
-        });
+        assert_eq!(
+            ExtendedBigDecimal::extended_parse("0b100.1"),
+            Err(ExtendedParserError::PartialMatch(
+                ExtendedBigDecimal::zero(),
+                "b100.1".to_string()
+            ))
+        );

-        assert!(match ExtendedBigDecimal::extended_parse("0b") {
-            Err(ExtendedParserError::PartialMatch(ebd, "b")) => ebd == ExtendedBigDecimal::zero(),
-            _ => false,
-        });
-        assert!(match ExtendedBigDecimal::extended_parse("0b.") {
-            Err(ExtendedParserError::PartialMatch(ebd, "b.")) => ebd == ExtendedBigDecimal::zero(),
-            _ => false,
-        });
+        assert_eq!(
+            ExtendedBigDecimal::extended_parse("0b"),
+            Err(ExtendedParserError::PartialMatch(
+                ExtendedBigDecimal::zero(),
+                "b".to_string()
+            ))
+        );
+        assert_eq!(
+            ExtendedBigDecimal::extended_parse("0b."),
+            Err(ExtendedParserError::PartialMatch(
+                ExtendedBigDecimal::zero(),
+                "b.".to_string()
+            ))
+        );
    }

    #[test]
@ -1185,15 +1234,15 @@ mod tests {

        // Ensure that trailing whitespace is still a partial match
        assert_eq!(
-            Err(ExtendedParserError::PartialMatch(6, " ")),
+            Err(ExtendedParserError::PartialMatch(6, " ".to_string())),
            u64::extended_parse("0x6 ")
        );
        assert_eq!(
-            Err(ExtendedParserError::PartialMatch(7, "\t")),
+            Err(ExtendedParserError::PartialMatch(7, "\t".to_string())),
            u64::extended_parse("0x7\t")
        );
        assert_eq!(
-            Err(ExtendedParserError::PartialMatch(8, "\n")),
+            Err(ExtendedParserError::PartialMatch(8, "\n".to_string())),
            u64::extended_parse("0x8\n")
        );

--- a/src/uucore/src/lib/lib.rs
+++ b/src/uucore/src/lib/lib.rs
@ -311,23 +311,39 @@ pub fn read_yes() -> bool {
    }
 }

+/// Error returned by [`os_str_as_bytes`] when an `OsStr` cannot be viewed as UTF-8 bytes.
+///
+/// [`os_str_as_bytes`] only returns it on non-unix targets; its unix path always succeeds.
+#[derive(Debug)]
+pub struct NonUtf8OsStrError {
+    /// Lossy (`to_string_lossy`) copy of the rejected input, shown quoted by `Display`.
+    input_lossy_string: String,
+}
+
+/// User-facing message: names the quoted lossy input and states that the conversion to
+/// bytes failed on a platform that doesn't expose byte arguments.
+impl std::fmt::Display for NonUtf8OsStrError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        use os_display::Quotable;
+        // Quote the lossy copy (via `os_display`) before embedding it in the message.
+        let quoted = self.input_lossy_string.quote();
+        f.write_fmt(format_args!(
+            "invalid UTF-8 input {quoted} encountered when converting to bytes on a platform that doesn't expose byte arguments",
+        ))
+    }
+}
+
+// Both impls are empty, so every trait method keeps its default behaviour.
+// With the `UError` impl in place, callers returning `UResult` (e.g. echo's `execute`)
+// propagate this error with `?`.
+impl std::error::Error for NonUtf8OsStrError {}
+impl error::UError for NonUtf8OsStrError {}
+
 /// Converts an `OsStr` to a UTF-8 `&[u8]`.
 ///
 /// This always succeeds on unix platforms,
 /// and fails on other platforms if the string can't be coerced to UTF-8.
+///
+/// On unix the underlying bytes are returned unchanged, without any UTF-8 check.
+///
+/// # Errors
+///
+/// On non-unix platforms, returns a [`NonUtf8OsStrError`] holding a lossy copy of
+/// `os_string` when it is not valid UTF-8.
-pub fn os_str_as_bytes(os_string: &OsStr) -> mods::error::UResult<&[u8]> {
+pub fn os_str_as_bytes(os_string: &OsStr) -> Result<&[u8], NonUtf8OsStrError> {
    #[cfg(unix)]
-    let bytes = os_string.as_bytes();
+    return Ok(os_string.as_bytes());

    #[cfg(not(unix))]
-    let bytes = os_string
+    os_string
        .to_str()
-        .ok_or_else(|| {
-            mods::error::UUsageError::new(1, "invalid UTF-8 was detected in one or more arguments")
-        })?
-        .as_bytes();
-
-    Ok(bytes)
+        .ok_or_else(|| NonUtf8OsStrError {
+            input_lossy_string: os_string.to_string_lossy().into_owned(),
+        })
+        .map(|s| s.as_bytes())
 }

 /// Performs a potentially lossy conversion from `OsStr` to UTF-8 bytes.
@ -336,15 +352,13 @@ pub fn os_str_as_bytes(os_string: &OsStr) -> mods::error::UResult<&[u8]> {
 /// and wraps [`OsStr::to_string_lossy`] on non-unix platforms.
 pub fn os_str_as_bytes_lossy(os_string: &OsStr) -> Cow<[u8]> {
+    // Unix: wrap the raw bytes of the `OsStr` directly.
    #[cfg(unix)]
-    let bytes = Cow::from(os_string.as_bytes());
+    return Cow::from(os_string.as_bytes());

+    // Elsewhere: convert lossily, keeping a borrowed result borrowed and an owned one owned.
    #[cfg(not(unix))]
-    let bytes = match os_string.to_string_lossy() {
+    match os_string.to_string_lossy() {
        Cow::Borrowed(slice) => Cow::from(slice.as_bytes()),
        Cow::Owned(owned) => Cow::from(owned.into_bytes()),
-    };
-
-    bytes
+    }
 }

 /// Converts a `&[u8]` to an `&OsStr`,
@ -354,13 +368,12 @@ pub fn os_str_as_bytes_lossy(os_string: &OsStr) -> Cow<[u8]> {
 /// and fails on other platforms if the bytes can't be parsed as UTF-8.
 pub fn os_str_from_bytes(bytes: &[u8]) -> mods::error::UResult<Cow<'_, OsStr>> {
+    // Unix: borrow `bytes` as an `OsStr`; the result is `Cow::Borrowed`.
    #[cfg(unix)]
-    let os_str = Cow::Borrowed(OsStr::from_bytes(bytes));
-    #[cfg(not(unix))]
-    let os_str = Cow::Owned(OsString::from(str::from_utf8(bytes).map_err(|_| {
-        mods::error::UUsageError::new(1, "Unable to transform bytes into OsStr")
-    })?));
+    return Ok(Cow::Borrowed(OsStr::from_bytes(bytes)));

-    Ok(os_str)
+    // Elsewhere: the bytes must parse as UTF-8; success yields an owned `OsString`,
+    // and a parse failure is reported as a `UUsageError`.
+    #[cfg(not(unix))]
+    Ok(Cow::Owned(OsString::from(str::from_utf8(bytes).map_err(
+        |_| mods::error::UUsageError::new(1, "Unable to transform bytes into OsStr"),
+    )?)))
 }

 /// Converts a `Vec<u8>` into an `OsString`, parsing as UTF-8 on non-unix platforms.
@ -369,13 +382,12 @@ pub fn os_str_from_bytes(bytes: &[u8]) -> mods::error::UResult<Cow<'_, OsStr>> {
 /// and fails on other platforms if the bytes can't be parsed as UTF-8.
 pub fn os_string_from_vec(vec: Vec<u8>) -> mods::error::UResult<OsString> {
+    // Unix: build the `OsString` from the byte vector via `from_vec`; this path has no error case.
    #[cfg(unix)]
-    let s = OsString::from_vec(vec);
-    #[cfg(not(unix))]
-    let s = OsString::from(String::from_utf8(vec).map_err(|_| {
-        mods::error::UUsageError::new(1, "invalid UTF-8 was detected in one or more arguments")
-    })?);
+    return Ok(OsString::from_vec(vec));

-    Ok(s)
+    // Elsewhere: the vector must be valid UTF-8; otherwise a `UUsageError` is returned.
+    #[cfg(not(unix))]
+    Ok(OsString::from(String::from_utf8(vec).map_err(|_| {
+        mods::error::UUsageError::new(1, "invalid UTF-8 was detected in one or more arguments")
+    })?))
 }

 /// Converts an `OsString` into a `Vec<u8>`, parsing as UTF-8 on non-unix platforms.
--- a/tests/by-util/test_printf.rs
+++ b/tests/by-util/test_printf.rs
@ -805,7 +805,7 @@ fn test_overflow() {
 fn partial_char() {
    new_ucmd!()
        .args(&["%d", "'abc"])
-        .fails_with_code(1)
+        .succeeds()
        .stdout_is("97")
        .stderr_is(
            "printf: warning: bc: character(s) following character constant have been ignored\n",
@ -1293,23 +1293,80 @@ fn float_arg_with_whitespace() {

 #[test]
 fn mb_input() {
-    for format in ["\"á", "\'á", "'\u{e1}"] {
+    // (format, argument, expected stdout): a quote-prefixed argument is printed as the
+    // code point of the character that follows the quote (á is U+00E1, i.e. 225).
+    let cases = vec![
+        ("%04x\n", "\"á", "00e1\n"),
+        ("%04x\n", "'á", "00e1\n"),
+        ("%04x\n", "'\u{e1}", "00e1\n"),
+        ("%i\n", "\"á", "225\n"),
+        ("%i\n", "'á", "225\n"),
+        ("%i\n", "'\u{e1}", "225\n"),
+        ("%f\n", "'á", "225.000000\n"),
+    ];
+    for (format, arg, stdout) in cases {
        new_ucmd!()
-            .args(&["%04x\n", format])
+            .args(&[format, arg])
            .succeeds()
-            .stdout_only("00e1\n");
+            .stdout_only(stdout);
    }

+    // (format, argument, expected stdout, ignored tail): the first character after the
+    // quote is still converted, and a warning on stderr names the text that was ignored.
    let cases = vec![
-        ("\"á=", "="),
-        ("\'á-", "-"),
-        ("\'á=-==", "=-=="),
-        ("'\u{e1}++", "++"),
+        ("%04x\n", "\"á=", "00e1\n", "="),
+        ("%04x\n", "'á-", "00e1\n", "-"),
+        ("%04x\n", "'á=-==", "00e1\n", "=-=="),
+        ("%04x\n", "'á'", "00e1\n", "'"),
+        ("%04x\n", "'\u{e1}++", "00e1\n", "++"),
+        ("%04x\n", "''á'", "0027\n", "á'"),
+        ("%i\n", "\"á=", "225\n", "="),
    ];
-
-    for (format, expected) in cases {
+    for (format, arg, stdout, stderr) in cases {
        new_ucmd!()
-            .args(&["%04x\n", format])
+            .args(&[format, arg])
+            .succeeds()
+            .stdout_is(stdout)
+            .stderr_is(format!("printf: warning: {stderr}: character(s) following character constant have been ignored\n"));
+    }
+
+    // A lone quote with no character after it is rejected as non-numeric.
+    for arg in ["\"", "'"] {
+        new_ucmd!()
+            .args(&["%04x\n", arg])
+            .fails()
+            .stderr_contains("expected a numeric value");
+    }
+}
+
+#[test]
+#[cfg(target_family = "unix")]
+fn mb_invalid_unicode() {
+    use std::ffi::OsStr;
+    use std::os::unix::ffi::OsStrExt;
+
+    let cases = vec![
+        ("%04x\n", b"\"\xe1", "00e1\n"),
+        ("%04x\n", b"'\xe1", "00e1\n"),
+        ("%i\n", b"\"\xe1", "225\n"),
+        ("%i\n", b"'\xe1", "225\n"),
+        ("%f\n", b"'\xe1", "225.000000\n"),
+    ];
+    for (format, arg, stdout) in cases {
+        new_ucmd!()
+            .arg(format)
+            .arg(OsStr::from_bytes(arg))
+            .succeeds()
+            .stdout_only(stdout);
+    }
+
+    let cases = vec![
+        (b"\"\xe1=".as_slice(), "="),
+        (b"'\xe1-".as_slice(), "-"),
+        (b"'\xe1=-==".as_slice(), "=-=="),
+        (b"'\xe1'".as_slice(), "'"),
+        // unclear if original or replacement character is better in stderr
+        //(b"''\xe1'".as_slice(), "'<27>'"),
+    ];
+    for (arg, expected) in cases {
+        new_ucmd!()
+            .arg("%04x\n")
+            .arg(OsStr::from_bytes(arg))
            .succeeds()
            .stdout_is("00e1\n")
            .stderr_is(format!("printf: warning: {expected}: character(s) following character constant have been ignored\n"));
@ -1364,3 +1421,35 @@ fn positional_format_specifiers() {
        .succeeds()
        .stdout_only("Octal: 115, Int: 42, Float: 3.141590, String: hello, Hex: ff, Scientific: 1.000000e-05, Char: A, Unsigned: 100, Integer: 123");
 }
+
+// Checks how printf treats an argument whose bytes are not valid UTF-8 (unix only,
+// where such an `OsStr` can be built from raw bytes).
+#[test]
+#[cfg(target_family = "unix")]
+fn non_utf_8_input() {
+    use std::ffi::OsStr;
+    use std::os::unix::ffi::OsStrExt;
+
+    // ISO-8859-1 encoded text
+    // spell-checker:disable
+    const INPUT_AND_OUTPUT: &[u8] =
+        b"Swer an rehte g\xFCete wendet s\xEEn gem\xFCete, dem volget s\xE6lde und \xEAre.";
+    // spell-checker:enable
+
+    let os_str = OsStr::from_bytes(INPUT_AND_OUTPUT);
+
+    // As a `%s` argument, the bytes are written to stdout unchanged.
+    new_ucmd!()
+        .arg("%s")
+        .arg(os_str)
+        .succeeds()
+        .stdout_only_bytes(INPUT_AND_OUTPUT);
+
+    // As the first (FORMAT) argument, the bytes are likewise written unchanged.
+    new_ucmd!()
+        .arg(os_str)
+        .succeeds()
+        .stdout_only_bytes(INPUT_AND_OUTPUT);
+
+    // As a `%d` argument, the command fails and stderr reports a non-numeric value.
+    new_ucmd!()
+        .arg("%d")
+        .arg(os_str)
+        .fails()
+        .stderr_contains("expected a numeric value");
+}
--- a/util/why-error.md
+++ b/util/why-error.md
@ -38,11 +38,7 @@ This file documents why some tests are failing:
 * gnu/tests/mv/part-hardlink.sh
 * gnu/tests/od/od-N.sh
 * gnu/tests/od/od-float.sh
-* gnu/tests/printf/printf-cov.pl
-* gnu/tests/printf/printf-indexed.sh
-* gnu/tests/printf/printf-mb.sh
 * gnu/tests/printf/printf-quote.sh
-* gnu/tests/printf/printf.sh
 * gnu/tests/ptx/ptx-overrun.sh
 * gnu/tests/ptx/ptx.pl
 * gnu/tests/rm/empty-inacc.sh - https://github.com/uutils/coreutils/issues/7033