Merge pull request #8329 from drinkcat/printf-7209-update
Some checks are pending
CICD / Style/cargo-deny (push) Waiting to run
CICD / Style/deps (push) Waiting to run
CICD / Documentation/warnings (push) Waiting to run
CICD / MinRustV (push) Waiting to run
CICD / Dependencies (push) Waiting to run
CICD / Build/Makefile (push) Blocked by required conditions
CICD / Build/stable (push) Blocked by required conditions
CICD / Build/nightly (push) Blocked by required conditions
CICD / Binary sizes (push) Blocked by required conditions
CICD / Build (push) Blocked by required conditions
CICD / Tests/BusyBox test suite (push) Blocked by required conditions
CICD / Tests/Toybox test suite (push) Blocked by required conditions
CICD / Code Coverage (push) Waiting to run
CICD / Separate Builds (push) Waiting to run
CICD / Test all features separately (push) Blocked by required conditions
CICD / Build/SELinux (push) Blocked by required conditions
GnuTests / Run GNU tests (push) Waiting to run
Android / Test builds (push) Waiting to run
Code Quality / Style/format (push) Waiting to run
Code Quality / Style/lint (push) Waiting to run
Code Quality / Style/spelling (push) Waiting to run
Code Quality / Style/toml (push) Waiting to run
Code Quality / Style/Python (push) Waiting to run
Code Quality / Pre-commit hooks (push) Waiting to run
FreeBSD / Style and Lint (push) Waiting to run
FreeBSD / Tests (push) Waiting to run

printf: accept non-UTF-8 input in FORMAT and ARGUMENT arguments
This commit is contained in:
Dorian Péron 2025-07-16 00:28:44 +02:00 committed by GitHub
commit 1bb7930a08
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 512 additions and 292 deletions

View file

@ -8,10 +8,9 @@ use clap::{Arg, ArgAction, Command};
use std::env;
use std::ffi::{OsStr, OsString};
use std::io::{self, StdoutLock, Write};
use uucore::error::{UResult, USimpleError};
use uucore::error::UResult;
use uucore::format::{FormatChar, OctalParsing, parse_escape_only};
use uucore::format_usage;
use uucore::os_str_as_bytes;
use uucore::{format_usage, os_str_as_bytes};
use uucore::locale::get_message;
@ -223,9 +222,9 @@ pub fn uu_app() -> Command {
fn execute(stdout: &mut StdoutLock, args: Vec<OsString>, options: Options) -> UResult<()> {
for (i, arg) in args.into_iter().enumerate() {
let bytes = os_str_as_bytes(arg.as_os_str())
.map_err(|_| USimpleError::new(1, get_message("echo-error-non-utf8")))?;
let bytes = os_str_as_bytes(&arg)?;
// Don't print a space before the first argument
if i > 0 {
stdout.write_all(b" ")?;
}

View file

@ -4,6 +4,7 @@
// file that was distributed with this source code.
use clap::{Arg, ArgAction, Command};
use std::collections::HashMap;
use std::ffi::OsString;
use std::io::stdout;
use std::ops::ControlFlow;
use uucore::error::{UResult, UUsageError};
@ -18,21 +19,19 @@ mod options {
pub const FORMAT: &str = "FORMAT";
pub const ARGUMENT: &str = "ARGUMENT";
}
#[uucore::main]
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
let matches = uu_app().get_matches_from(args);
let format = matches
.get_one::<std::ffi::OsString>(options::FORMAT)
.get_one::<OsString>(options::FORMAT)
.ok_or_else(|| UUsageError::new(1, get_message("printf-error-missing-operand")))?;
let format = os_str_as_bytes(format)?;
let values: Vec<_> = match matches.get_many::<std::ffi::OsString>(options::ARGUMENT) {
// FIXME: use os_str_as_bytes once FormatArgument supports Vec<u8>
let values: Vec<_> = match matches.get_many::<OsString>(options::ARGUMENT) {
Some(s) => s
.map(|os_string| {
FormatArgument::Unparsed(std::ffi::OsStr::to_string_lossy(os_string).to_string())
})
.map(|os_string| FormatArgument::Unparsed(os_string.to_owned()))
.collect(),
None => vec![],
};
@ -62,7 +61,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
"{}",
get_message_with_args(
"printf-warning-ignoring-excess-arguments",
HashMap::from([("arg".to_string(), arg_str.to_string())])
HashMap::from([("arg".to_string(), arg_str.to_string_lossy().to_string())])
)
);
}
@ -103,10 +102,10 @@ pub fn uu_app() -> Command {
.help(get_message("printf-help-version"))
.action(ArgAction::Version),
)
.arg(Arg::new(options::FORMAT).value_parser(clap::value_parser!(std::ffi::OsString)))
.arg(Arg::new(options::FORMAT).value_parser(clap::value_parser!(OsString)))
.arg(
Arg::new(options::ARGUMENT)
.action(ArgAction::Append)
.value_parser(clap::value_parser!(std::ffi::OsString)),
.value_parser(clap::value_parser!(OsString)),
)
}

View file

@ -968,7 +968,7 @@ fn process_checksum_line(
cached_line_format: &mut Option<LineFormat>,
last_algo: &mut Option<String>,
) -> Result<(), LineCheckError> {
let line_bytes = os_str_as_bytes(line)?;
let line_bytes = os_str_as_bytes(line).map_err(|e| LineCheckError::UError(Box::new(e)))?;
// Early return on empty or commented lines.
if line.is_empty() || line_bytes.starts_with(b"#") {

View file

@ -101,6 +101,18 @@ impl From<f64> for ExtendedBigDecimal {
}
}
/// Builds an [`ExtendedBigDecimal`] from a `u8` by converting the value with
/// `Into` and wrapping it in the `BigDecimal` variant.
impl From<u8> for ExtendedBigDecimal {
fn from(val: u8) -> Self {
Self::BigDecimal(val.into())
}
}
/// Builds an [`ExtendedBigDecimal`] from a `u32` by converting the value with
/// `Into` and wrapping it in the `BigDecimal` variant.
impl From<u32> for ExtendedBigDecimal {
fn from(val: u32) -> Self {
Self::BigDecimal(val.into())
}
}
impl ExtendedBigDecimal {
pub fn zero() -> Self {
Self::BigDecimal(0.into())

View file

@ -7,12 +7,16 @@ use super::ExtendedBigDecimal;
use crate::format::spec::ArgumentLocation;
use crate::{
error::set_exit_code,
os_str_as_bytes,
parser::num_parser::{ExtendedParser, ExtendedParserError},
quoting_style::{QuotingStyle, locale_aware_escape_name},
show_error, show_warning,
};
use os_display::Quotable;
use std::{ffi::OsStr, num::NonZero};
use std::{
ffi::{OsStr, OsString},
num::NonZero,
};
/// An argument for formatting
///
@ -24,12 +28,12 @@ use std::{ffi::OsStr, num::NonZero};
#[derive(Clone, Debug, PartialEq)]
pub enum FormatArgument {
Char(char),
String(String),
String(OsString),
UnsignedInt(u64),
SignedInt(i64),
Float(ExtendedBigDecimal),
/// Special argument that gets coerced into the other variants
Unparsed(String),
Unparsed(OsString),
}
/// A struct that holds a slice of format arguments and provides methods to access them
@ -72,22 +76,25 @@ impl<'a> FormatArguments<'a> {
pub fn next_char(&mut self, position: &ArgumentLocation) -> u8 {
match self.next_arg(position) {
Some(FormatArgument::Char(c)) => *c as u8,
Some(FormatArgument::Unparsed(s)) => s.bytes().next().unwrap_or(b'\0'),
Some(FormatArgument::Unparsed(os)) => match os_str_as_bytes(os) {
Ok(bytes) => bytes.first().copied().unwrap_or(b'\0'),
Err(_) => b'\0',
},
_ => b'\0',
}
}
pub fn next_string(&mut self, position: &ArgumentLocation) -> &'a str {
pub fn next_string(&mut self, position: &ArgumentLocation) -> &'a OsStr {
match self.next_arg(position) {
Some(FormatArgument::Unparsed(s) | FormatArgument::String(s)) => s,
_ => "",
Some(FormatArgument::Unparsed(os) | FormatArgument::String(os)) => os,
_ => "".as_ref(),
}
}
pub fn next_i64(&mut self, position: &ArgumentLocation) -> i64 {
match self.next_arg(position) {
Some(FormatArgument::SignedInt(n)) => *n,
Some(FormatArgument::Unparsed(s)) => extract_value(i64::extended_parse(s), s),
Some(FormatArgument::Unparsed(os)) => Self::get_num::<i64>(os),
_ => 0,
}
}
@ -95,25 +102,7 @@ impl<'a> FormatArguments<'a> {
pub fn next_u64(&mut self, position: &ArgumentLocation) -> u64 {
match self.next_arg(position) {
Some(FormatArgument::UnsignedInt(n)) => *n,
Some(FormatArgument::Unparsed(s)) => {
// Check if the string is a character literal enclosed in quotes
if s.starts_with(['"', '\'']) {
// Extract the content between the quotes safely using chars
let mut chars = s.trim_matches(|c| c == '"' || c == '\'').chars();
if let Some(first_char) = chars.next() {
if chars.clone().count() > 0 {
// Emit a warning if there are additional characters
let remaining: String = chars.collect();
show_warning!(
"{remaining}: character(s) following character constant have been ignored"
);
}
return first_char as u64; // Use only the first character
}
return 0; // Empty quotes
}
extract_value(u64::extended_parse(s), s)
}
Some(FormatArgument::Unparsed(os)) => Self::get_num::<u64>(os),
_ => 0,
}
}
@ -121,13 +110,81 @@ impl<'a> FormatArguments<'a> {
pub fn next_extended_big_decimal(&mut self, position: &ArgumentLocation) -> ExtendedBigDecimal {
match self.next_arg(position) {
Some(FormatArgument::Float(n)) => n.clone(),
Some(FormatArgument::Unparsed(s)) => {
extract_value(ExtendedBigDecimal::extended_parse(s), s)
}
Some(FormatArgument::Unparsed(os)) => Self::get_num::<ExtendedBigDecimal>(os),
_ => ExtendedBigDecimal::zero(),
}
}
/// Parses a character constant, i.e. an argument whose first byte is `'` or `"`.
///
/// The value is the code point of the first character after the quote when
/// that character is valid UTF-8, otherwise the value of the first raw byte.
///
/// # Errors
///
/// * `NotNumeric` if `os` cannot be viewed as bytes, does not start with a
///   quote, or contains nothing after the quote.
/// * `PartialMatch(value, rest)` if bytes remain after the first character;
///   `rest` is the lossy UTF-8 rendering of those remaining bytes.
fn parse_quote_start<T>(os: &OsStr) -> Result<T, ExtendedParserError<T>>
where
    T: ExtendedParser + From<u8> + From<u32> + Default,
{
    // The byte view may be unavailable for some inputs; such an argument is
    // simply reported as not numeric.
    let Ok(raw) = os_str_as_bytes(os) else {
        return Err(ExtendedParserError::NotNumeric);
    };

    // Drop the opening quote.
    let payload = match raw {
        [b'"' | b'\'', rest @ ..] => rest,
        _ => {
            // Callers only hand us input that starts with a quote, so this arm
            // signals a programming error in debug builds.
            debug_assert!(false);
            return Err(ExtendedParserError::NotNumeric);
        }
    };

    // A lone quote carries no value.
    let Some(&lead) = payload.first() else {
        return Err(ExtendedParserError::NotNumeric);
    };

    // Look at the leading valid UTF-8 run (empty if the first byte is invalid).
    let first_char = payload
        .utf8_chunks()
        .next()
        .expect("bytes should not be empty")
        .valid()
        .chars()
        .next();

    let (val, consumed) = match first_char {
        // Valid UTF-8: a `char` always fits in a `u32`, so the cast is lossless.
        Some(c) => (T::from(c as u32), c.len_utf8()),
        // Invalid UTF-8: fall back to the raw value of the first byte.
        None => (T::from(lead), 1),
    };

    // Anything after the first character is reported back so that the caller
    // can warn about it.
    let trailing = &payload[consumed..];
    if trailing.is_empty() {
        Ok(val)
    } else {
        Err(ExtendedParserError::PartialMatch(
            val,
            String::from_utf8_lossy(trailing).into_owned(),
        ))
    }
}
/// Converts an unparsed argument into a numeric value of type `T`.
///
/// Arguments that begin with `'` or `"` are treated as character constants
/// and parsed from the raw `OsStr`; everything else goes through
/// `T::extended_parse` on the lossy UTF-8 rendering of the argument. The
/// parse result is then handed to `extract_value`, which yields the best
/// available value even when parsing reported an error.
fn get_num<T>(os: &OsStr) -> T
where
    T: ExtendedParser + From<u8> + From<u32> + Default,
{
    let text = os.to_string_lossy();

    // Does the argument open with a single or double quote?
    let quote_start = matches!(text.as_bytes().first(), Some(b'"' | b'\''));

    let parsed = match quote_start {
        true => Self::parse_quote_start(os),
        false => T::extended_parse(&text),
    };

    // Get the best possible value, even if parsing returned an error.
    extract_value(parsed, &text, quote_start)
}
fn get_at_relative_position(&mut self, pos: NonZero<usize>) -> Option<&'a FormatArgument> {
let pos: usize = pos.into();
let pos = (pos - 1).saturating_add(self.current_offset);
@ -147,7 +204,11 @@ impl<'a> FormatArguments<'a> {
}
}
fn extract_value<T: Default>(p: Result<T, ExtendedParserError<'_, T>>, input: &str) -> T {
fn extract_value<T: Default>(
p: Result<T, ExtendedParserError<T>>,
input: &str,
quote_start: bool,
) -> T {
match p {
Ok(v) => v,
Err(e) => {
@ -167,14 +228,15 @@ fn extract_value<T: Default>(p: Result<T, ExtendedParserError<'_, T>>, input: &s
Default::default()
}
ExtendedParserError::PartialMatch(v, rest) => {
let bytes = input.as_encoded_bytes();
if !bytes.is_empty() && (bytes[0] == b'\'' || bytes[0] == b'"') {
if quote_start {
set_exit_code(0);
show_warning!(
"{rest}: character(s) following character constant have been ignored"
);
} else {
show_error!("{}: value not completely converted", input.quote());
}
v
}
}
@ -249,11 +311,11 @@ mod tests {
// Test with different method types in sequence
let args = [
FormatArgument::Char('a'),
FormatArgument::String("hello".to_string()),
FormatArgument::Unparsed("123".to_string()),
FormatArgument::String("world".to_string()),
FormatArgument::String("hello".into()),
FormatArgument::Unparsed("123".into()),
FormatArgument::String("world".into()),
FormatArgument::Char('z'),
FormatArgument::String("test".to_string()),
FormatArgument::String("test".into()),
];
let mut args = FormatArguments::new(&args);
@ -384,10 +446,10 @@ mod tests {
fn test_unparsed_arguments() {
// Test with unparsed arguments that get coerced
let args = [
FormatArgument::Unparsed("hello".to_string()),
FormatArgument::Unparsed("123".to_string()),
FormatArgument::Unparsed("hello".to_string()),
FormatArgument::Unparsed("456".to_string()),
FormatArgument::Unparsed("hello".into()),
FormatArgument::Unparsed("123".into()),
FormatArgument::Unparsed("hello".into()),
FormatArgument::Unparsed("456".into()),
];
let mut args = FormatArguments::new(&args);
@ -409,10 +471,10 @@ mod tests {
// Test with mixed types and positional access
let args = [
FormatArgument::Char('a'),
FormatArgument::String("test".to_string()),
FormatArgument::String("test".into()),
FormatArgument::UnsignedInt(42),
FormatArgument::Char('b'),
FormatArgument::String("more".to_string()),
FormatArgument::String("more".into()),
FormatArgument::UnsignedInt(99),
];
let mut args = FormatArguments::new(&args);

View file

@ -37,8 +37,12 @@ pub mod human;
pub mod num_format;
mod spec;
pub use self::escape::{EscapedChar, OctalParsing};
use crate::extendedbigdecimal::ExtendedBigDecimal;
pub use argument::*;
pub use argument::{FormatArgument, FormatArguments};
use self::{escape::parse_escape_code, num_format::Formatter};
use crate::{NonUtf8OsStrError, error::UError};
pub use spec::Spec;
use std::{
error::Error,
@ -50,13 +54,6 @@ use std::{
use os_display::Quotable;
use crate::error::UError;
pub use self::{
escape::{EscapedChar, OctalParsing, parse_escape_code},
num_format::Formatter,
};
#[derive(Debug)]
pub enum FormatError {
SpecError(Vec<u8>),
@ -74,6 +71,7 @@ pub enum FormatError {
/// The hexadecimal characters represent a code point that cannot represent a
/// Unicode character (e.g., a surrogate code point)
InvalidCharacter(char, Vec<u8>),
InvalidEncoding(NonUtf8OsStrError),
}
impl Error for FormatError {}
@ -85,6 +83,12 @@ impl From<std::io::Error> for FormatError {
}
}
impl From<NonUtf8OsStrError> for FormatError {
fn from(value: NonUtf8OsStrError) -> FormatError {
FormatError::InvalidEncoding(value)
}
}
impl Display for FormatError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
@ -118,6 +122,7 @@ impl Display for FormatError {
"invalid universal character name \\{escape_char}{}",
String::from_utf8_lossy(digits)
),
Self::InvalidEncoding(no) => no.fmt(f),
}
}
}

View file

@ -5,8 +5,6 @@
// spell-checker:ignore (vars) intmax ptrdiff padlen
use crate::quoting_style::{QuotingStyle, locale_aware_escape_name};
use super::{
ExtendedBigDecimal, FormatChar, FormatError, OctalParsing,
num_format::{
@ -15,7 +13,11 @@ use super::{
},
parse_escape_only,
};
use crate::format::FormatArguments;
use crate::{
format::FormatArguments,
os_str_as_bytes,
quoting_style::{QuotingStyle, locale_aware_escape_name},
};
use std::{io::Write, num::NonZero, ops::ControlFlow};
/// A parsed specification for formatting a value
@ -375,22 +377,21 @@ impl Spec {
// TODO: We need to not use Rust's formatting for aligning the output,
// so that we can just write bytes to stdout without panicking.
let precision = resolve_asterisk_precision(*precision, args);
let s = args.next_string(position);
let os_str = args.next_string(position);
let bytes = os_str_as_bytes(os_str)?;
let truncated = match precision {
Some(p) if p < s.len() => &s[..p],
_ => s,
Some(p) if p < os_str.len() => &bytes[..p],
_ => bytes,
};
write_padded(
writer,
truncated.as_bytes(),
width,
*align_left || neg_width,
)
write_padded(writer, truncated, width, *align_left || neg_width)
}
Self::EscapedString { position } => {
let s = args.next_string(position);
let mut parsed = Vec::new();
for c in parse_escape_only(s.as_bytes(), OctalParsing::ThreeDigits) {
let os_str = args.next_string(position);
let bytes = os_str_as_bytes(os_str)?;
let mut parsed = Vec::<u8>::new();
for c in parse_escape_only(bytes, OctalParsing::ThreeDigits) {
match c.write(&mut parsed)? {
ControlFlow::Continue(()) => {}
ControlFlow::Break(()) => {
@ -403,15 +404,11 @@ impl Spec {
}
Self::QuotedString { position } => {
let s = locale_aware_escape_name(
args.next_string(position).as_ref(),
args.next_string(position),
QuotingStyle::SHELL_ESCAPE,
);
#[cfg(unix)]
let bytes = std::os::unix::ffi::OsStringExt::into_vec(s);
#[cfg(not(unix))]
let bytes = s.to_string_lossy().as_bytes().to_owned();
writer.write_all(&bytes).map_err(FormatError::IoError)
let bytes = os_str_as_bytes(&s)?;
writer.write_all(bytes).map_err(FormatError::IoError)
}
Self::SignedInt {
width,
@ -646,7 +643,7 @@ mod tests {
Some((42, false)),
resolve_asterisk_width(
Some(CanAsterisk::Asterisk(ArgumentLocation::NextArgument)),
&mut FormatArguments::new(&[FormatArgument::Unparsed("42".to_string())]),
&mut FormatArguments::new(&[FormatArgument::Unparsed("42".into())]),
)
);
@ -661,7 +658,7 @@ mod tests {
Some((42, true)),
resolve_asterisk_width(
Some(CanAsterisk::Asterisk(ArgumentLocation::NextArgument)),
&mut FormatArguments::new(&[FormatArgument::Unparsed("-42".to_string())]),
&mut FormatArguments::new(&[FormatArgument::Unparsed("-42".into())]),
)
);
@ -672,9 +669,9 @@ mod tests {
NonZero::new(2).unwrap()
))),
&mut FormatArguments::new(&[
FormatArgument::Unparsed("1".to_string()),
FormatArgument::Unparsed("2".to_string()),
FormatArgument::Unparsed("3".to_string())
FormatArgument::Unparsed("1".into()),
FormatArgument::Unparsed("2".into()),
FormatArgument::Unparsed("3".into())
]),
)
);
@ -717,7 +714,7 @@ mod tests {
Some(42),
resolve_asterisk_precision(
Some(CanAsterisk::Asterisk(ArgumentLocation::NextArgument)),
&mut FormatArguments::new(&[FormatArgument::Unparsed("42".to_string())]),
&mut FormatArguments::new(&[FormatArgument::Unparsed("42".into())]),
)
);
@ -732,7 +729,7 @@ mod tests {
Some(0),
resolve_asterisk_precision(
Some(CanAsterisk::Asterisk(ArgumentLocation::NextArgument)),
&mut FormatArguments::new(&[FormatArgument::Unparsed("-42".to_string())]),
&mut FormatArguments::new(&[FormatArgument::Unparsed("-42".into())]),
)
);
assert_eq!(
@ -742,9 +739,9 @@ mod tests {
NonZero::new(2).unwrap()
))),
&mut FormatArguments::new(&[
FormatArgument::Unparsed("1".to_string()),
FormatArgument::Unparsed("2".to_string()),
FormatArgument::Unparsed("3".to_string())
FormatArgument::Unparsed("1".into()),
FormatArgument::Unparsed("2".into()),
FormatArgument::Unparsed("3".into())
]),
)
);

View file

@ -109,12 +109,12 @@ impl Base {
/// Type returned if a number could not be parsed in its entirety
#[derive(Debug, PartialEq)]
pub enum ExtendedParserError<'a, T> {
pub enum ExtendedParserError<T> {
/// The input as a whole makes no sense
NotNumeric,
/// The beginning of the input made sense and has been parsed,
/// while the remaining doesn't.
PartialMatch(T, &'a str),
PartialMatch(T, String),
/// The value has overflowed the type storage. The returned value
/// is saturated (e.g. positive or negative infinity, or min/max
/// value for the integer type).
@ -124,7 +124,7 @@ pub enum ExtendedParserError<'a, T> {
Underflow(T),
}
impl<'a, T> ExtendedParserError<'a, T>
impl<T> ExtendedParserError<T>
where
T: Zero,
{
@ -143,12 +143,12 @@ where
/// conversion.
fn map<U>(
self,
f: impl FnOnce(T) -> Result<U, ExtendedParserError<'a, U>>,
) -> ExtendedParserError<'a, U>
f: impl FnOnce(T) -> Result<U, ExtendedParserError<U>>,
) -> ExtendedParserError<U>
where
U: Zero,
{
fn extract<U>(v: Result<U, ExtendedParserError<'_, U>>) -> U
fn extract<U>(v: Result<U, ExtendedParserError<U>>) -> U
where
U: Zero,
{
@ -172,15 +172,15 @@ where
/// and `f64` float, where octal and binary formats are not allowed.
pub trait ExtendedParser {
// We pick a hopefully different name for our parser, to avoid clash with standard traits.
fn extended_parse(input: &str) -> Result<Self, ExtendedParserError<'_, Self>>
fn extended_parse(input: &str) -> Result<Self, ExtendedParserError<Self>>
where
Self: Sized;
}
impl ExtendedParser for i64 {
/// Parse a number as i64. No fractional part is allowed.
fn extended_parse(input: &str) -> Result<i64, ExtendedParserError<'_, i64>> {
fn into_i64<'a>(ebd: ExtendedBigDecimal) -> Result<i64, ExtendedParserError<'a, i64>> {
fn extended_parse(input: &str) -> Result<i64, ExtendedParserError<i64>> {
fn into_i64(ebd: ExtendedBigDecimal) -> Result<i64, ExtendedParserError<i64>> {
match ebd {
ExtendedBigDecimal::BigDecimal(bd) => {
let (digits, scale) = bd.into_bigint_and_scale();
@ -214,8 +214,8 @@ impl ExtendedParser for i64 {
impl ExtendedParser for u64 {
/// Parse a number as u64. No fractional part is allowed.
fn extended_parse(input: &str) -> Result<u64, ExtendedParserError<'_, u64>> {
fn into_u64<'a>(ebd: ExtendedBigDecimal) -> Result<u64, ExtendedParserError<'a, u64>> {
fn extended_parse(input: &str) -> Result<u64, ExtendedParserError<u64>> {
fn into_u64(ebd: ExtendedBigDecimal) -> Result<u64, ExtendedParserError<u64>> {
match ebd {
ExtendedBigDecimal::BigDecimal(bd) => {
let (digits, scale) = bd.into_bigint_and_scale();
@ -251,8 +251,8 @@ impl ExtendedParser for u64 {
impl ExtendedParser for f64 {
/// Parse a number as f64
fn extended_parse(input: &str) -> Result<f64, ExtendedParserError<'_, f64>> {
fn into_f64<'a>(ebd: ExtendedBigDecimal) -> Result<f64, ExtendedParserError<'a, f64>> {
fn extended_parse(input: &str) -> Result<f64, ExtendedParserError<f64>> {
fn into_f64(ebd: ExtendedBigDecimal) -> Result<f64, ExtendedParserError<f64>> {
// TODO: _Some_ of this is generic, so this should probably be implemented as an ExtendedBigDecimal trait (ToPrimitive).
let v = match ebd {
ExtendedBigDecimal::BigDecimal(bd) => {
@ -285,7 +285,7 @@ impl ExtendedParser for ExtendedBigDecimal {
/// Parse a number as an ExtendedBigDecimal
fn extended_parse(
input: &str,
) -> Result<ExtendedBigDecimal, ExtendedParserError<'_, ExtendedBigDecimal>> {
) -> Result<ExtendedBigDecimal, ExtendedParserError<ExtendedBigDecimal>> {
parse(input, ParseTarget::Decimal, &[])
}
}
@ -349,11 +349,11 @@ fn parse_suffix_multiplier<'a>(str: &'a str, allowed_suffixes: &[(char, u32)]) -
(1, str)
}
fn parse_special_value<'a>(
input: &'a str,
fn parse_special_value(
input: &str,
negative: bool,
allowed_suffixes: &[(char, u32)],
) -> Result<ExtendedBigDecimal, ExtendedParserError<'a, ExtendedBigDecimal>> {
) -> Result<ExtendedBigDecimal, ExtendedParserError<ExtendedBigDecimal>> {
let input_lc = input.to_ascii_lowercase();
// Array of ("String to match", return value when sign positive, when sign negative)
@ -376,7 +376,7 @@ fn parse_special_value<'a>(
return if rest.is_empty() {
Ok(special)
} else {
Err(ExtendedParserError::PartialMatch(special, rest))
Err(ExtendedParserError::PartialMatch(special, rest.to_string()))
};
}
}
@ -386,7 +386,7 @@ fn parse_special_value<'a>(
/// Underflow/Overflow errors always contain 0 or infinity.
/// overflow: true for overflow, false for underflow.
fn make_error<'a>(overflow: bool, negative: bool) -> ExtendedParserError<'a, ExtendedBigDecimal> {
fn make_error(overflow: bool, negative: bool) -> ExtendedParserError<ExtendedBigDecimal> {
let mut v = if overflow {
ExtendedBigDecimal::Infinity
} else {
@ -468,13 +468,13 @@ fn pow_with_context(bd: &BigDecimal, exp: i64, ctx: &Context) -> BigDecimal {
}
/// Construct an [`ExtendedBigDecimal`] based on parsed data
fn construct_extended_big_decimal<'a>(
fn construct_extended_big_decimal(
digits: BigUint,
negative: bool,
base: Base,
scale: i64,
exponent: BigInt,
) -> Result<ExtendedBigDecimal, ExtendedParserError<'a, ExtendedBigDecimal>> {
) -> Result<ExtendedBigDecimal, ExtendedParserError<ExtendedBigDecimal>> {
if digits == BigUint::zero() {
// Return 0 if the digits are zero. In particular, we do not ever
// return Overflow/Underflow errors in that case.
@ -541,25 +541,13 @@ pub(crate) enum ParseTarget {
Duration,
}
pub(crate) fn parse<'a>(
input: &'a str,
pub(crate) fn parse(
input: &str,
target: ParseTarget,
allowed_suffixes: &[(char, u32)],
) -> Result<ExtendedBigDecimal, ExtendedParserError<'a, ExtendedBigDecimal>> {
// Parse the " and ' prefixes separately
if target != ParseTarget::Duration {
if let Some(rest) = input.strip_prefix(['\'', '"']) {
let mut chars = rest.char_indices().fuse();
let v = chars
.next()
.map(|(_, c)| ExtendedBigDecimal::BigDecimal(u32::from(c).into()));
return match (v, chars.next()) {
(Some(v), None) => Ok(v),
(Some(v), Some((i, _))) => Err(ExtendedParserError::PartialMatch(v, &rest[i..])),
(None, _) => Err(ExtendedParserError::NotNumeric),
};
}
}
) -> Result<ExtendedBigDecimal, ExtendedParserError<ExtendedBigDecimal>> {
// Note: literals with ' and " prefixes are parsed earlier on in argument parsing,
// before UTF-8 conversion.
let trimmed_input = input.trim_ascii_start();
@ -616,7 +604,7 @@ pub(crate) fn parse<'a>(
} else {
ExtendedBigDecimal::zero()
};
return Err(ExtendedParserError::PartialMatch(ebd, partial));
return Err(ExtendedParserError::PartialMatch(ebd, partial.to_string()));
}
return if target == ParseTarget::Integral {
@ -640,7 +628,7 @@ pub(crate) fn parse<'a>(
} else {
Err(ExtendedParserError::PartialMatch(
ebd_result.unwrap_or_else(|e| e.extract()),
rest,
rest.to_string(),
))
}
}
@ -686,14 +674,14 @@ mod tests {
u64::extended_parse(""),
Err(ExtendedParserError::NotNumeric)
));
assert!(matches!(
assert_eq!(
u64::extended_parse("123.15"),
Err(ExtendedParserError::PartialMatch(123, ".15"))
));
assert!(matches!(
Err(ExtendedParserError::PartialMatch(123, ".15".to_string()))
);
assert_eq!(
u64::extended_parse("123e10"),
Err(ExtendedParserError::PartialMatch(123, "e10"))
));
Err(ExtendedParserError::PartialMatch(123, "e10".to_string()))
);
}
#[test]
@ -707,18 +695,18 @@ mod tests {
));
assert_eq!(Ok(i64::MAX), i64::extended_parse(&format!("{}", i64::MAX)));
assert_eq!(Ok(i64::MIN), i64::extended_parse(&format!("{}", i64::MIN)));
assert!(matches!(
assert_eq!(
i64::extended_parse(&format!("{}", u64::MAX)),
Err(ExtendedParserError::Overflow(i64::MAX))
));
);
assert!(matches!(
i64::extended_parse(&format!("{}", i64::MAX as u64 + 1)),
Err(ExtendedParserError::Overflow(i64::MAX))
));
assert!(matches!(
assert_eq!(
i64::extended_parse("-123e10"),
Err(ExtendedParserError::PartialMatch(-123, "e10"))
));
Err(ExtendedParserError::PartialMatch(-123, "e10".to_string()))
);
assert!(matches!(
i64::extended_parse(&format!("{}", -(u64::MAX as i128))),
Err(ExtendedParserError::Overflow(i64::MIN))
@ -770,20 +758,34 @@ mod tests {
Ok(0.15),
f64::extended_parse(".150000000000000000000000000231313")
);
assert!(matches!(f64::extended_parse("123.15e"),
Err(ExtendedParserError::PartialMatch(f, "e")) if f == 123.15));
assert!(matches!(f64::extended_parse("123.15E"),
Err(ExtendedParserError::PartialMatch(f, "E")) if f == 123.15));
assert!(matches!(f64::extended_parse("123.15e-"),
Err(ExtendedParserError::PartialMatch(f, "e-")) if f == 123.15));
assert!(matches!(f64::extended_parse("123.15e+"),
Err(ExtendedParserError::PartialMatch(f, "e+")) if f == 123.15));
assert!(matches!(f64::extended_parse("123.15e."),
Err(ExtendedParserError::PartialMatch(f, "e.")) if f == 123.15));
assert!(matches!(f64::extended_parse("1.2.3"),
Err(ExtendedParserError::PartialMatch(f, ".3")) if f == 1.2));
assert!(matches!(f64::extended_parse("123.15p5"),
Err(ExtendedParserError::PartialMatch(f, "p5")) if f == 123.15));
assert_eq!(
f64::extended_parse("123.15e"),
Err(ExtendedParserError::PartialMatch(123.15, "e".to_string()))
);
assert_eq!(
f64::extended_parse("123.15E"),
Err(ExtendedParserError::PartialMatch(123.15, "E".to_string()))
);
assert_eq!(
f64::extended_parse("123.15e-"),
Err(ExtendedParserError::PartialMatch(123.15, "e-".to_string()))
);
assert_eq!(
f64::extended_parse("123.15e+"),
Err(ExtendedParserError::PartialMatch(123.15, "e+".to_string()))
);
assert_eq!(
f64::extended_parse("123.15e."),
Err(ExtendedParserError::PartialMatch(123.15, "e.".to_string()))
);
assert_eq!(
f64::extended_parse("1.2.3"),
Err(ExtendedParserError::PartialMatch(1.2, ".3".to_string()))
);
assert_eq!(
f64::extended_parse("123.15p5"),
Err(ExtendedParserError::PartialMatch(123.15, "p5".to_string()))
);
// Minus zero. 0.0 == -0.0 so we explicitly check the sign.
assert_eq!(Ok(0.0), f64::extended_parse("-0.0"));
assert!(f64::extended_parse("-0.0").unwrap().is_sign_negative());
@ -806,10 +808,20 @@ mod tests {
assert!(f64::extended_parse("nan").unwrap().is_sign_positive());
assert!(f64::extended_parse("NAN").unwrap().is_nan());
assert!(f64::extended_parse("NAN").unwrap().is_sign_positive());
assert!(matches!(f64::extended_parse("-infinit"),
Err(ExtendedParserError::PartialMatch(f, "init")) if f == f64::NEG_INFINITY));
assert!(matches!(f64::extended_parse("-infinity00"),
Err(ExtendedParserError::PartialMatch(f, "00")) if f == f64::NEG_INFINITY));
assert_eq!(
f64::extended_parse("-infinit"),
Err(ExtendedParserError::PartialMatch(
f64::NEG_INFINITY,
"init".to_string()
))
);
assert_eq!(
f64::extended_parse("-infinity00"),
Err(ExtendedParserError::PartialMatch(
f64::NEG_INFINITY,
"00".to_string()
))
);
assert!(f64::extended_parse(&format!("{}", u64::MAX)).is_ok());
assert!(f64::extended_parse(&format!("{}", i64::MIN)).is_ok());
@ -994,14 +1006,22 @@ mod tests {
// but we can check that the number still gets parsed properly: 0x0.8e5 is 0x8e5 / 16**3
assert_eq!(Ok(0.555908203125), f64::extended_parse("0x0.8e5"));
assert!(matches!(f64::extended_parse("0x0.1p"),
Err(ExtendedParserError::PartialMatch(f, "p")) if f == 0.0625));
assert!(matches!(f64::extended_parse("0x0.1p-"),
Err(ExtendedParserError::PartialMatch(f, "p-")) if f == 0.0625));
assert!(matches!(f64::extended_parse("0x.1p+"),
Err(ExtendedParserError::PartialMatch(f, "p+")) if f == 0.0625));
assert!(matches!(f64::extended_parse("0x.1p."),
Err(ExtendedParserError::PartialMatch(f, "p.")) if f == 0.0625));
assert_eq!(
f64::extended_parse("0x0.1p"),
Err(ExtendedParserError::PartialMatch(0.0625, "p".to_string()))
);
assert_eq!(
f64::extended_parse("0x0.1p-"),
Err(ExtendedParserError::PartialMatch(0.0625, "p-".to_string()))
);
assert_eq!(
f64::extended_parse("0x.1p+"),
Err(ExtendedParserError::PartialMatch(0.0625, "p+".to_string()))
);
assert_eq!(
f64::extended_parse("0x.1p."),
Err(ExtendedParserError::PartialMatch(0.0625, "p.".to_string()))
);
assert_eq!(
Ok(ExtendedBigDecimal::BigDecimal(
@ -1061,40 +1081,58 @@ mod tests {
));
// Not actually hex numbers, but the prefixes look like it.
assert!(matches!(f64::extended_parse("0x"),
Err(ExtendedParserError::PartialMatch(f, "x")) if f == 0.0));
assert!(matches!(f64::extended_parse("0x."),
Err(ExtendedParserError::PartialMatch(f, "x.")) if f == 0.0));
assert!(matches!(f64::extended_parse("0xp"),
Err(ExtendedParserError::PartialMatch(f, "xp")) if f == 0.0));
assert!(matches!(f64::extended_parse("0xp-2"),
Err(ExtendedParserError::PartialMatch(f, "xp-2")) if f == 0.0));
assert!(matches!(f64::extended_parse("0x.p-2"),
Err(ExtendedParserError::PartialMatch(f, "x.p-2")) if f == 0.0));
assert!(matches!(f64::extended_parse("0X"),
Err(ExtendedParserError::PartialMatch(f, "X")) if f == 0.0));
assert!(matches!(f64::extended_parse("-0x"),
Err(ExtendedParserError::PartialMatch(f, "x")) if f == -0.0));
assert!(matches!(f64::extended_parse("+0x"),
Err(ExtendedParserError::PartialMatch(f, "x")) if f == 0.0));
assert!(matches!(f64::extended_parse("-0x."),
Err(ExtendedParserError::PartialMatch(f, "x.")) if f == -0.0));
assert!(matches!(
assert_eq!(
f64::extended_parse("0x"),
Err(ExtendedParserError::PartialMatch(0.0, "x".to_string()))
);
assert_eq!(
f64::extended_parse("0x."),
Err(ExtendedParserError::PartialMatch(0.0, "x.".to_string()))
);
assert_eq!(
f64::extended_parse("0xp"),
Err(ExtendedParserError::PartialMatch(0.0, "xp".to_string()))
);
assert_eq!(
f64::extended_parse("0xp-2"),
Err(ExtendedParserError::PartialMatch(0.0, "xp-2".to_string()))
);
assert_eq!(
f64::extended_parse("0x.p-2"),
Err(ExtendedParserError::PartialMatch(0.0, "x.p-2".to_string()))
);
assert_eq!(
f64::extended_parse("0X"),
Err(ExtendedParserError::PartialMatch(0.0, "X".to_string()))
);
assert_eq!(
f64::extended_parse("-0x"),
Err(ExtendedParserError::PartialMatch(0.0, "x".to_string()))
);
assert_eq!(
f64::extended_parse("+0x"),
Err(ExtendedParserError::PartialMatch(0.0, "x".to_string()))
);
assert_eq!(
f64::extended_parse("-0x."),
Err(ExtendedParserError::PartialMatch(-0.0, "x.".to_string()))
);
assert_eq!(
u64::extended_parse("0x"),
Err(ExtendedParserError::PartialMatch(0, "x"))
));
assert!(matches!(
Err(ExtendedParserError::PartialMatch(0, "x".to_string()))
);
assert_eq!(
u64::extended_parse("-0x"),
Err(ExtendedParserError::PartialMatch(0, "x"))
));
assert!(matches!(
Err(ExtendedParserError::PartialMatch(0, "x".to_string()))
);
assert_eq!(
i64::extended_parse("0x"),
Err(ExtendedParserError::PartialMatch(0, "x"))
));
assert!(matches!(
Err(ExtendedParserError::PartialMatch(0, "x".to_string()))
);
assert_eq!(
i64::extended_parse("-0x"),
Err(ExtendedParserError::PartialMatch(0, "x"))
));
Err(ExtendedParserError::PartialMatch(0, "x".to_string()))
);
}
#[test]
@ -1105,18 +1143,18 @@ mod tests {
assert_eq!(Ok(-0o123), i64::extended_parse("-0123"));
assert_eq!(Ok(0o123), u64::extended_parse("00123"));
assert_eq!(Ok(0), u64::extended_parse("00"));
assert!(matches!(
assert_eq!(
u64::extended_parse("008"),
Err(ExtendedParserError::PartialMatch(0, "8"))
));
assert!(matches!(
Err(ExtendedParserError::PartialMatch(0, "8".to_string()))
);
assert_eq!(
u64::extended_parse("08"),
Err(ExtendedParserError::PartialMatch(0, "8"))
));
assert!(matches!(
Err(ExtendedParserError::PartialMatch(0, "8".to_string()))
);
assert_eq!(
u64::extended_parse("0."),
Err(ExtendedParserError::PartialMatch(0, "."))
));
Err(ExtendedParserError::PartialMatch(0, ".".to_string()))
);
// No float tests, leading zeros get parsed as decimal anyway.
}
@ -1128,51 +1166,62 @@ mod tests {
assert_eq!(Ok(0b1011), u64::extended_parse("+0b1011"));
assert_eq!(Ok(-0b1011), i64::extended_parse("-0b1011"));
assert!(matches!(
assert_eq!(
u64::extended_parse("0b"),
Err(ExtendedParserError::PartialMatch(0, "b"))
));
assert!(matches!(
Err(ExtendedParserError::PartialMatch(0, "b".to_string()))
);
assert_eq!(
u64::extended_parse("0b."),
Err(ExtendedParserError::PartialMatch(0, "b."))
));
assert!(matches!(
Err(ExtendedParserError::PartialMatch(0, "b.".to_string()))
);
assert_eq!(
u64::extended_parse("-0b"),
Err(ExtendedParserError::PartialMatch(0, "b"))
));
assert!(matches!(
Err(ExtendedParserError::PartialMatch(0, "b".to_string()))
);
assert_eq!(
i64::extended_parse("0b"),
Err(ExtendedParserError::PartialMatch(0, "b"))
));
assert!(matches!(
Err(ExtendedParserError::PartialMatch(0, "b".to_string()))
);
assert_eq!(
i64::extended_parse("-0b"),
Err(ExtendedParserError::PartialMatch(0, "b"))
));
Err(ExtendedParserError::PartialMatch(0, "b".to_string()))
);
// Binary not allowed for floats
assert!(matches!(
assert_eq!(
f64::extended_parse("0b100"),
Err(ExtendedParserError::PartialMatch(0f64, "b100"))
));
assert!(matches!(
Err(ExtendedParserError::PartialMatch(0f64, "b100".to_string()))
);
assert_eq!(
f64::extended_parse("0b100.1"),
Err(ExtendedParserError::PartialMatch(0f64, "b100.1"))
));
Err(ExtendedParserError::PartialMatch(
0f64,
"b100.1".to_string()
))
);
assert!(match ExtendedBigDecimal::extended_parse("0b100.1") {
Err(ExtendedParserError::PartialMatch(ebd, "b100.1")) =>
ebd == ExtendedBigDecimal::zero(),
_ => false,
});
assert_eq!(
ExtendedBigDecimal::extended_parse("0b100.1"),
Err(ExtendedParserError::PartialMatch(
ExtendedBigDecimal::zero(),
"b100.1".to_string()
))
);
assert!(match ExtendedBigDecimal::extended_parse("0b") {
Err(ExtendedParserError::PartialMatch(ebd, "b")) => ebd == ExtendedBigDecimal::zero(),
_ => false,
});
assert!(match ExtendedBigDecimal::extended_parse("0b.") {
Err(ExtendedParserError::PartialMatch(ebd, "b.")) => ebd == ExtendedBigDecimal::zero(),
_ => false,
});
assert_eq!(
ExtendedBigDecimal::extended_parse("0b"),
Err(ExtendedParserError::PartialMatch(
ExtendedBigDecimal::zero(),
"b".to_string()
))
);
assert_eq!(
ExtendedBigDecimal::extended_parse("0b."),
Err(ExtendedParserError::PartialMatch(
ExtendedBigDecimal::zero(),
"b.".to_string()
))
);
}
#[test]
@ -1185,15 +1234,15 @@ mod tests {
// Ensure that trailing whitespace is still a partial match
assert_eq!(
Err(ExtendedParserError::PartialMatch(6, " ")),
Err(ExtendedParserError::PartialMatch(6, " ".to_string())),
u64::extended_parse("0x6 ")
);
assert_eq!(
Err(ExtendedParserError::PartialMatch(7, "\t")),
Err(ExtendedParserError::PartialMatch(7, "\t".to_string())),
u64::extended_parse("0x7\t")
);
assert_eq!(
Err(ExtendedParserError::PartialMatch(8, "\n")),
Err(ExtendedParserError::PartialMatch(8, "\n".to_string())),
u64::extended_parse("0x8\n")
);

View file

@ -311,23 +311,39 @@ pub fn read_yes() -> bool {
}
}
/// Error produced by [`os_str_as_bytes`] when an `OsStr` cannot be viewed as
/// UTF-8 bytes (only possible on non-unix targets).
#[derive(Debug)]
pub struct NonUtf8OsStrError {
/// Lossy UTF-8 rendering of the offending input, kept so the `Display`
/// message can show (a quoted form of) what was rejected.
input_lossy_string: String,
}
impl std::fmt::Display for NonUtf8OsStrError {
    /// Writes the user-facing message, embedding the quoted, lossily
    /// converted input that could not be turned into bytes.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        use os_display::Quotable;

        write!(
            f,
            "invalid UTF-8 input {} encountered when converting to bytes on a platform that doesn't expose byte arguments",
            self.input_lossy_string.quote(),
        )
    }
}
// Both impls are intentionally empty: only the traits' default methods are used.
// NOTE(review): the `UError` impl is presumably what lets this error flow
// through `UResult`-returning callers — confirm against `error::UError`.
impl std::error::Error for NonUtf8OsStrError {}
impl error::UError for NonUtf8OsStrError {}
/// Converts an `OsStr` to a UTF-8 `&[u8]`.
///
/// This always succeeds on unix platforms,
/// and fails on other platforms if the string can't be coerced to UTF-8.
pub fn os_str_as_bytes(os_string: &OsStr) -> mods::error::UResult<&[u8]> {
pub fn os_str_as_bytes(os_string: &OsStr) -> Result<&[u8], NonUtf8OsStrError> {
#[cfg(unix)]
let bytes = os_string.as_bytes();
return Ok(os_string.as_bytes());
#[cfg(not(unix))]
let bytes = os_string
os_string
.to_str()
.ok_or_else(|| {
mods::error::UUsageError::new(1, "invalid UTF-8 was detected in one or more arguments")
})?
.as_bytes();
Ok(bytes)
.ok_or_else(|| NonUtf8OsStrError {
input_lossy_string: os_string.to_string_lossy().into_owned(),
})
.map(|s| s.as_bytes())
}
/// Performs a potentially lossy conversion from `OsStr` to UTF-8 bytes.
@ -336,15 +352,13 @@ pub fn os_str_as_bytes(os_string: &OsStr) -> mods::error::UResult<&[u8]> {
/// and wraps [`OsStr::to_string_lossy`] on non-unix platforms.
pub fn os_str_as_bytes_lossy(os_string: &OsStr) -> Cow<[u8]> {
#[cfg(unix)]
let bytes = Cow::from(os_string.as_bytes());
return Cow::from(os_string.as_bytes());
#[cfg(not(unix))]
let bytes = match os_string.to_string_lossy() {
match os_string.to_string_lossy() {
Cow::Borrowed(slice) => Cow::from(slice.as_bytes()),
Cow::Owned(owned) => Cow::from(owned.into_bytes()),
};
bytes
}
}
/// Converts a `&[u8]` to an `&OsStr`,
@ -354,13 +368,12 @@ pub fn os_str_as_bytes_lossy(os_string: &OsStr) -> Cow<[u8]> {
/// and fails on other platforms if the bytes can't be parsed as UTF-8.
pub fn os_str_from_bytes(bytes: &[u8]) -> mods::error::UResult<Cow<'_, OsStr>> {
#[cfg(unix)]
let os_str = Cow::Borrowed(OsStr::from_bytes(bytes));
#[cfg(not(unix))]
let os_str = Cow::Owned(OsString::from(str::from_utf8(bytes).map_err(|_| {
mods::error::UUsageError::new(1, "Unable to transform bytes into OsStr")
})?));
return Ok(Cow::Borrowed(OsStr::from_bytes(bytes)));
Ok(os_str)
#[cfg(not(unix))]
Ok(Cow::Owned(OsString::from(str::from_utf8(bytes).map_err(
|_| mods::error::UUsageError::new(1, "Unable to transform bytes into OsStr"),
)?)))
}
/// Converts a `Vec<u8>` into an `OsString`, parsing as UTF-8 on non-unix platforms.
@ -369,13 +382,12 @@ pub fn os_str_from_bytes(bytes: &[u8]) -> mods::error::UResult<Cow<'_, OsStr>> {
/// and fails on other platforms if the bytes can't be parsed as UTF-8.
pub fn os_string_from_vec(vec: Vec<u8>) -> mods::error::UResult<OsString> {
#[cfg(unix)]
let s = OsString::from_vec(vec);
#[cfg(not(unix))]
let s = OsString::from(String::from_utf8(vec).map_err(|_| {
mods::error::UUsageError::new(1, "invalid UTF-8 was detected in one or more arguments")
})?);
return Ok(OsString::from_vec(vec));
Ok(s)
#[cfg(not(unix))]
Ok(OsString::from(String::from_utf8(vec).map_err(|_| {
mods::error::UUsageError::new(1, "invalid UTF-8 was detected in one or more arguments")
})?))
}
/// Converts an `OsString` into a `Vec<u8>`, parsing as UTF-8 on non-unix platforms.

View file

@ -805,7 +805,7 @@ fn test_overflow() {
fn partial_char() {
new_ucmd!()
.args(&["%d", "'abc"])
.fails_with_code(1)
.succeeds()
.stdout_is("97")
.stderr_is(
"printf: warning: bc: character(s) following character constant have been ignored\n",
@ -1293,23 +1293,80 @@ fn float_arg_with_whitespace() {
#[test]
fn mb_input() {
for format in ["\"á", "\'á", "'\u{e1}"] {
let cases = vec![
("%04x\n", "\"á", "00e1\n"),
("%04x\n", "", "00e1\n"),
("%04x\n", "'\u{e1}", "00e1\n"),
("%i\n", "\"á", "225\n"),
("%i\n", "", "225\n"),
("%i\n", "'\u{e1}", "225\n"),
("%f\n", "", "225.000000\n"),
];
for (format, arg, stdout) in cases {
new_ucmd!()
.args(&["%04x\n", format])
.args(&[format, arg])
.succeeds()
.stdout_only("00e1\n");
.stdout_only(stdout);
}
let cases = vec![
("\"á=", "="),
("\'á-", "-"),
("\'á=-==", "=-=="),
("'\u{e1}++", "++"),
("%04x\n", "\"á=", "00e1\n", "="),
("%04x\n", "'á-", "00e1\n", "-"),
("%04x\n", "'á=-==", "00e1\n", "=-=="),
("%04x\n", "'á'", "00e1\n", "'"),
("%04x\n", "'\u{e1}++", "00e1\n", "++"),
("%04x\n", "''á'", "0027\n", "á'"),
("%i\n", "\"á=", "225\n", "="),
];
for (format, expected) in cases {
for (format, arg, stdout, stderr) in cases {
new_ucmd!()
.args(&["%04x\n", format])
.args(&[format, arg])
.succeeds()
.stdout_is(stdout)
.stderr_is(format!("printf: warning: {stderr}: character(s) following character constant have been ignored\n"));
}
for arg in ["\"", "'"] {
new_ucmd!()
.args(&["%04x\n", arg])
.fails()
.stderr_contains("expected a numeric value");
}
}
#[test]
#[cfg(target_family = "unix")]
fn mb_invalid_unicode() {
use std::ffi::OsStr;
use std::os::unix::ffi::OsStrExt;
let cases = vec![
("%04x\n", b"\"\xe1", "00e1\n"),
("%04x\n", b"'\xe1", "00e1\n"),
("%i\n", b"\"\xe1", "225\n"),
("%i\n", b"'\xe1", "225\n"),
("%f\n", b"'\xe1", "225.000000\n"),
];
for (format, arg, stdout) in cases {
new_ucmd!()
.arg(format)
.arg(OsStr::from_bytes(arg))
.succeeds()
.stdout_only(stdout);
}
let cases = vec![
(b"\"\xe1=".as_slice(), "="),
(b"'\xe1-".as_slice(), "-"),
(b"'\xe1=-==".as_slice(), "=-=="),
(b"'\xe1'".as_slice(), "'"),
// unclear if original or replacement character is better in stderr
//(b"''\xe1'".as_slice(), "'<27>'"),
];
for (arg, expected) in cases {
new_ucmd!()
.arg("%04x\n")
.arg(OsStr::from_bytes(arg))
.succeeds()
.stdout_is("00e1\n")
.stderr_is(format!("printf: warning: {expected}: character(s) following character constant have been ignored\n"));
@ -1364,3 +1421,35 @@ fn positional_format_specifiers() {
.succeeds()
.stdout_only("Octal: 115, Int: 42, Float: 3.141590, String: hello, Hex: ff, Scientific: 1.000000e-05, Char: A, Unsigned: 100, Integer: 123");
}
#[test]
#[cfg(target_family = "unix")]
fn non_utf_8_input() {
    use std::ffi::OsStr;
    use std::os::unix::ffi::OsStrExt;

    // ISO-8859-1 encoded text: the accented letters are single high bytes,
    // so the sequence as a whole is not valid UTF-8.
    // spell-checker:disable
    const LATIN1_TEXT: &[u8] =
        b"Swer an rehte g\xFCete wendet s\xEEn gem\xFCete, dem volget s\xE6lde und \xEAre.";
    // spell-checker:enable

    let raw_arg = OsStr::from_bytes(LATIN1_TEXT);

    // Passed as the ARGUMENT of `%s`: the bytes must come out untouched.
    new_ucmd!()
        .arg("%s")
        .arg(raw_arg)
        .succeeds()
        .stdout_only_bytes(LATIN1_TEXT);

    // Passed as the FORMAT itself: likewise echoed byte-for-byte.
    new_ucmd!()
        .arg(raw_arg)
        .succeeds()
        .stdout_only_bytes(LATIN1_TEXT);

    // Passed where a number is required: rejected with the usual
    // numeric-value diagnostic.
    new_ucmd!()
        .arg("%d")
        .arg(raw_arg)
        .fails()
        .stderr_contains("expected a numeric value");
}

View file

@ -38,11 +38,7 @@ This file documents why some tests are failing:
* gnu/tests/mv/part-hardlink.sh
* gnu/tests/od/od-N.sh
* gnu/tests/od/od-float.sh
* gnu/tests/printf/printf-cov.pl
* gnu/tests/printf/printf-indexed.sh
* gnu/tests/printf/printf-mb.sh
* gnu/tests/printf/printf-quote.sh
* gnu/tests/printf/printf.sh
* gnu/tests/ptx/ptx-overrun.sh
* gnu/tests/ptx/ptx.pl
* gnu/tests/rm/empty-inacc.sh - https://github.com/uutils/coreutils/issues/7033