mirror of
https://github.com/uutils/coreutils.git
synced 2025-12-23 08:47:37 +00:00
Merge pull request #8329 from drinkcat/printf-7209-update
Some checks are pending
CICD / Style/cargo-deny (push) Waiting to run
CICD / Style/deps (push) Waiting to run
CICD / Documentation/warnings (push) Waiting to run
CICD / MinRustV (push) Waiting to run
CICD / Dependencies (push) Waiting to run
CICD / Build/Makefile (push) Blocked by required conditions
CICD / Build/stable (push) Blocked by required conditions
CICD / Build/nightly (push) Blocked by required conditions
CICD / Binary sizes (push) Blocked by required conditions
CICD / Build (push) Blocked by required conditions
CICD / Tests/BusyBox test suite (push) Blocked by required conditions
CICD / Tests/Toybox test suite (push) Blocked by required conditions
CICD / Code Coverage (push) Waiting to run
CICD / Separate Builds (push) Waiting to run
CICD / Test all features separately (push) Blocked by required conditions
CICD / Build/SELinux (push) Blocked by required conditions
GnuTests / Run GNU tests (push) Waiting to run
Android / Test builds (push) Waiting to run
Code Quality / Style/format (push) Waiting to run
Code Quality / Style/lint (push) Waiting to run
Code Quality / Style/spelling (push) Waiting to run
Code Quality / Style/toml (push) Waiting to run
Code Quality / Style/Python (push) Waiting to run
Code Quality / Pre-commit hooks (push) Waiting to run
FreeBSD / Style and Lint (push) Waiting to run
FreeBSD / Tests (push) Waiting to run
Some checks are pending
CICD / Style/cargo-deny (push) Waiting to run
CICD / Style/deps (push) Waiting to run
CICD / Documentation/warnings (push) Waiting to run
CICD / MinRustV (push) Waiting to run
CICD / Dependencies (push) Waiting to run
CICD / Build/Makefile (push) Blocked by required conditions
CICD / Build/stable (push) Blocked by required conditions
CICD / Build/nightly (push) Blocked by required conditions
CICD / Binary sizes (push) Blocked by required conditions
CICD / Build (push) Blocked by required conditions
CICD / Tests/BusyBox test suite (push) Blocked by required conditions
CICD / Tests/Toybox test suite (push) Blocked by required conditions
CICD / Code Coverage (push) Waiting to run
CICD / Separate Builds (push) Waiting to run
CICD / Test all features separately (push) Blocked by required conditions
CICD / Build/SELinux (push) Blocked by required conditions
GnuTests / Run GNU tests (push) Waiting to run
Android / Test builds (push) Waiting to run
Code Quality / Style/format (push) Waiting to run
Code Quality / Style/lint (push) Waiting to run
Code Quality / Style/spelling (push) Waiting to run
Code Quality / Style/toml (push) Waiting to run
Code Quality / Style/Python (push) Waiting to run
Code Quality / Pre-commit hooks (push) Waiting to run
FreeBSD / Style and Lint (push) Waiting to run
FreeBSD / Tests (push) Waiting to run
printf: accept non-UTF-8 input in FORMAT and ARGUMENT arguments
This commit is contained in:
commit
1bb7930a08
11 changed files with 512 additions and 292 deletions
|
|
@ -8,10 +8,9 @@ use clap::{Arg, ArgAction, Command};
|
|||
use std::env;
|
||||
use std::ffi::{OsStr, OsString};
|
||||
use std::io::{self, StdoutLock, Write};
|
||||
use uucore::error::{UResult, USimpleError};
|
||||
use uucore::error::UResult;
|
||||
use uucore::format::{FormatChar, OctalParsing, parse_escape_only};
|
||||
use uucore::format_usage;
|
||||
use uucore::os_str_as_bytes;
|
||||
use uucore::{format_usage, os_str_as_bytes};
|
||||
|
||||
use uucore::locale::get_message;
|
||||
|
||||
|
|
@ -223,9 +222,9 @@ pub fn uu_app() -> Command {
|
|||
|
||||
fn execute(stdout: &mut StdoutLock, args: Vec<OsString>, options: Options) -> UResult<()> {
|
||||
for (i, arg) in args.into_iter().enumerate() {
|
||||
let bytes = os_str_as_bytes(arg.as_os_str())
|
||||
.map_err(|_| USimpleError::new(1, get_message("echo-error-non-utf8")))?;
|
||||
let bytes = os_str_as_bytes(&arg)?;
|
||||
|
||||
// Don't print a space before the first argument
|
||||
if i > 0 {
|
||||
stdout.write_all(b" ")?;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@
|
|||
// file that was distributed with this source code.
|
||||
use clap::{Arg, ArgAction, Command};
|
||||
use std::collections::HashMap;
|
||||
use std::ffi::OsString;
|
||||
use std::io::stdout;
|
||||
use std::ops::ControlFlow;
|
||||
use uucore::error::{UResult, UUsageError};
|
||||
|
|
@ -18,21 +19,19 @@ mod options {
|
|||
pub const FORMAT: &str = "FORMAT";
|
||||
pub const ARGUMENT: &str = "ARGUMENT";
|
||||
}
|
||||
|
||||
#[uucore::main]
|
||||
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
||||
let matches = uu_app().get_matches_from(args);
|
||||
|
||||
let format = matches
|
||||
.get_one::<std::ffi::OsString>(options::FORMAT)
|
||||
.get_one::<OsString>(options::FORMAT)
|
||||
.ok_or_else(|| UUsageError::new(1, get_message("printf-error-missing-operand")))?;
|
||||
let format = os_str_as_bytes(format)?;
|
||||
|
||||
let values: Vec<_> = match matches.get_many::<std::ffi::OsString>(options::ARGUMENT) {
|
||||
// FIXME: use os_str_as_bytes once FormatArgument supports Vec<u8>
|
||||
let values: Vec<_> = match matches.get_many::<OsString>(options::ARGUMENT) {
|
||||
Some(s) => s
|
||||
.map(|os_string| {
|
||||
FormatArgument::Unparsed(std::ffi::OsStr::to_string_lossy(os_string).to_string())
|
||||
})
|
||||
.map(|os_string| FormatArgument::Unparsed(os_string.to_owned()))
|
||||
.collect(),
|
||||
None => vec![],
|
||||
};
|
||||
|
|
@ -62,7 +61,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
|
|||
"{}",
|
||||
get_message_with_args(
|
||||
"printf-warning-ignoring-excess-arguments",
|
||||
HashMap::from([("arg".to_string(), arg_str.to_string())])
|
||||
HashMap::from([("arg".to_string(), arg_str.to_string_lossy().to_string())])
|
||||
)
|
||||
);
|
||||
}
|
||||
|
|
@ -103,10 +102,10 @@ pub fn uu_app() -> Command {
|
|||
.help(get_message("printf-help-version"))
|
||||
.action(ArgAction::Version),
|
||||
)
|
||||
.arg(Arg::new(options::FORMAT).value_parser(clap::value_parser!(std::ffi::OsString)))
|
||||
.arg(Arg::new(options::FORMAT).value_parser(clap::value_parser!(OsString)))
|
||||
.arg(
|
||||
Arg::new(options::ARGUMENT)
|
||||
.action(ArgAction::Append)
|
||||
.value_parser(clap::value_parser!(std::ffi::OsString)),
|
||||
.value_parser(clap::value_parser!(OsString)),
|
||||
)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -968,7 +968,7 @@ fn process_checksum_line(
|
|||
cached_line_format: &mut Option<LineFormat>,
|
||||
last_algo: &mut Option<String>,
|
||||
) -> Result<(), LineCheckError> {
|
||||
let line_bytes = os_str_as_bytes(line)?;
|
||||
let line_bytes = os_str_as_bytes(line).map_err(|e| LineCheckError::UError(Box::new(e)))?;
|
||||
|
||||
// Early return on empty or commented lines.
|
||||
if line.is_empty() || line_bytes.starts_with(b"#") {
|
||||
|
|
|
|||
|
|
@ -101,6 +101,18 @@ impl From<f64> for ExtendedBigDecimal {
|
|||
}
|
||||
}
|
||||
|
||||
impl From<u8> for ExtendedBigDecimal {
|
||||
fn from(val: u8) -> Self {
|
||||
Self::BigDecimal(val.into())
|
||||
}
|
||||
}
|
||||
|
||||
impl From<u32> for ExtendedBigDecimal {
|
||||
fn from(val: u32) -> Self {
|
||||
Self::BigDecimal(val.into())
|
||||
}
|
||||
}
|
||||
|
||||
impl ExtendedBigDecimal {
|
||||
pub fn zero() -> Self {
|
||||
Self::BigDecimal(0.into())
|
||||
|
|
|
|||
|
|
@ -7,12 +7,16 @@ use super::ExtendedBigDecimal;
|
|||
use crate::format::spec::ArgumentLocation;
|
||||
use crate::{
|
||||
error::set_exit_code,
|
||||
os_str_as_bytes,
|
||||
parser::num_parser::{ExtendedParser, ExtendedParserError},
|
||||
quoting_style::{QuotingStyle, locale_aware_escape_name},
|
||||
show_error, show_warning,
|
||||
};
|
||||
use os_display::Quotable;
|
||||
use std::{ffi::OsStr, num::NonZero};
|
||||
use std::{
|
||||
ffi::{OsStr, OsString},
|
||||
num::NonZero,
|
||||
};
|
||||
|
||||
/// An argument for formatting
|
||||
///
|
||||
|
|
@ -24,12 +28,12 @@ use std::{ffi::OsStr, num::NonZero};
|
|||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub enum FormatArgument {
|
||||
Char(char),
|
||||
String(String),
|
||||
String(OsString),
|
||||
UnsignedInt(u64),
|
||||
SignedInt(i64),
|
||||
Float(ExtendedBigDecimal),
|
||||
/// Special argument that gets coerced into the other variants
|
||||
Unparsed(String),
|
||||
Unparsed(OsString),
|
||||
}
|
||||
|
||||
/// A struct that holds a slice of format arguments and provides methods to access them
|
||||
|
|
@ -72,22 +76,25 @@ impl<'a> FormatArguments<'a> {
|
|||
pub fn next_char(&mut self, position: &ArgumentLocation) -> u8 {
|
||||
match self.next_arg(position) {
|
||||
Some(FormatArgument::Char(c)) => *c as u8,
|
||||
Some(FormatArgument::Unparsed(s)) => s.bytes().next().unwrap_or(b'\0'),
|
||||
Some(FormatArgument::Unparsed(os)) => match os_str_as_bytes(os) {
|
||||
Ok(bytes) => bytes.first().copied().unwrap_or(b'\0'),
|
||||
Err(_) => b'\0',
|
||||
},
|
||||
_ => b'\0',
|
||||
}
|
||||
}
|
||||
|
||||
pub fn next_string(&mut self, position: &ArgumentLocation) -> &'a str {
|
||||
pub fn next_string(&mut self, position: &ArgumentLocation) -> &'a OsStr {
|
||||
match self.next_arg(position) {
|
||||
Some(FormatArgument::Unparsed(s) | FormatArgument::String(s)) => s,
|
||||
_ => "",
|
||||
Some(FormatArgument::Unparsed(os) | FormatArgument::String(os)) => os,
|
||||
_ => "".as_ref(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn next_i64(&mut self, position: &ArgumentLocation) -> i64 {
|
||||
match self.next_arg(position) {
|
||||
Some(FormatArgument::SignedInt(n)) => *n,
|
||||
Some(FormatArgument::Unparsed(s)) => extract_value(i64::extended_parse(s), s),
|
||||
Some(FormatArgument::Unparsed(os)) => Self::get_num::<i64>(os),
|
||||
_ => 0,
|
||||
}
|
||||
}
|
||||
|
|
@ -95,25 +102,7 @@ impl<'a> FormatArguments<'a> {
|
|||
pub fn next_u64(&mut self, position: &ArgumentLocation) -> u64 {
|
||||
match self.next_arg(position) {
|
||||
Some(FormatArgument::UnsignedInt(n)) => *n,
|
||||
Some(FormatArgument::Unparsed(s)) => {
|
||||
// Check if the string is a character literal enclosed in quotes
|
||||
if s.starts_with(['"', '\'']) {
|
||||
// Extract the content between the quotes safely using chars
|
||||
let mut chars = s.trim_matches(|c| c == '"' || c == '\'').chars();
|
||||
if let Some(first_char) = chars.next() {
|
||||
if chars.clone().count() > 0 {
|
||||
// Emit a warning if there are additional characters
|
||||
let remaining: String = chars.collect();
|
||||
show_warning!(
|
||||
"{remaining}: character(s) following character constant have been ignored"
|
||||
);
|
||||
}
|
||||
return first_char as u64; // Use only the first character
|
||||
}
|
||||
return 0; // Empty quotes
|
||||
}
|
||||
extract_value(u64::extended_parse(s), s)
|
||||
}
|
||||
Some(FormatArgument::Unparsed(os)) => Self::get_num::<u64>(os),
|
||||
_ => 0,
|
||||
}
|
||||
}
|
||||
|
|
@ -121,13 +110,81 @@ impl<'a> FormatArguments<'a> {
|
|||
pub fn next_extended_big_decimal(&mut self, position: &ArgumentLocation) -> ExtendedBigDecimal {
|
||||
match self.next_arg(position) {
|
||||
Some(FormatArgument::Float(n)) => n.clone(),
|
||||
Some(FormatArgument::Unparsed(s)) => {
|
||||
extract_value(ExtendedBigDecimal::extended_parse(s), s)
|
||||
}
|
||||
Some(FormatArgument::Unparsed(os)) => Self::get_num::<ExtendedBigDecimal>(os),
|
||||
_ => ExtendedBigDecimal::zero(),
|
||||
}
|
||||
}
|
||||
|
||||
// Parse an OsStr that we know to start with a '/"
|
||||
fn parse_quote_start<T>(os: &OsStr) -> Result<T, ExtendedParserError<T>>
|
||||
where
|
||||
T: ExtendedParser + From<u8> + From<u32> + Default,
|
||||
{
|
||||
// If this fails (this can only happens on Windows), then just
|
||||
// return NotNumeric.
|
||||
let s = match os_str_as_bytes(os) {
|
||||
Ok(s) => s,
|
||||
Err(_) => return Err(ExtendedParserError::NotNumeric),
|
||||
};
|
||||
|
||||
let bytes = match s.split_first() {
|
||||
Some((b'"', bytes)) | Some((b'\'', bytes)) => bytes,
|
||||
_ => {
|
||||
// This really can't happen, the string we are given must start with '/".
|
||||
debug_assert!(false);
|
||||
return Err(ExtendedParserError::NotNumeric);
|
||||
}
|
||||
};
|
||||
|
||||
if bytes.is_empty() {
|
||||
return Err(ExtendedParserError::NotNumeric);
|
||||
}
|
||||
|
||||
let (val, len) = if let Some(c) = bytes
|
||||
.utf8_chunks()
|
||||
.next()
|
||||
.expect("bytes should not be empty")
|
||||
.valid()
|
||||
.chars()
|
||||
.next()
|
||||
{
|
||||
// Valid UTF-8 character, cast the codepoint to u32 then T
|
||||
// (largest unicode codepoint is only 3 bytes, so this is safe)
|
||||
((c as u32).into(), c.len_utf8())
|
||||
} else {
|
||||
// Not a valid UTF-8 character, use the first byte
|
||||
(bytes[0].into(), 1)
|
||||
};
|
||||
// Emit a warning if there are additional characters
|
||||
if bytes.len() > len {
|
||||
return Err(ExtendedParserError::PartialMatch(
|
||||
val,
|
||||
String::from_utf8_lossy(&bytes[len..]).into_owned(),
|
||||
));
|
||||
}
|
||||
|
||||
Ok(val)
|
||||
}
|
||||
|
||||
fn get_num<T>(os: &OsStr) -> T
|
||||
where
|
||||
T: ExtendedParser + From<u8> + From<u32> + Default,
|
||||
{
|
||||
let s = os.to_string_lossy();
|
||||
let first = s.as_bytes().first().copied();
|
||||
|
||||
let quote_start = first == Some(b'"') || first == Some(b'\'');
|
||||
let parsed = if quote_start {
|
||||
// The string begins with a quote
|
||||
Self::parse_quote_start(os)
|
||||
} else {
|
||||
T::extended_parse(&s)
|
||||
};
|
||||
|
||||
// Get the best possible value, even if parsed was an error.
|
||||
extract_value(parsed, &s, quote_start)
|
||||
}
|
||||
|
||||
fn get_at_relative_position(&mut self, pos: NonZero<usize>) -> Option<&'a FormatArgument> {
|
||||
let pos: usize = pos.into();
|
||||
let pos = (pos - 1).saturating_add(self.current_offset);
|
||||
|
|
@ -147,7 +204,11 @@ impl<'a> FormatArguments<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
fn extract_value<T: Default>(p: Result<T, ExtendedParserError<'_, T>>, input: &str) -> T {
|
||||
fn extract_value<T: Default>(
|
||||
p: Result<T, ExtendedParserError<T>>,
|
||||
input: &str,
|
||||
quote_start: bool,
|
||||
) -> T {
|
||||
match p {
|
||||
Ok(v) => v,
|
||||
Err(e) => {
|
||||
|
|
@ -167,14 +228,15 @@ fn extract_value<T: Default>(p: Result<T, ExtendedParserError<'_, T>>, input: &s
|
|||
Default::default()
|
||||
}
|
||||
ExtendedParserError::PartialMatch(v, rest) => {
|
||||
let bytes = input.as_encoded_bytes();
|
||||
if !bytes.is_empty() && (bytes[0] == b'\'' || bytes[0] == b'"') {
|
||||
if quote_start {
|
||||
set_exit_code(0);
|
||||
show_warning!(
|
||||
"{rest}: character(s) following character constant have been ignored"
|
||||
);
|
||||
} else {
|
||||
show_error!("{}: value not completely converted", input.quote());
|
||||
}
|
||||
|
||||
v
|
||||
}
|
||||
}
|
||||
|
|
@ -249,11 +311,11 @@ mod tests {
|
|||
// Test with different method types in sequence
|
||||
let args = [
|
||||
FormatArgument::Char('a'),
|
||||
FormatArgument::String("hello".to_string()),
|
||||
FormatArgument::Unparsed("123".to_string()),
|
||||
FormatArgument::String("world".to_string()),
|
||||
FormatArgument::String("hello".into()),
|
||||
FormatArgument::Unparsed("123".into()),
|
||||
FormatArgument::String("world".into()),
|
||||
FormatArgument::Char('z'),
|
||||
FormatArgument::String("test".to_string()),
|
||||
FormatArgument::String("test".into()),
|
||||
];
|
||||
let mut args = FormatArguments::new(&args);
|
||||
|
||||
|
|
@ -384,10 +446,10 @@ mod tests {
|
|||
fn test_unparsed_arguments() {
|
||||
// Test with unparsed arguments that get coerced
|
||||
let args = [
|
||||
FormatArgument::Unparsed("hello".to_string()),
|
||||
FormatArgument::Unparsed("123".to_string()),
|
||||
FormatArgument::Unparsed("hello".to_string()),
|
||||
FormatArgument::Unparsed("456".to_string()),
|
||||
FormatArgument::Unparsed("hello".into()),
|
||||
FormatArgument::Unparsed("123".into()),
|
||||
FormatArgument::Unparsed("hello".into()),
|
||||
FormatArgument::Unparsed("456".into()),
|
||||
];
|
||||
let mut args = FormatArguments::new(&args);
|
||||
|
||||
|
|
@ -409,10 +471,10 @@ mod tests {
|
|||
// Test with mixed types and positional access
|
||||
let args = [
|
||||
FormatArgument::Char('a'),
|
||||
FormatArgument::String("test".to_string()),
|
||||
FormatArgument::String("test".into()),
|
||||
FormatArgument::UnsignedInt(42),
|
||||
FormatArgument::Char('b'),
|
||||
FormatArgument::String("more".to_string()),
|
||||
FormatArgument::String("more".into()),
|
||||
FormatArgument::UnsignedInt(99),
|
||||
];
|
||||
let mut args = FormatArguments::new(&args);
|
||||
|
|
|
|||
|
|
@ -37,8 +37,12 @@ pub mod human;
|
|||
pub mod num_format;
|
||||
mod spec;
|
||||
|
||||
pub use self::escape::{EscapedChar, OctalParsing};
|
||||
use crate::extendedbigdecimal::ExtendedBigDecimal;
|
||||
pub use argument::*;
|
||||
pub use argument::{FormatArgument, FormatArguments};
|
||||
|
||||
use self::{escape::parse_escape_code, num_format::Formatter};
|
||||
use crate::{NonUtf8OsStrError, error::UError};
|
||||
pub use spec::Spec;
|
||||
use std::{
|
||||
error::Error,
|
||||
|
|
@ -50,13 +54,6 @@ use std::{
|
|||
|
||||
use os_display::Quotable;
|
||||
|
||||
use crate::error::UError;
|
||||
|
||||
pub use self::{
|
||||
escape::{EscapedChar, OctalParsing, parse_escape_code},
|
||||
num_format::Formatter,
|
||||
};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum FormatError {
|
||||
SpecError(Vec<u8>),
|
||||
|
|
@ -74,6 +71,7 @@ pub enum FormatError {
|
|||
/// The hexadecimal characters represent a code point that cannot represent a
|
||||
/// Unicode character (e.g., a surrogate code point)
|
||||
InvalidCharacter(char, Vec<u8>),
|
||||
InvalidEncoding(NonUtf8OsStrError),
|
||||
}
|
||||
|
||||
impl Error for FormatError {}
|
||||
|
|
@ -85,6 +83,12 @@ impl From<std::io::Error> for FormatError {
|
|||
}
|
||||
}
|
||||
|
||||
impl From<NonUtf8OsStrError> for FormatError {
|
||||
fn from(value: NonUtf8OsStrError) -> FormatError {
|
||||
FormatError::InvalidEncoding(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for FormatError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
|
|
@ -118,6 +122,7 @@ impl Display for FormatError {
|
|||
"invalid universal character name \\{escape_char}{}",
|
||||
String::from_utf8_lossy(digits)
|
||||
),
|
||||
Self::InvalidEncoding(no) => no.fmt(f),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,8 +5,6 @@
|
|||
|
||||
// spell-checker:ignore (vars) intmax ptrdiff padlen
|
||||
|
||||
use crate::quoting_style::{QuotingStyle, locale_aware_escape_name};
|
||||
|
||||
use super::{
|
||||
ExtendedBigDecimal, FormatChar, FormatError, OctalParsing,
|
||||
num_format::{
|
||||
|
|
@ -15,7 +13,11 @@ use super::{
|
|||
},
|
||||
parse_escape_only,
|
||||
};
|
||||
use crate::format::FormatArguments;
|
||||
use crate::{
|
||||
format::FormatArguments,
|
||||
os_str_as_bytes,
|
||||
quoting_style::{QuotingStyle, locale_aware_escape_name},
|
||||
};
|
||||
use std::{io::Write, num::NonZero, ops::ControlFlow};
|
||||
|
||||
/// A parsed specification for formatting a value
|
||||
|
|
@ -375,22 +377,21 @@ impl Spec {
|
|||
// TODO: We need to not use Rust's formatting for aligning the output,
|
||||
// so that we can just write bytes to stdout without panicking.
|
||||
let precision = resolve_asterisk_precision(*precision, args);
|
||||
let s = args.next_string(position);
|
||||
let os_str = args.next_string(position);
|
||||
let bytes = os_str_as_bytes(os_str)?;
|
||||
|
||||
let truncated = match precision {
|
||||
Some(p) if p < s.len() => &s[..p],
|
||||
_ => s,
|
||||
Some(p) if p < os_str.len() => &bytes[..p],
|
||||
_ => bytes,
|
||||
};
|
||||
write_padded(
|
||||
writer,
|
||||
truncated.as_bytes(),
|
||||
width,
|
||||
*align_left || neg_width,
|
||||
)
|
||||
write_padded(writer, truncated, width, *align_left || neg_width)
|
||||
}
|
||||
Self::EscapedString { position } => {
|
||||
let s = args.next_string(position);
|
||||
let mut parsed = Vec::new();
|
||||
for c in parse_escape_only(s.as_bytes(), OctalParsing::ThreeDigits) {
|
||||
let os_str = args.next_string(position);
|
||||
let bytes = os_str_as_bytes(os_str)?;
|
||||
let mut parsed = Vec::<u8>::new();
|
||||
|
||||
for c in parse_escape_only(bytes, OctalParsing::ThreeDigits) {
|
||||
match c.write(&mut parsed)? {
|
||||
ControlFlow::Continue(()) => {}
|
||||
ControlFlow::Break(()) => {
|
||||
|
|
@ -403,15 +404,11 @@ impl Spec {
|
|||
}
|
||||
Self::QuotedString { position } => {
|
||||
let s = locale_aware_escape_name(
|
||||
args.next_string(position).as_ref(),
|
||||
args.next_string(position),
|
||||
QuotingStyle::SHELL_ESCAPE,
|
||||
);
|
||||
#[cfg(unix)]
|
||||
let bytes = std::os::unix::ffi::OsStringExt::into_vec(s);
|
||||
#[cfg(not(unix))]
|
||||
let bytes = s.to_string_lossy().as_bytes().to_owned();
|
||||
|
||||
writer.write_all(&bytes).map_err(FormatError::IoError)
|
||||
let bytes = os_str_as_bytes(&s)?;
|
||||
writer.write_all(bytes).map_err(FormatError::IoError)
|
||||
}
|
||||
Self::SignedInt {
|
||||
width,
|
||||
|
|
@ -646,7 +643,7 @@ mod tests {
|
|||
Some((42, false)),
|
||||
resolve_asterisk_width(
|
||||
Some(CanAsterisk::Asterisk(ArgumentLocation::NextArgument)),
|
||||
&mut FormatArguments::new(&[FormatArgument::Unparsed("42".to_string())]),
|
||||
&mut FormatArguments::new(&[FormatArgument::Unparsed("42".into())]),
|
||||
)
|
||||
);
|
||||
|
||||
|
|
@ -661,7 +658,7 @@ mod tests {
|
|||
Some((42, true)),
|
||||
resolve_asterisk_width(
|
||||
Some(CanAsterisk::Asterisk(ArgumentLocation::NextArgument)),
|
||||
&mut FormatArguments::new(&[FormatArgument::Unparsed("-42".to_string())]),
|
||||
&mut FormatArguments::new(&[FormatArgument::Unparsed("-42".into())]),
|
||||
)
|
||||
);
|
||||
|
||||
|
|
@ -672,9 +669,9 @@ mod tests {
|
|||
NonZero::new(2).unwrap()
|
||||
))),
|
||||
&mut FormatArguments::new(&[
|
||||
FormatArgument::Unparsed("1".to_string()),
|
||||
FormatArgument::Unparsed("2".to_string()),
|
||||
FormatArgument::Unparsed("3".to_string())
|
||||
FormatArgument::Unparsed("1".into()),
|
||||
FormatArgument::Unparsed("2".into()),
|
||||
FormatArgument::Unparsed("3".into())
|
||||
]),
|
||||
)
|
||||
);
|
||||
|
|
@ -717,7 +714,7 @@ mod tests {
|
|||
Some(42),
|
||||
resolve_asterisk_precision(
|
||||
Some(CanAsterisk::Asterisk(ArgumentLocation::NextArgument)),
|
||||
&mut FormatArguments::new(&[FormatArgument::Unparsed("42".to_string())]),
|
||||
&mut FormatArguments::new(&[FormatArgument::Unparsed("42".into())]),
|
||||
)
|
||||
);
|
||||
|
||||
|
|
@ -732,7 +729,7 @@ mod tests {
|
|||
Some(0),
|
||||
resolve_asterisk_precision(
|
||||
Some(CanAsterisk::Asterisk(ArgumentLocation::NextArgument)),
|
||||
&mut FormatArguments::new(&[FormatArgument::Unparsed("-42".to_string())]),
|
||||
&mut FormatArguments::new(&[FormatArgument::Unparsed("-42".into())]),
|
||||
)
|
||||
);
|
||||
assert_eq!(
|
||||
|
|
@ -742,9 +739,9 @@ mod tests {
|
|||
NonZero::new(2).unwrap()
|
||||
))),
|
||||
&mut FormatArguments::new(&[
|
||||
FormatArgument::Unparsed("1".to_string()),
|
||||
FormatArgument::Unparsed("2".to_string()),
|
||||
FormatArgument::Unparsed("3".to_string())
|
||||
FormatArgument::Unparsed("1".into()),
|
||||
FormatArgument::Unparsed("2".into()),
|
||||
FormatArgument::Unparsed("3".into())
|
||||
]),
|
||||
)
|
||||
);
|
||||
|
|
|
|||
|
|
@ -109,12 +109,12 @@ impl Base {
|
|||
|
||||
/// Type returned if a number could not be parsed in its entirety
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum ExtendedParserError<'a, T> {
|
||||
pub enum ExtendedParserError<T> {
|
||||
/// The input as a whole makes no sense
|
||||
NotNumeric,
|
||||
/// The beginning of the input made sense and has been parsed,
|
||||
/// while the remaining doesn't.
|
||||
PartialMatch(T, &'a str),
|
||||
PartialMatch(T, String),
|
||||
/// The value has overflowed the type storage. The returned value
|
||||
/// is saturated (e.g. positive or negative infinity, or min/max
|
||||
/// value for the integer type).
|
||||
|
|
@ -124,7 +124,7 @@ pub enum ExtendedParserError<'a, T> {
|
|||
Underflow(T),
|
||||
}
|
||||
|
||||
impl<'a, T> ExtendedParserError<'a, T>
|
||||
impl<T> ExtendedParserError<T>
|
||||
where
|
||||
T: Zero,
|
||||
{
|
||||
|
|
@ -143,12 +143,12 @@ where
|
|||
/// conversion.
|
||||
fn map<U>(
|
||||
self,
|
||||
f: impl FnOnce(T) -> Result<U, ExtendedParserError<'a, U>>,
|
||||
) -> ExtendedParserError<'a, U>
|
||||
f: impl FnOnce(T) -> Result<U, ExtendedParserError<U>>,
|
||||
) -> ExtendedParserError<U>
|
||||
where
|
||||
U: Zero,
|
||||
{
|
||||
fn extract<U>(v: Result<U, ExtendedParserError<'_, U>>) -> U
|
||||
fn extract<U>(v: Result<U, ExtendedParserError<U>>) -> U
|
||||
where
|
||||
U: Zero,
|
||||
{
|
||||
|
|
@ -172,15 +172,15 @@ where
|
|||
/// and `f64` float, where octal and binary formats are not allowed.
|
||||
pub trait ExtendedParser {
|
||||
// We pick a hopefully different name for our parser, to avoid clash with standard traits.
|
||||
fn extended_parse(input: &str) -> Result<Self, ExtendedParserError<'_, Self>>
|
||||
fn extended_parse(input: &str) -> Result<Self, ExtendedParserError<Self>>
|
||||
where
|
||||
Self: Sized;
|
||||
}
|
||||
|
||||
impl ExtendedParser for i64 {
|
||||
/// Parse a number as i64. No fractional part is allowed.
|
||||
fn extended_parse(input: &str) -> Result<i64, ExtendedParserError<'_, i64>> {
|
||||
fn into_i64<'a>(ebd: ExtendedBigDecimal) -> Result<i64, ExtendedParserError<'a, i64>> {
|
||||
fn extended_parse(input: &str) -> Result<i64, ExtendedParserError<i64>> {
|
||||
fn into_i64(ebd: ExtendedBigDecimal) -> Result<i64, ExtendedParserError<i64>> {
|
||||
match ebd {
|
||||
ExtendedBigDecimal::BigDecimal(bd) => {
|
||||
let (digits, scale) = bd.into_bigint_and_scale();
|
||||
|
|
@ -214,8 +214,8 @@ impl ExtendedParser for i64 {
|
|||
|
||||
impl ExtendedParser for u64 {
|
||||
/// Parse a number as u64. No fractional part is allowed.
|
||||
fn extended_parse(input: &str) -> Result<u64, ExtendedParserError<'_, u64>> {
|
||||
fn into_u64<'a>(ebd: ExtendedBigDecimal) -> Result<u64, ExtendedParserError<'a, u64>> {
|
||||
fn extended_parse(input: &str) -> Result<u64, ExtendedParserError<u64>> {
|
||||
fn into_u64(ebd: ExtendedBigDecimal) -> Result<u64, ExtendedParserError<u64>> {
|
||||
match ebd {
|
||||
ExtendedBigDecimal::BigDecimal(bd) => {
|
||||
let (digits, scale) = bd.into_bigint_and_scale();
|
||||
|
|
@ -251,8 +251,8 @@ impl ExtendedParser for u64 {
|
|||
|
||||
impl ExtendedParser for f64 {
|
||||
/// Parse a number as f64
|
||||
fn extended_parse(input: &str) -> Result<f64, ExtendedParserError<'_, f64>> {
|
||||
fn into_f64<'a>(ebd: ExtendedBigDecimal) -> Result<f64, ExtendedParserError<'a, f64>> {
|
||||
fn extended_parse(input: &str) -> Result<f64, ExtendedParserError<f64>> {
|
||||
fn into_f64(ebd: ExtendedBigDecimal) -> Result<f64, ExtendedParserError<f64>> {
|
||||
// TODO: _Some_ of this is generic, so this should probably be implemented as an ExtendedBigDecimal trait (ToPrimitive).
|
||||
let v = match ebd {
|
||||
ExtendedBigDecimal::BigDecimal(bd) => {
|
||||
|
|
@ -285,7 +285,7 @@ impl ExtendedParser for ExtendedBigDecimal {
|
|||
/// Parse a number as an ExtendedBigDecimal
|
||||
fn extended_parse(
|
||||
input: &str,
|
||||
) -> Result<ExtendedBigDecimal, ExtendedParserError<'_, ExtendedBigDecimal>> {
|
||||
) -> Result<ExtendedBigDecimal, ExtendedParserError<ExtendedBigDecimal>> {
|
||||
parse(input, ParseTarget::Decimal, &[])
|
||||
}
|
||||
}
|
||||
|
|
@ -349,11 +349,11 @@ fn parse_suffix_multiplier<'a>(str: &'a str, allowed_suffixes: &[(char, u32)]) -
|
|||
(1, str)
|
||||
}
|
||||
|
||||
fn parse_special_value<'a>(
|
||||
input: &'a str,
|
||||
fn parse_special_value(
|
||||
input: &str,
|
||||
negative: bool,
|
||||
allowed_suffixes: &[(char, u32)],
|
||||
) -> Result<ExtendedBigDecimal, ExtendedParserError<'a, ExtendedBigDecimal>> {
|
||||
) -> Result<ExtendedBigDecimal, ExtendedParserError<ExtendedBigDecimal>> {
|
||||
let input_lc = input.to_ascii_lowercase();
|
||||
|
||||
// Array of ("String to match", return value when sign positive, when sign negative)
|
||||
|
|
@ -376,7 +376,7 @@ fn parse_special_value<'a>(
|
|||
return if rest.is_empty() {
|
||||
Ok(special)
|
||||
} else {
|
||||
Err(ExtendedParserError::PartialMatch(special, rest))
|
||||
Err(ExtendedParserError::PartialMatch(special, rest.to_string()))
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
@ -386,7 +386,7 @@ fn parse_special_value<'a>(
|
|||
|
||||
/// Underflow/Overflow errors always contain 0 or infinity.
|
||||
/// overflow: true for overflow, false for underflow.
|
||||
fn make_error<'a>(overflow: bool, negative: bool) -> ExtendedParserError<'a, ExtendedBigDecimal> {
|
||||
fn make_error(overflow: bool, negative: bool) -> ExtendedParserError<ExtendedBigDecimal> {
|
||||
let mut v = if overflow {
|
||||
ExtendedBigDecimal::Infinity
|
||||
} else {
|
||||
|
|
@ -468,13 +468,13 @@ fn pow_with_context(bd: &BigDecimal, exp: i64, ctx: &Context) -> BigDecimal {
|
|||
}
|
||||
|
||||
/// Construct an [`ExtendedBigDecimal`] based on parsed data
|
||||
fn construct_extended_big_decimal<'a>(
|
||||
fn construct_extended_big_decimal(
|
||||
digits: BigUint,
|
||||
negative: bool,
|
||||
base: Base,
|
||||
scale: i64,
|
||||
exponent: BigInt,
|
||||
) -> Result<ExtendedBigDecimal, ExtendedParserError<'a, ExtendedBigDecimal>> {
|
||||
) -> Result<ExtendedBigDecimal, ExtendedParserError<ExtendedBigDecimal>> {
|
||||
if digits == BigUint::zero() {
|
||||
// Return return 0 if the digits are zero. In particular, we do not ever
|
||||
// return Overflow/Underflow errors in that case.
|
||||
|
|
@ -541,25 +541,13 @@ pub(crate) enum ParseTarget {
|
|||
Duration,
|
||||
}
|
||||
|
||||
pub(crate) fn parse<'a>(
|
||||
input: &'a str,
|
||||
pub(crate) fn parse(
|
||||
input: &str,
|
||||
target: ParseTarget,
|
||||
allowed_suffixes: &[(char, u32)],
|
||||
) -> Result<ExtendedBigDecimal, ExtendedParserError<'a, ExtendedBigDecimal>> {
|
||||
// Parse the " and ' prefixes separately
|
||||
if target != ParseTarget::Duration {
|
||||
if let Some(rest) = input.strip_prefix(['\'', '"']) {
|
||||
let mut chars = rest.char_indices().fuse();
|
||||
let v = chars
|
||||
.next()
|
||||
.map(|(_, c)| ExtendedBigDecimal::BigDecimal(u32::from(c).into()));
|
||||
return match (v, chars.next()) {
|
||||
(Some(v), None) => Ok(v),
|
||||
(Some(v), Some((i, _))) => Err(ExtendedParserError::PartialMatch(v, &rest[i..])),
|
||||
(None, _) => Err(ExtendedParserError::NotNumeric),
|
||||
};
|
||||
}
|
||||
}
|
||||
) -> Result<ExtendedBigDecimal, ExtendedParserError<ExtendedBigDecimal>> {
|
||||
// Note: literals with ' and " prefixes are parsed earlier on in argument parsing,
|
||||
// before UTF-8 conversion.
|
||||
|
||||
let trimmed_input = input.trim_ascii_start();
|
||||
|
||||
|
|
@ -616,7 +604,7 @@ pub(crate) fn parse<'a>(
|
|||
} else {
|
||||
ExtendedBigDecimal::zero()
|
||||
};
|
||||
return Err(ExtendedParserError::PartialMatch(ebd, partial));
|
||||
return Err(ExtendedParserError::PartialMatch(ebd, partial.to_string()));
|
||||
}
|
||||
|
||||
return if target == ParseTarget::Integral {
|
||||
|
|
@ -640,7 +628,7 @@ pub(crate) fn parse<'a>(
|
|||
} else {
|
||||
Err(ExtendedParserError::PartialMatch(
|
||||
ebd_result.unwrap_or_else(|e| e.extract()),
|
||||
rest,
|
||||
rest.to_string(),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
|
@ -686,14 +674,14 @@ mod tests {
|
|||
u64::extended_parse(""),
|
||||
Err(ExtendedParserError::NotNumeric)
|
||||
));
|
||||
assert!(matches!(
|
||||
assert_eq!(
|
||||
u64::extended_parse("123.15"),
|
||||
Err(ExtendedParserError::PartialMatch(123, ".15"))
|
||||
));
|
||||
assert!(matches!(
|
||||
Err(ExtendedParserError::PartialMatch(123, ".15".to_string()))
|
||||
);
|
||||
assert_eq!(
|
||||
u64::extended_parse("123e10"),
|
||||
Err(ExtendedParserError::PartialMatch(123, "e10"))
|
||||
));
|
||||
Err(ExtendedParserError::PartialMatch(123, "e10".to_string()))
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -707,18 +695,18 @@ mod tests {
|
|||
));
|
||||
assert_eq!(Ok(i64::MAX), i64::extended_parse(&format!("{}", i64::MAX)));
|
||||
assert_eq!(Ok(i64::MIN), i64::extended_parse(&format!("{}", i64::MIN)));
|
||||
assert!(matches!(
|
||||
assert_eq!(
|
||||
i64::extended_parse(&format!("{}", u64::MAX)),
|
||||
Err(ExtendedParserError::Overflow(i64::MAX))
|
||||
));
|
||||
);
|
||||
assert!(matches!(
|
||||
i64::extended_parse(&format!("{}", i64::MAX as u64 + 1)),
|
||||
Err(ExtendedParserError::Overflow(i64::MAX))
|
||||
));
|
||||
assert!(matches!(
|
||||
assert_eq!(
|
||||
i64::extended_parse("-123e10"),
|
||||
Err(ExtendedParserError::PartialMatch(-123, "e10"))
|
||||
));
|
||||
Err(ExtendedParserError::PartialMatch(-123, "e10".to_string()))
|
||||
);
|
||||
assert!(matches!(
|
||||
i64::extended_parse(&format!("{}", -(u64::MAX as i128))),
|
||||
Err(ExtendedParserError::Overflow(i64::MIN))
|
||||
|
|
@ -770,20 +758,34 @@ mod tests {
|
|||
Ok(0.15),
|
||||
f64::extended_parse(".150000000000000000000000000231313")
|
||||
);
|
||||
assert!(matches!(f64::extended_parse("123.15e"),
|
||||
Err(ExtendedParserError::PartialMatch(f, "e")) if f == 123.15));
|
||||
assert!(matches!(f64::extended_parse("123.15E"),
|
||||
Err(ExtendedParserError::PartialMatch(f, "E")) if f == 123.15));
|
||||
assert!(matches!(f64::extended_parse("123.15e-"),
|
||||
Err(ExtendedParserError::PartialMatch(f, "e-")) if f == 123.15));
|
||||
assert!(matches!(f64::extended_parse("123.15e+"),
|
||||
Err(ExtendedParserError::PartialMatch(f, "e+")) if f == 123.15));
|
||||
assert!(matches!(f64::extended_parse("123.15e."),
|
||||
Err(ExtendedParserError::PartialMatch(f, "e.")) if f == 123.15));
|
||||
assert!(matches!(f64::extended_parse("1.2.3"),
|
||||
Err(ExtendedParserError::PartialMatch(f, ".3")) if f == 1.2));
|
||||
assert!(matches!(f64::extended_parse("123.15p5"),
|
||||
Err(ExtendedParserError::PartialMatch(f, "p5")) if f == 123.15));
|
||||
assert_eq!(
|
||||
f64::extended_parse("123.15e"),
|
||||
Err(ExtendedParserError::PartialMatch(123.15, "e".to_string()))
|
||||
);
|
||||
assert_eq!(
|
||||
f64::extended_parse("123.15E"),
|
||||
Err(ExtendedParserError::PartialMatch(123.15, "E".to_string()))
|
||||
);
|
||||
assert_eq!(
|
||||
f64::extended_parse("123.15e-"),
|
||||
Err(ExtendedParserError::PartialMatch(123.15, "e-".to_string()))
|
||||
);
|
||||
assert_eq!(
|
||||
f64::extended_parse("123.15e+"),
|
||||
Err(ExtendedParserError::PartialMatch(123.15, "e+".to_string()))
|
||||
);
|
||||
assert_eq!(
|
||||
f64::extended_parse("123.15e."),
|
||||
Err(ExtendedParserError::PartialMatch(123.15, "e.".to_string()))
|
||||
);
|
||||
assert_eq!(
|
||||
f64::extended_parse("1.2.3"),
|
||||
Err(ExtendedParserError::PartialMatch(1.2, ".3".to_string()))
|
||||
);
|
||||
assert_eq!(
|
||||
f64::extended_parse("123.15p5"),
|
||||
Err(ExtendedParserError::PartialMatch(123.15, "p5".to_string()))
|
||||
);
|
||||
// Minus zero. 0.0 == -0.0 so we explicitly check the sign.
|
||||
assert_eq!(Ok(0.0), f64::extended_parse("-0.0"));
|
||||
assert!(f64::extended_parse("-0.0").unwrap().is_sign_negative());
|
||||
|
|
@ -806,10 +808,20 @@ mod tests {
|
|||
assert!(f64::extended_parse("nan").unwrap().is_sign_positive());
|
||||
assert!(f64::extended_parse("NAN").unwrap().is_nan());
|
||||
assert!(f64::extended_parse("NAN").unwrap().is_sign_positive());
|
||||
assert!(matches!(f64::extended_parse("-infinit"),
|
||||
Err(ExtendedParserError::PartialMatch(f, "init")) if f == f64::NEG_INFINITY));
|
||||
assert!(matches!(f64::extended_parse("-infinity00"),
|
||||
Err(ExtendedParserError::PartialMatch(f, "00")) if f == f64::NEG_INFINITY));
|
||||
assert_eq!(
|
||||
f64::extended_parse("-infinit"),
|
||||
Err(ExtendedParserError::PartialMatch(
|
||||
f64::NEG_INFINITY,
|
||||
"init".to_string()
|
||||
))
|
||||
);
|
||||
assert_eq!(
|
||||
f64::extended_parse("-infinity00"),
|
||||
Err(ExtendedParserError::PartialMatch(
|
||||
f64::NEG_INFINITY,
|
||||
"00".to_string()
|
||||
))
|
||||
);
|
||||
assert!(f64::extended_parse(&format!("{}", u64::MAX)).is_ok());
|
||||
assert!(f64::extended_parse(&format!("{}", i64::MIN)).is_ok());
|
||||
|
||||
|
|
@ -994,14 +1006,22 @@ mod tests {
|
|||
// but we can check that the number still gets parsed properly: 0x0.8e5 is 0x8e5 / 16**3
|
||||
assert_eq!(Ok(0.555908203125), f64::extended_parse("0x0.8e5"));
|
||||
|
||||
assert!(matches!(f64::extended_parse("0x0.1p"),
|
||||
Err(ExtendedParserError::PartialMatch(f, "p")) if f == 0.0625));
|
||||
assert!(matches!(f64::extended_parse("0x0.1p-"),
|
||||
Err(ExtendedParserError::PartialMatch(f, "p-")) if f == 0.0625));
|
||||
assert!(matches!(f64::extended_parse("0x.1p+"),
|
||||
Err(ExtendedParserError::PartialMatch(f, "p+")) if f == 0.0625));
|
||||
assert!(matches!(f64::extended_parse("0x.1p."),
|
||||
Err(ExtendedParserError::PartialMatch(f, "p.")) if f == 0.0625));
|
||||
assert_eq!(
|
||||
f64::extended_parse("0x0.1p"),
|
||||
Err(ExtendedParserError::PartialMatch(0.0625, "p".to_string()))
|
||||
);
|
||||
assert_eq!(
|
||||
f64::extended_parse("0x0.1p-"),
|
||||
Err(ExtendedParserError::PartialMatch(0.0625, "p-".to_string()))
|
||||
);
|
||||
assert_eq!(
|
||||
f64::extended_parse("0x.1p+"),
|
||||
Err(ExtendedParserError::PartialMatch(0.0625, "p+".to_string()))
|
||||
);
|
||||
assert_eq!(
|
||||
f64::extended_parse("0x.1p."),
|
||||
Err(ExtendedParserError::PartialMatch(0.0625, "p.".to_string()))
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
Ok(ExtendedBigDecimal::BigDecimal(
|
||||
|
|
@ -1061,40 +1081,58 @@ mod tests {
|
|||
));
|
||||
|
||||
// Not actually hex numbers, but the prefixes look like it.
|
||||
assert!(matches!(f64::extended_parse("0x"),
|
||||
Err(ExtendedParserError::PartialMatch(f, "x")) if f == 0.0));
|
||||
assert!(matches!(f64::extended_parse("0x."),
|
||||
Err(ExtendedParserError::PartialMatch(f, "x.")) if f == 0.0));
|
||||
assert!(matches!(f64::extended_parse("0xp"),
|
||||
Err(ExtendedParserError::PartialMatch(f, "xp")) if f == 0.0));
|
||||
assert!(matches!(f64::extended_parse("0xp-2"),
|
||||
Err(ExtendedParserError::PartialMatch(f, "xp-2")) if f == 0.0));
|
||||
assert!(matches!(f64::extended_parse("0x.p-2"),
|
||||
Err(ExtendedParserError::PartialMatch(f, "x.p-2")) if f == 0.0));
|
||||
assert!(matches!(f64::extended_parse("0X"),
|
||||
Err(ExtendedParserError::PartialMatch(f, "X")) if f == 0.0));
|
||||
assert!(matches!(f64::extended_parse("-0x"),
|
||||
Err(ExtendedParserError::PartialMatch(f, "x")) if f == -0.0));
|
||||
assert!(matches!(f64::extended_parse("+0x"),
|
||||
Err(ExtendedParserError::PartialMatch(f, "x")) if f == 0.0));
|
||||
assert!(matches!(f64::extended_parse("-0x."),
|
||||
Err(ExtendedParserError::PartialMatch(f, "x.")) if f == -0.0));
|
||||
assert!(matches!(
|
||||
assert_eq!(
|
||||
f64::extended_parse("0x"),
|
||||
Err(ExtendedParserError::PartialMatch(0.0, "x".to_string()))
|
||||
);
|
||||
assert_eq!(
|
||||
f64::extended_parse("0x."),
|
||||
Err(ExtendedParserError::PartialMatch(0.0, "x.".to_string()))
|
||||
);
|
||||
assert_eq!(
|
||||
f64::extended_parse("0xp"),
|
||||
Err(ExtendedParserError::PartialMatch(0.0, "xp".to_string()))
|
||||
);
|
||||
assert_eq!(
|
||||
f64::extended_parse("0xp-2"),
|
||||
Err(ExtendedParserError::PartialMatch(0.0, "xp-2".to_string()))
|
||||
);
|
||||
assert_eq!(
|
||||
f64::extended_parse("0x.p-2"),
|
||||
Err(ExtendedParserError::PartialMatch(0.0, "x.p-2".to_string()))
|
||||
);
|
||||
assert_eq!(
|
||||
f64::extended_parse("0X"),
|
||||
Err(ExtendedParserError::PartialMatch(0.0, "X".to_string()))
|
||||
);
|
||||
assert_eq!(
|
||||
f64::extended_parse("-0x"),
|
||||
Err(ExtendedParserError::PartialMatch(0.0, "x".to_string()))
|
||||
);
|
||||
assert_eq!(
|
||||
f64::extended_parse("+0x"),
|
||||
Err(ExtendedParserError::PartialMatch(0.0, "x".to_string()))
|
||||
);
|
||||
assert_eq!(
|
||||
f64::extended_parse("-0x."),
|
||||
Err(ExtendedParserError::PartialMatch(-0.0, "x.".to_string()))
|
||||
);
|
||||
assert_eq!(
|
||||
u64::extended_parse("0x"),
|
||||
Err(ExtendedParserError::PartialMatch(0, "x"))
|
||||
));
|
||||
assert!(matches!(
|
||||
Err(ExtendedParserError::PartialMatch(0, "x".to_string()))
|
||||
);
|
||||
assert_eq!(
|
||||
u64::extended_parse("-0x"),
|
||||
Err(ExtendedParserError::PartialMatch(0, "x"))
|
||||
));
|
||||
assert!(matches!(
|
||||
Err(ExtendedParserError::PartialMatch(0, "x".to_string()))
|
||||
);
|
||||
assert_eq!(
|
||||
i64::extended_parse("0x"),
|
||||
Err(ExtendedParserError::PartialMatch(0, "x"))
|
||||
));
|
||||
assert!(matches!(
|
||||
Err(ExtendedParserError::PartialMatch(0, "x".to_string()))
|
||||
);
|
||||
assert_eq!(
|
||||
i64::extended_parse("-0x"),
|
||||
Err(ExtendedParserError::PartialMatch(0, "x"))
|
||||
));
|
||||
Err(ExtendedParserError::PartialMatch(0, "x".to_string()))
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -1105,18 +1143,18 @@ mod tests {
|
|||
assert_eq!(Ok(-0o123), i64::extended_parse("-0123"));
|
||||
assert_eq!(Ok(0o123), u64::extended_parse("00123"));
|
||||
assert_eq!(Ok(0), u64::extended_parse("00"));
|
||||
assert!(matches!(
|
||||
assert_eq!(
|
||||
u64::extended_parse("008"),
|
||||
Err(ExtendedParserError::PartialMatch(0, "8"))
|
||||
));
|
||||
assert!(matches!(
|
||||
Err(ExtendedParserError::PartialMatch(0, "8".to_string()))
|
||||
);
|
||||
assert_eq!(
|
||||
u64::extended_parse("08"),
|
||||
Err(ExtendedParserError::PartialMatch(0, "8"))
|
||||
));
|
||||
assert!(matches!(
|
||||
Err(ExtendedParserError::PartialMatch(0, "8".to_string()))
|
||||
);
|
||||
assert_eq!(
|
||||
u64::extended_parse("0."),
|
||||
Err(ExtendedParserError::PartialMatch(0, "."))
|
||||
));
|
||||
Err(ExtendedParserError::PartialMatch(0, ".".to_string()))
|
||||
);
|
||||
|
||||
// No float tests, leading zeros get parsed as decimal anyway.
|
||||
}
|
||||
|
|
@ -1128,51 +1166,62 @@ mod tests {
|
|||
assert_eq!(Ok(0b1011), u64::extended_parse("+0b1011"));
|
||||
assert_eq!(Ok(-0b1011), i64::extended_parse("-0b1011"));
|
||||
|
||||
assert!(matches!(
|
||||
assert_eq!(
|
||||
u64::extended_parse("0b"),
|
||||
Err(ExtendedParserError::PartialMatch(0, "b"))
|
||||
));
|
||||
assert!(matches!(
|
||||
Err(ExtendedParserError::PartialMatch(0, "b".to_string()))
|
||||
);
|
||||
assert_eq!(
|
||||
u64::extended_parse("0b."),
|
||||
Err(ExtendedParserError::PartialMatch(0, "b."))
|
||||
));
|
||||
assert!(matches!(
|
||||
Err(ExtendedParserError::PartialMatch(0, "b.".to_string()))
|
||||
);
|
||||
assert_eq!(
|
||||
u64::extended_parse("-0b"),
|
||||
Err(ExtendedParserError::PartialMatch(0, "b"))
|
||||
));
|
||||
assert!(matches!(
|
||||
Err(ExtendedParserError::PartialMatch(0, "b".to_string()))
|
||||
);
|
||||
assert_eq!(
|
||||
i64::extended_parse("0b"),
|
||||
Err(ExtendedParserError::PartialMatch(0, "b"))
|
||||
));
|
||||
assert!(matches!(
|
||||
Err(ExtendedParserError::PartialMatch(0, "b".to_string()))
|
||||
);
|
||||
assert_eq!(
|
||||
i64::extended_parse("-0b"),
|
||||
Err(ExtendedParserError::PartialMatch(0, "b"))
|
||||
));
|
||||
Err(ExtendedParserError::PartialMatch(0, "b".to_string()))
|
||||
);
|
||||
|
||||
// Binary not allowed for floats
|
||||
assert!(matches!(
|
||||
assert_eq!(
|
||||
f64::extended_parse("0b100"),
|
||||
Err(ExtendedParserError::PartialMatch(0f64, "b100"))
|
||||
));
|
||||
assert!(matches!(
|
||||
Err(ExtendedParserError::PartialMatch(0f64, "b100".to_string()))
|
||||
);
|
||||
assert_eq!(
|
||||
f64::extended_parse("0b100.1"),
|
||||
Err(ExtendedParserError::PartialMatch(0f64, "b100.1"))
|
||||
));
|
||||
Err(ExtendedParserError::PartialMatch(
|
||||
0f64,
|
||||
"b100.1".to_string()
|
||||
))
|
||||
);
|
||||
|
||||
assert!(match ExtendedBigDecimal::extended_parse("0b100.1") {
|
||||
Err(ExtendedParserError::PartialMatch(ebd, "b100.1")) =>
|
||||
ebd == ExtendedBigDecimal::zero(),
|
||||
_ => false,
|
||||
});
|
||||
assert_eq!(
|
||||
ExtendedBigDecimal::extended_parse("0b100.1"),
|
||||
Err(ExtendedParserError::PartialMatch(
|
||||
ExtendedBigDecimal::zero(),
|
||||
"b100.1".to_string()
|
||||
))
|
||||
);
|
||||
|
||||
assert!(match ExtendedBigDecimal::extended_parse("0b") {
|
||||
Err(ExtendedParserError::PartialMatch(ebd, "b")) => ebd == ExtendedBigDecimal::zero(),
|
||||
_ => false,
|
||||
});
|
||||
assert!(match ExtendedBigDecimal::extended_parse("0b.") {
|
||||
Err(ExtendedParserError::PartialMatch(ebd, "b.")) => ebd == ExtendedBigDecimal::zero(),
|
||||
_ => false,
|
||||
});
|
||||
assert_eq!(
|
||||
ExtendedBigDecimal::extended_parse("0b"),
|
||||
Err(ExtendedParserError::PartialMatch(
|
||||
ExtendedBigDecimal::zero(),
|
||||
"b".to_string()
|
||||
))
|
||||
);
|
||||
assert_eq!(
|
||||
ExtendedBigDecimal::extended_parse("0b."),
|
||||
Err(ExtendedParserError::PartialMatch(
|
||||
ExtendedBigDecimal::zero(),
|
||||
"b.".to_string()
|
||||
))
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -1185,15 +1234,15 @@ mod tests {
|
|||
|
||||
// Ensure that trailing whitespace is still a partial match
|
||||
assert_eq!(
|
||||
Err(ExtendedParserError::PartialMatch(6, " ")),
|
||||
Err(ExtendedParserError::PartialMatch(6, " ".to_string())),
|
||||
u64::extended_parse("0x6 ")
|
||||
);
|
||||
assert_eq!(
|
||||
Err(ExtendedParserError::PartialMatch(7, "\t")),
|
||||
Err(ExtendedParserError::PartialMatch(7, "\t".to_string())),
|
||||
u64::extended_parse("0x7\t")
|
||||
);
|
||||
assert_eq!(
|
||||
Err(ExtendedParserError::PartialMatch(8, "\n")),
|
||||
Err(ExtendedParserError::PartialMatch(8, "\n".to_string())),
|
||||
u64::extended_parse("0x8\n")
|
||||
);
|
||||
|
||||
|
|
|
|||
|
|
@ -311,23 +311,39 @@ pub fn read_yes() -> bool {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct NonUtf8OsStrError {
|
||||
input_lossy_string: String,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for NonUtf8OsStrError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
use os_display::Quotable;
|
||||
let quoted = self.input_lossy_string.quote();
|
||||
f.write_fmt(format_args!(
|
||||
"invalid UTF-8 input {quoted} encountered when converting to bytes on a platform that doesn't expose byte arguments",
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
impl std::error::Error for NonUtf8OsStrError {}
|
||||
impl error::UError for NonUtf8OsStrError {}
|
||||
|
||||
/// Converts an `OsStr` to a UTF-8 `&[u8]`.
|
||||
///
|
||||
/// This always succeeds on unix platforms,
|
||||
/// and fails on other platforms if the string can't be coerced to UTF-8.
|
||||
pub fn os_str_as_bytes(os_string: &OsStr) -> mods::error::UResult<&[u8]> {
|
||||
pub fn os_str_as_bytes(os_string: &OsStr) -> Result<&[u8], NonUtf8OsStrError> {
|
||||
#[cfg(unix)]
|
||||
let bytes = os_string.as_bytes();
|
||||
return Ok(os_string.as_bytes());
|
||||
|
||||
#[cfg(not(unix))]
|
||||
let bytes = os_string
|
||||
os_string
|
||||
.to_str()
|
||||
.ok_or_else(|| {
|
||||
mods::error::UUsageError::new(1, "invalid UTF-8 was detected in one or more arguments")
|
||||
})?
|
||||
.as_bytes();
|
||||
|
||||
Ok(bytes)
|
||||
.ok_or_else(|| NonUtf8OsStrError {
|
||||
input_lossy_string: os_string.to_string_lossy().into_owned(),
|
||||
})
|
||||
.map(|s| s.as_bytes())
|
||||
}
|
||||
|
||||
/// Performs a potentially lossy conversion from `OsStr` to UTF-8 bytes.
|
||||
|
|
@ -336,15 +352,13 @@ pub fn os_str_as_bytes(os_string: &OsStr) -> mods::error::UResult<&[u8]> {
|
|||
/// and wraps [`OsStr::to_string_lossy`] on non-unix platforms.
|
||||
pub fn os_str_as_bytes_lossy(os_string: &OsStr) -> Cow<[u8]> {
|
||||
#[cfg(unix)]
|
||||
let bytes = Cow::from(os_string.as_bytes());
|
||||
return Cow::from(os_string.as_bytes());
|
||||
|
||||
#[cfg(not(unix))]
|
||||
let bytes = match os_string.to_string_lossy() {
|
||||
match os_string.to_string_lossy() {
|
||||
Cow::Borrowed(slice) => Cow::from(slice.as_bytes()),
|
||||
Cow::Owned(owned) => Cow::from(owned.into_bytes()),
|
||||
};
|
||||
|
||||
bytes
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts a `&[u8]` to an `&OsStr`,
|
||||
|
|
@ -354,13 +368,12 @@ pub fn os_str_as_bytes_lossy(os_string: &OsStr) -> Cow<[u8]> {
|
|||
/// and fails on other platforms if the bytes can't be parsed as UTF-8.
|
||||
pub fn os_str_from_bytes(bytes: &[u8]) -> mods::error::UResult<Cow<'_, OsStr>> {
|
||||
#[cfg(unix)]
|
||||
let os_str = Cow::Borrowed(OsStr::from_bytes(bytes));
|
||||
#[cfg(not(unix))]
|
||||
let os_str = Cow::Owned(OsString::from(str::from_utf8(bytes).map_err(|_| {
|
||||
mods::error::UUsageError::new(1, "Unable to transform bytes into OsStr")
|
||||
})?));
|
||||
return Ok(Cow::Borrowed(OsStr::from_bytes(bytes)));
|
||||
|
||||
Ok(os_str)
|
||||
#[cfg(not(unix))]
|
||||
Ok(Cow::Owned(OsString::from(str::from_utf8(bytes).map_err(
|
||||
|_| mods::error::UUsageError::new(1, "Unable to transform bytes into OsStr"),
|
||||
)?)))
|
||||
}
|
||||
|
||||
/// Converts a `Vec<u8>` into an `OsString`, parsing as UTF-8 on non-unix platforms.
|
||||
|
|
@ -369,13 +382,12 @@ pub fn os_str_from_bytes(bytes: &[u8]) -> mods::error::UResult<Cow<'_, OsStr>> {
|
|||
/// and fails on other platforms if the bytes can't be parsed as UTF-8.
|
||||
pub fn os_string_from_vec(vec: Vec<u8>) -> mods::error::UResult<OsString> {
|
||||
#[cfg(unix)]
|
||||
let s = OsString::from_vec(vec);
|
||||
#[cfg(not(unix))]
|
||||
let s = OsString::from(String::from_utf8(vec).map_err(|_| {
|
||||
mods::error::UUsageError::new(1, "invalid UTF-8 was detected in one or more arguments")
|
||||
})?);
|
||||
return Ok(OsString::from_vec(vec));
|
||||
|
||||
Ok(s)
|
||||
#[cfg(not(unix))]
|
||||
Ok(OsString::from(String::from_utf8(vec).map_err(|_| {
|
||||
mods::error::UUsageError::new(1, "invalid UTF-8 was detected in one or more arguments")
|
||||
})?))
|
||||
}
|
||||
|
||||
/// Converts an `OsString` into a `Vec<u8>`, parsing as UTF-8 on non-unix platforms.
|
||||
|
|
|
|||
|
|
@ -805,7 +805,7 @@ fn test_overflow() {
|
|||
fn partial_char() {
|
||||
new_ucmd!()
|
||||
.args(&["%d", "'abc"])
|
||||
.fails_with_code(1)
|
||||
.succeeds()
|
||||
.stdout_is("97")
|
||||
.stderr_is(
|
||||
"printf: warning: bc: character(s) following character constant have been ignored\n",
|
||||
|
|
@ -1293,23 +1293,80 @@ fn float_arg_with_whitespace() {
|
|||
|
||||
#[test]
|
||||
fn mb_input() {
|
||||
for format in ["\"á", "\'á", "'\u{e1}"] {
|
||||
let cases = vec![
|
||||
("%04x\n", "\"á", "00e1\n"),
|
||||
("%04x\n", "'á", "00e1\n"),
|
||||
("%04x\n", "'\u{e1}", "00e1\n"),
|
||||
("%i\n", "\"á", "225\n"),
|
||||
("%i\n", "'á", "225\n"),
|
||||
("%i\n", "'\u{e1}", "225\n"),
|
||||
("%f\n", "'á", "225.000000\n"),
|
||||
];
|
||||
for (format, arg, stdout) in cases {
|
||||
new_ucmd!()
|
||||
.args(&["%04x\n", format])
|
||||
.args(&[format, arg])
|
||||
.succeeds()
|
||||
.stdout_only("00e1\n");
|
||||
.stdout_only(stdout);
|
||||
}
|
||||
|
||||
let cases = vec![
|
||||
("\"á=", "="),
|
||||
("\'á-", "-"),
|
||||
("\'á=-==", "=-=="),
|
||||
("'\u{e1}++", "++"),
|
||||
("%04x\n", "\"á=", "00e1\n", "="),
|
||||
("%04x\n", "'á-", "00e1\n", "-"),
|
||||
("%04x\n", "'á=-==", "00e1\n", "=-=="),
|
||||
("%04x\n", "'á'", "00e1\n", "'"),
|
||||
("%04x\n", "'\u{e1}++", "00e1\n", "++"),
|
||||
("%04x\n", "''á'", "0027\n", "á'"),
|
||||
("%i\n", "\"á=", "225\n", "="),
|
||||
];
|
||||
|
||||
for (format, expected) in cases {
|
||||
for (format, arg, stdout, stderr) in cases {
|
||||
new_ucmd!()
|
||||
.args(&["%04x\n", format])
|
||||
.args(&[format, arg])
|
||||
.succeeds()
|
||||
.stdout_is(stdout)
|
||||
.stderr_is(format!("printf: warning: {stderr}: character(s) following character constant have been ignored\n"));
|
||||
}
|
||||
|
||||
for arg in ["\"", "'"] {
|
||||
new_ucmd!()
|
||||
.args(&["%04x\n", arg])
|
||||
.fails()
|
||||
.stderr_contains("expected a numeric value");
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(target_family = "unix")]
|
||||
fn mb_invalid_unicode() {
|
||||
use std::ffi::OsStr;
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
|
||||
let cases = vec![
|
||||
("%04x\n", b"\"\xe1", "00e1\n"),
|
||||
("%04x\n", b"'\xe1", "00e1\n"),
|
||||
("%i\n", b"\"\xe1", "225\n"),
|
||||
("%i\n", b"'\xe1", "225\n"),
|
||||
("%f\n", b"'\xe1", "225.000000\n"),
|
||||
];
|
||||
for (format, arg, stdout) in cases {
|
||||
new_ucmd!()
|
||||
.arg(format)
|
||||
.arg(OsStr::from_bytes(arg))
|
||||
.succeeds()
|
||||
.stdout_only(stdout);
|
||||
}
|
||||
|
||||
let cases = vec![
|
||||
(b"\"\xe1=".as_slice(), "="),
|
||||
(b"'\xe1-".as_slice(), "-"),
|
||||
(b"'\xe1=-==".as_slice(), "=-=="),
|
||||
(b"'\xe1'".as_slice(), "'"),
|
||||
// unclear if original or replacement character is better in stderr
|
||||
//(b"''\xe1'".as_slice(), "'<27>'"),
|
||||
];
|
||||
for (arg, expected) in cases {
|
||||
new_ucmd!()
|
||||
.arg("%04x\n")
|
||||
.arg(OsStr::from_bytes(arg))
|
||||
.succeeds()
|
||||
.stdout_is("00e1\n")
|
||||
.stderr_is(format!("printf: warning: {expected}: character(s) following character constant have been ignored\n"));
|
||||
|
|
@ -1364,3 +1421,35 @@ fn positional_format_specifiers() {
|
|||
.succeeds()
|
||||
.stdout_only("Octal: 115, Int: 42, Float: 3.141590, String: hello, Hex: ff, Scientific: 1.000000e-05, Char: A, Unsigned: 100, Integer: 123");
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(target_family = "unix")]
|
||||
fn non_utf_8_input() {
|
||||
use std::ffi::OsStr;
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
|
||||
// ISO-8859-1 encoded text
|
||||
// spell-checker:disable
|
||||
const INPUT_AND_OUTPUT: &[u8] =
|
||||
b"Swer an rehte g\xFCete wendet s\xEEn gem\xFCete, dem volget s\xE6lde und \xEAre.";
|
||||
// spell-checker:enable
|
||||
|
||||
let os_str = OsStr::from_bytes(INPUT_AND_OUTPUT);
|
||||
|
||||
new_ucmd!()
|
||||
.arg("%s")
|
||||
.arg(os_str)
|
||||
.succeeds()
|
||||
.stdout_only_bytes(INPUT_AND_OUTPUT);
|
||||
|
||||
new_ucmd!()
|
||||
.arg(os_str)
|
||||
.succeeds()
|
||||
.stdout_only_bytes(INPUT_AND_OUTPUT);
|
||||
|
||||
new_ucmd!()
|
||||
.arg("%d")
|
||||
.arg(os_str)
|
||||
.fails()
|
||||
.stderr_contains("expected a numeric value");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -38,11 +38,7 @@ This file documents why some tests are failing:
|
|||
* gnu/tests/mv/part-hardlink.sh
|
||||
* gnu/tests/od/od-N.sh
|
||||
* gnu/tests/od/od-float.sh
|
||||
* gnu/tests/printf/printf-cov.pl
|
||||
* gnu/tests/printf/printf-indexed.sh
|
||||
* gnu/tests/printf/printf-mb.sh
|
||||
* gnu/tests/printf/printf-quote.sh
|
||||
* gnu/tests/printf/printf.sh
|
||||
* gnu/tests/ptx/ptx-overrun.sh
|
||||
* gnu/tests/ptx/ptx.pl
|
||||
* gnu/tests/rm/empty-inacc.sh - https://github.com/uutils/coreutils/issues/7033
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue