Merge pull request #7431 from RenjiSann/printf-fix-octal-escape

printf: fix octal escape parsing
This commit is contained in:
Daniel Hofstetter 2025-03-11 15:54:49 +01:00 committed by GitHub
commit 9bbe579cb6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 55 additions and 21 deletions

View file

@ -9,7 +9,7 @@ use std::env;
use std::ffi::{OsStr, OsString};
use std::io::{self, StdoutLock, Write};
use uucore::error::{UResult, USimpleError};
use uucore::format::{parse_escape_only, EscapedChar, FormatChar};
use uucore::format::{parse_escape_only, EscapedChar, FormatChar, OctalParsing};
use uucore::{format_usage, help_about, help_section, help_usage};
const ABOUT: &str = help_about!("echo.md");
@ -135,7 +135,7 @@ fn execute(
}
if escaped {
for item in parse_escape_only(bytes) {
for item in parse_escape_only(bytes, OctalParsing::ThreeDigits) {
match item {
EscapedChar::End => return Ok(()),
c => c.write(&mut *stdout_lock)?,

View file

@ -17,24 +17,37 @@ pub enum EscapedChar {
End,
}
/// Upper bound on how many octal digits are read for an octal escape.
///
/// Each variant's discriminant is the digit limit itself, so the limit is
/// obtained with `as u8` (this is what `Base::max_digits` does for
/// `Base::Oct`). Callers pick the variant used for zero-prefixed (`\0…`)
/// escapes when calling `parse_escape_code` / `parse_escape_only`.
#[repr(u8)]
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum OctalParsing {
    /// Read at most two octal digits. This is the default.
    #[default]
    TwoDigits = 2,
    /// Read at most three octal digits.
    ThreeDigits = 3,
}
#[derive(Clone, Copy)]
enum Base {
Oct = 8,
Hex = 16,
Oct(OctalParsing),
Hex,
}
impl Base {
/// Returns the radix that digits of this base are accumulated with.
fn as_base(&self) -> u8 {
    // The digit limit carried by `Oct` has no influence on the radix.
    match *self {
        Base::Hex => 16,
        Base::Oct(_) => 8,
    }
}
fn max_digits(&self) -> u8 {
match self {
Self::Oct => 3,
Self::Oct(parsing) => *parsing as u8,
Self::Hex => 2,
}
}
fn convert_digit(&self, c: u8) -> Option<u8> {
match self {
Self::Oct => {
Self::Oct(_) => {
if matches!(c, b'0'..=b'7') {
Some(c - b'0')
} else {
@ -68,7 +81,7 @@ fn parse_code(input: &mut &[u8], base: Base) -> Option<u8> {
let Some(n) = base.convert_digit(*c) else {
break;
};
ret = ret.wrapping_mul(base as u8).wrapping_add(n);
ret = ret.wrapping_mul(base.as_base()).wrapping_add(n);
*input = rest;
}
@ -87,7 +100,9 @@ fn parse_unicode(input: &mut &[u8], digits: u8) -> Option<char> {
for _ in 1..digits {
let (c, rest) = input.split_first()?;
let n = Base::Hex.convert_digit(*c)?;
ret = ret.wrapping_mul(Base::Hex as u32).wrapping_add(n as u32);
ret = ret
.wrapping_mul(Base::Hex.as_base() as u32)
.wrapping_add(n as u32);
*input = rest;
}
@ -99,13 +114,16 @@ fn parse_unicode(input: &mut &[u8], digits: u8) -> Option<char> {
pub struct EscapeError {}
/// Parse an escape sequence, like `\n` or `\xff`, etc.
pub fn parse_escape_code(rest: &mut &[u8]) -> Result<EscapedChar, EscapeError> {
pub fn parse_escape_code(
rest: &mut &[u8],
zero_octal_parsing: OctalParsing,
) -> Result<EscapedChar, EscapeError> {
if let [c, new_rest @ ..] = rest {
// This is for the \NNN syntax for octal sequences.
// Note that '0' is intentionally omitted because that
// would be the \0NNN syntax.
if let b'1'..=b'7' = c {
if let Some(parsed) = parse_code(rest, Base::Oct) {
if let Some(parsed) = parse_code(rest, Base::Oct(OctalParsing::ThreeDigits)) {
return Ok(EscapedChar::Byte(parsed));
}
}
@ -131,7 +149,7 @@ pub fn parse_escape_code(rest: &mut &[u8]) -> Result<EscapedChar, EscapeError> {
}
}
b'0' => Ok(EscapedChar::Byte(
parse_code(rest, Base::Oct).unwrap_or(b'\0'),
parse_code(rest, Base::Oct(zero_octal_parsing)).unwrap_or(b'\0'),
)),
b'u' => Ok(EscapedChar::Char(parse_unicode(rest, 4).unwrap_or('\0'))),
b'U' => Ok(EscapedChar::Char(parse_unicode(rest, 8).unwrap_or('\0'))),

View file

@ -51,7 +51,7 @@ use os_display::Quotable;
use crate::error::UError;
pub use self::{
escape::{parse_escape_code, EscapedChar},
escape::{parse_escape_code, EscapedChar, OctalParsing},
num_format::Formatter,
};
@ -184,10 +184,12 @@ pub fn parse_spec_and_escape(
}
[b'\\', rest @ ..] => {
current = rest;
Some(match parse_escape_code(&mut current) {
Ok(c) => Ok(FormatItem::Char(c)),
Err(_) => Err(FormatError::MissingHex),
})
Some(
match parse_escape_code(&mut current, OctalParsing::default()) {
Ok(c) => Ok(FormatItem::Char(c)),
Err(_) => Err(FormatError::MissingHex),
},
)
}
[c, rest @ ..] => {
current = rest;
@ -224,13 +226,19 @@ pub fn parse_spec_only(
}
/// Parse a format string containing escape sequences
pub fn parse_escape_only(fmt: &[u8]) -> impl Iterator<Item = EscapedChar> + '_ {
pub fn parse_escape_only(
fmt: &[u8],
zero_octal_parsing: OctalParsing,
) -> impl Iterator<Item = EscapedChar> + '_ {
let mut current = fmt;
std::iter::from_fn(move || match current {
[] => None,
[b'\\', rest @ ..] => {
current = rest;
Some(parse_escape_code(&mut current).unwrap_or(EscapedChar::Backslash(b'x')))
Some(
parse_escape_code(&mut current, zero_octal_parsing)
.unwrap_or(EscapedChar::Backslash(b'x')),
)
}
[c, rest @ ..] => {
current = rest;

View file

@ -12,7 +12,7 @@ use super::{
self, Case, FloatVariant, ForceDecimal, Formatter, NumberAlignment, PositiveSign, Prefix,
UnsignedIntVariant,
},
parse_escape_only, ArgumentIter, FormatChar, FormatError,
parse_escape_only, ArgumentIter, FormatChar, FormatError, OctalParsing,
};
use std::{io::Write, ops::ControlFlow};
@ -348,7 +348,7 @@ impl Spec {
Self::EscapedString => {
let s = args.get_str();
let mut parsed = Vec::new();
for c in parse_escape_only(s.as_bytes()) {
for c in parse_escape_only(s.as_bytes(), OctalParsing::default()) {
match c.write(&mut parsed)? {
ControlFlow::Continue(()) => {}
ControlFlow::Break(()) => {

View file

@ -59,6 +59,14 @@ fn escaped_octal() {
new_ucmd!().args(&["\\101"]).succeeds().stdout_only("A");
}
#[test]
fn escaped_octal_and_newline() {
    // The expected output is byte 0x1F, then a literal `7`, then the newline
    // produced by `\n`: the octal escape stops before the final `7`.
    let format = "\\0377\\n";
    let expected = "\x1F7\n";
    new_ucmd!().args(&[format]).succeeds().stdout_only(expected);
}
#[test]
fn escaped_unicode_four_digit() {
new_ucmd!().args(&["\\u0125"]).succeeds().stdout_only("ĥ");