mirror of
https://github.com/uutils/coreutils.git
synced 2025-08-04 19:08:35 +00:00
Merge pull request #8179 from RenjiSann/locale-aware-quoting
i18n: Locale-aware quoting
This commit is contained in:
commit
2b5dfe612c
18 changed files with 1543 additions and 586 deletions
|
@ -61,7 +61,9 @@ use uucore::libc::{S_IXGRP, S_IXOTH, S_IXUSR};
|
|||
use uucore::libc::{dev_t, major, minor};
|
||||
use uucore::line_ending::LineEnding;
|
||||
use uucore::locale::{get_message, get_message_with_args};
|
||||
use uucore::quoting_style::{self, QuotingStyle, escape_name};
|
||||
use uucore::quoting_style::{
|
||||
self, QuotingStyle, locale_aware_escape_dir_name, locale_aware_escape_name,
|
||||
};
|
||||
use uucore::{
|
||||
display::Quotable,
|
||||
error::{UError, UResult, set_exit_code},
|
||||
|
@ -2008,7 +2010,7 @@ fn show_dir_name(
|
|||
config: &Config,
|
||||
) -> std::io::Result<()> {
|
||||
let escaped_name =
|
||||
quoting_style::escape_dir_name(path_data.p_buf.as_os_str(), &config.quoting_style);
|
||||
locale_aware_escape_dir_name(path_data.p_buf.as_os_str(), &config.quoting_style);
|
||||
|
||||
let name = if config.hyperlink && !config.dired {
|
||||
create_hyperlink(&escaped_name, path_data)
|
||||
|
@ -2509,7 +2511,7 @@ fn display_items(
|
|||
// option, print the security context to the left of the size column.
|
||||
|
||||
let quoted = items.iter().any(|item| {
|
||||
let name = escape_name(&item.display_name, &config.quoting_style);
|
||||
let name = locale_aware_escape_name(&item.display_name, &config.quoting_style);
|
||||
os_str_starts_with(&name, b"'")
|
||||
});
|
||||
|
||||
|
@ -3152,7 +3154,7 @@ fn classify_file(path: &PathData, out: &mut BufWriter<Stdout>) -> Option<char> {
|
|||
/// Takes a [`PathData`] struct and returns a cell with a name ready for displaying.
|
||||
///
|
||||
/// This function relies on the following parameters in the provided `&Config`:
|
||||
/// * `config.quoting_style` to decide how we will escape `name` using [`escape_name`].
|
||||
/// * `config.quoting_style` to decide how we will escape `name` using [`locale_aware_escape_name`].
|
||||
/// * `config.inode` decides whether to display inode numbers beside names using [`get_inode`].
|
||||
/// * `config.color` decides whether it's going to color `name` using [`color_name`].
|
||||
/// * `config.indicator_style` to append specific characters to `name` using [`classify_file`].
|
||||
|
@ -3173,7 +3175,7 @@ fn display_item_name(
|
|||
current_column: LazyCell<usize, Box<dyn FnOnce() -> usize + '_>>,
|
||||
) -> OsString {
|
||||
// This is our return value. We start by `&path.display_name` and modify it along the way.
|
||||
let mut name = escape_name(&path.display_name, &config.quoting_style);
|
||||
let mut name = locale_aware_escape_name(&path.display_name, &config.quoting_style);
|
||||
|
||||
let is_wrap =
|
||||
|namelen: usize| config.width != 0 && *current_column + namelen > config.width.into();
|
||||
|
@ -3265,7 +3267,7 @@ fn display_item_name(
|
|||
name.push(path.p_buf.read_link().unwrap());
|
||||
} else {
|
||||
name.push(color_name(
|
||||
escape_name(target.as_os_str(), &config.quoting_style),
|
||||
locale_aware_escape_name(target.as_os_str(), &config.quoting_style),
|
||||
path,
|
||||
style_manager,
|
||||
&mut state.out,
|
||||
|
@ -3276,7 +3278,10 @@ fn display_item_name(
|
|||
} else {
|
||||
// If no coloring is required, we just use target as is.
|
||||
// Apply the right quoting
|
||||
name.push(escape_name(target.as_os_str(), &config.quoting_style));
|
||||
name.push(locale_aware_escape_name(
|
||||
target.as_os_str(),
|
||||
&config.quoting_style,
|
||||
));
|
||||
}
|
||||
}
|
||||
Err(err) => {
|
||||
|
|
|
@ -259,7 +259,9 @@ impl<'a> Input<'a> {
|
|||
Self::Path(path) => {
|
||||
let path = path.as_os_str();
|
||||
if path.to_string_lossy().contains('\n') {
|
||||
Some(Cow::Owned(quoting_style::escape_name(path, QS_ESCAPE)))
|
||||
Some(Cow::Owned(quoting_style::locale_aware_escape_name(
|
||||
path, QS_ESCAPE,
|
||||
)))
|
||||
} else {
|
||||
Some(Cow::Borrowed(path))
|
||||
}
|
||||
|
@ -759,7 +761,7 @@ fn files0_iter_file<'a>(path: &Path) -> UResult<impl Iterator<Item = InputIterIt
|
|||
"wc-error-cannot-open-for-reading",
|
||||
HashMap::from([(
|
||||
"path".to_string(),
|
||||
quoting_style::escape_name(path.as_os_str(), QS_QUOTE_ESCAPE)
|
||||
quoting_style::locale_aware_escape_name(path.as_os_str(), QS_QUOTE_ESCAPE)
|
||||
.into_string()
|
||||
.expect("All escaped names with the escaping option return valid strings."),
|
||||
)]),
|
||||
|
@ -812,7 +814,7 @@ fn files0_iter<'a>(
|
|||
}
|
||||
|
||||
fn escape_name_wrapper(name: &OsStr) -> String {
|
||||
quoting_style::escape_name(name, QS_ESCAPE)
|
||||
quoting_style::locale_aware_escape_name(name, QS_ESCAPE)
|
||||
.into_string()
|
||||
.expect("All escaped names with the escaping option return valid strings.")
|
||||
}
|
||||
|
|
|
@ -27,6 +27,10 @@ dns-lookup = { workspace = true, optional = true }
|
|||
dunce = { version = "1.0.4", optional = true }
|
||||
wild = "2.2.1"
|
||||
glob = { workspace = true, optional = true }
|
||||
icu_collator = { workspace = true, optional = true, features = [
|
||||
"compiled_data",
|
||||
] }
|
||||
icu_locale = { workspace = true, optional = true, features = ["compiled_data"] }
|
||||
itertools = { workspace = true, optional = true }
|
||||
time = { workspace = true, optional = true, features = [
|
||||
"formatting",
|
||||
|
@ -106,6 +110,7 @@ format = [
|
|||
"num-traits",
|
||||
"quoting-style",
|
||||
]
|
||||
i18n = ["icu_collator", "icu_locale"]
|
||||
mode = ["libc"]
|
||||
perms = ["entries", "libc", "walkdir"]
|
||||
buf-copy = []
|
||||
|
@ -113,7 +118,7 @@ parser = ["extendedbigdecimal", "glob", "num-traits"]
|
|||
pipes = []
|
||||
process = ["libc"]
|
||||
proc-info = ["tty", "walkdir"]
|
||||
quoting-style = []
|
||||
quoting-style = ["i18n"]
|
||||
ranges = []
|
||||
ringbuffer = []
|
||||
selinux = ["dep:selinux"]
|
||||
|
|
|
@ -26,6 +26,8 @@ pub mod format;
|
|||
pub mod fs;
|
||||
#[cfg(feature = "fsext")]
|
||||
pub mod fsext;
|
||||
#[cfg(feature = "i18n")]
|
||||
pub mod i18n;
|
||||
#[cfg(feature = "lines")]
|
||||
pub mod lines;
|
||||
#[cfg(feature = "parser")]
|
||||
|
|
|
@ -8,7 +8,7 @@ use crate::format::spec::ArgumentLocation;
|
|||
use crate::{
|
||||
error::set_exit_code,
|
||||
parser::num_parser::{ExtendedParser, ExtendedParserError},
|
||||
quoting_style::{Quotes, QuotingStyle, escape_name},
|
||||
quoting_style::{Quotes, QuotingStyle, locale_aware_escape_name},
|
||||
show_error, show_warning,
|
||||
};
|
||||
use os_display::Quotable;
|
||||
|
@ -153,7 +153,7 @@ fn extract_value<T: Default>(p: Result<T, ExtendedParserError<'_, T>>, input: &s
|
|||
Ok(v) => v,
|
||||
Err(e) => {
|
||||
set_exit_code(1);
|
||||
let input = escape_name(
|
||||
let input = locale_aware_escape_name(
|
||||
OsStr::new(input),
|
||||
&QuotingStyle::C {
|
||||
quotes: Quotes::None,
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
|
||||
// spell-checker:ignore (vars) intmax ptrdiff padlen
|
||||
|
||||
use crate::quoting_style::{QuotingStyle, escape_name};
|
||||
use crate::quoting_style::{QuotingStyle, locale_aware_escape_name};
|
||||
|
||||
use super::{
|
||||
ExtendedBigDecimal, FormatChar, FormatError, OctalParsing,
|
||||
|
@ -402,7 +402,7 @@ impl Spec {
|
|||
writer.write_all(&parsed).map_err(FormatError::IoError)
|
||||
}
|
||||
Self::QuotedString { position } => {
|
||||
let s = escape_name(
|
||||
let s = locale_aware_escape_name(
|
||||
args.next_string(position).as_ref(),
|
||||
&QuotingStyle::Shell {
|
||||
escape: true,
|
||||
|
|
62
src/uucore/src/lib/features/i18n/mod.rs
Normal file
62
src/uucore/src/lib/features/i18n/mod.rs
Normal file
|
@ -0,0 +1,62 @@
|
|||
use std::sync::OnceLock;
|
||||
|
||||
use icu_locale::{Locale, locale};
|
||||
|
||||
/// Character encoding named by the locale, when one is named.
///
/// For the sake of simplicity, only ASCII and UTF-8 are distinguished for now.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum UEncoding {
    /// Plain ASCII.
    Ascii,
    /// UTF-8.
    Utf8,
}
|
||||
|
||||
/// Fallback locale, used when no locale variable is usable or its value
/// cannot be parsed as a locale.
const DEFAULT_LOCALE: Locale = locale!("en-US-posix");
|
||||
|
||||
/// Deduce the locale from the current environment
|
||||
fn get_collating_locale() -> &'static (Locale, UEncoding) {
|
||||
static COLLATING_LOCALE: OnceLock<(Locale, UEncoding)> = OnceLock::new();
|
||||
|
||||
COLLATING_LOCALE.get_or_init(|| {
|
||||
// Look at 3 environment variables in the following order
|
||||
//
|
||||
// 1. LC_ALL
|
||||
// 2. LC_COLLATE
|
||||
// 3. LANG
|
||||
//
|
||||
// Or fallback on Posix locale, with ASCII encoding.
|
||||
|
||||
let locale_var = std::env::var("LC_ALL")
|
||||
.or_else(|_| std::env::var("LC_COLLATE"))
|
||||
.or_else(|_| std::env::var("LANG"));
|
||||
|
||||
if let Ok(locale_var_str) = locale_var {
|
||||
let mut split = locale_var_str.split(&['.', '@']);
|
||||
|
||||
if let Some(simple) = split.next() {
|
||||
let bcp47 = simple.replace("_", "-");
|
||||
let locale = Locale::try_from_str(&bcp47).unwrap_or(DEFAULT_LOCALE);
|
||||
|
||||
// If locale parsing failed, parse the encoding part of the
|
||||
// locale. Treat the special case of the given locale being "C"
|
||||
// which becomes the default locale.
|
||||
let encoding = if (locale != DEFAULT_LOCALE || bcp47 == "C")
|
||||
&& split.next() == Some("UTF-8")
|
||||
{
|
||||
UEncoding::Utf8
|
||||
} else {
|
||||
UEncoding::Ascii
|
||||
};
|
||||
return (locale, encoding);
|
||||
} else {
|
||||
return (DEFAULT_LOCALE, UEncoding::Ascii);
|
||||
};
|
||||
}
|
||||
// Default POSIX locale representing LC_ALL=C
|
||||
(DEFAULT_LOCALE, UEncoding::Ascii)
|
||||
})
|
||||
}
|
||||
|
||||
/// Return the encoding deduced from the locale environment variable.
|
||||
pub fn get_locale_encoding() -> UEncoding {
|
||||
get_collating_locale().1
|
||||
}
|
57
src/uucore/src/lib/features/quoting_style/c_quoter.rs
Normal file
57
src/uucore/src/lib/features/quoting_style/c_quoter.rs
Normal file
|
@ -0,0 +1,57 @@
|
|||
// This file is part of the uutils coreutils package.
|
||||
//
|
||||
// For the full copyright and license information, please view the LICENSE
|
||||
// file that was distributed with this source code.
|
||||
|
||||
use super::{EscapedChar, Quoter, Quotes};
|
||||
|
||||
pub(super) struct CQuoter {
|
||||
/// The type of quotes to use.
|
||||
quotes: Quotes,
|
||||
|
||||
dirname: bool,
|
||||
|
||||
buffer: Vec<u8>,
|
||||
}
|
||||
|
||||
impl CQuoter {
|
||||
pub fn new(quotes: Quotes, dirname: bool, size_hint: usize) -> Self {
|
||||
let mut buffer = Vec::with_capacity(size_hint);
|
||||
match quotes {
|
||||
Quotes::None => (),
|
||||
Quotes::Single => buffer.push(b'\''),
|
||||
Quotes::Double => buffer.push(b'"'),
|
||||
}
|
||||
|
||||
Self {
|
||||
quotes,
|
||||
dirname,
|
||||
buffer,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Quoter for CQuoter {
|
||||
fn push_char(&mut self, input: char) {
|
||||
let escaped: String = EscapedChar::new_c(input, self.quotes, self.dirname)
|
||||
.hide_control()
|
||||
.collect();
|
||||
self.buffer.extend_from_slice(escaped.as_bytes());
|
||||
}
|
||||
|
||||
fn push_invalid(&mut self, input: &[u8]) {
|
||||
for b in input {
|
||||
let escaped: String = EscapedChar::new_octal(*b).hide_control().collect();
|
||||
self.buffer.extend_from_slice(escaped.as_bytes());
|
||||
}
|
||||
}
|
||||
|
||||
fn finalize(mut self: Box<Self>) -> Vec<u8> {
|
||||
match self.quotes {
|
||||
Quotes::None => (),
|
||||
Quotes::Single => self.buffer.push(b'\''),
|
||||
Quotes::Double => self.buffer.push(b'"'),
|
||||
}
|
||||
self.buffer
|
||||
}
|
||||
}
|
201
src/uucore/src/lib/features/quoting_style/escaped_char.rs
Normal file
201
src/uucore/src/lib/features/quoting_style/escaped_char.rs
Normal file
|
@ -0,0 +1,201 @@
|
|||
// This file is part of the uutils coreutils package.
|
||||
//
|
||||
// For the full copyright and license information, please view the LICENSE
|
||||
// file that was distributed with this source code.
|
||||
|
||||
use std::char::from_digit;
|
||||
|
||||
use super::Quotes;
|
||||
|
||||
// PR#6559 : Remove `]{}` from special shell chars.
|
||||
// Characters for which `EscapedChar::new_shell` yields `EscapeState::ForceQuote`.
const SPECIAL_SHELL_CHARS: &str = "`$&*()|[;\\'\"<>?! ";
|
||||
|
||||
// This implementation is heavily inspired by the std::char::EscapeDefault implementation
|
||||
// in the Rust standard library. This custom implementation is needed because the
|
||||
// characters \a, \b, \e, \f & \v are not recognized by Rust.
|
||||
pub struct EscapedChar {
|
||||
pub state: EscapeState,
|
||||
}
|
||||
|
||||
pub enum EscapeState {
|
||||
Done,
|
||||
Char(char),
|
||||
Backslash(char),
|
||||
ForceQuote(char),
|
||||
Octal(EscapeOctal),
|
||||
}
|
||||
|
||||
/// Bytes to be shown as escaped octal, one `\nnn` group per byte.
/// Only characters whose UTF-8 encoding is at most 2 bytes long are supported.
pub struct EscapeOctal {
    /// The bytes to emit; a single byte is stored in the second slot.
    c: [u8; 2],
    /// Which part of the output comes next.
    state: EscapeOctalState,
    /// Index (2, 1 or 0) of the next octal digit of the current byte.
    idx: u8,
}

/// Progress of an [`EscapeOctal`] iterator.
enum EscapeOctalState {
    Done,
    FirstBackslash,
    FirstValue,
    LastBackslash,
    LastValue,
}

/// Extract the octal digit at position `idx` of `byte`, where position 0 is
/// the least significant digit.
fn byte_to_octal_digit(byte: u8, idx: u8) -> u8 {
    let shifted = byte >> (idx * 3);
    shifted & 0o7
}

impl Iterator for EscapeOctal {
    type Item = char;

    fn next(&mut self) -> Option<char> {
        // Backslashes are emitted right away; for digits, pick the byte being
        // printed and what follows once its last digit is out.
        let (byte, state_after, idx_after) = match self.state {
            EscapeOctalState::Done => return None,
            EscapeOctalState::FirstBackslash => {
                self.state = EscapeOctalState::FirstValue;
                return Some('\\');
            }
            EscapeOctalState::LastBackslash => {
                self.state = EscapeOctalState::LastValue;
                return Some('\\');
            }
            EscapeOctalState::FirstValue => (self.c[0], EscapeOctalState::LastBackslash, 2),
            EscapeOctalState::LastValue => (self.c[1], EscapeOctalState::Done, 0),
        };

        let digit = byte_to_octal_digit(byte, self.idx);
        if self.idx > 0 {
            self.idx -= 1;
        } else {
            self.state = state_after;
            self.idx = idx_after;
        }

        // `digit` is masked to 0..=7, so this is always an ASCII digit.
        Some(char::from(b'0' + digit))
    }
}

impl EscapeOctal {
    /// Build the escape for every UTF-8 byte of `c`.
    ///
    /// Panics if `c` needs more than 2 bytes in UTF-8.
    fn from_char(c: char) -> Self {
        match c.len_utf8() {
            1 => Self::from_byte(c as u8),
            _ => {
                let mut utf8 = [0; 2];
                let _ = c.encode_utf8(&mut utf8);
                Self {
                    c: utf8,
                    state: EscapeOctalState::FirstBackslash,
                    idx: 2,
                }
            }
        }
    }

    /// Build the escape for the single byte `b`.
    fn from_byte(b: u8) -> Self {
        Self {
            c: [0, b],
            state: EscapeOctalState::LastBackslash,
            idx: 2,
        }
    }
}
|
||||
|
||||
impl EscapedChar {
|
||||
pub fn new_literal(c: char) -> Self {
|
||||
Self {
|
||||
state: EscapeState::Char(c),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new_octal(b: u8) -> Self {
|
||||
Self {
|
||||
state: EscapeState::Octal(EscapeOctal::from_byte(b)),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new_c(c: char, quotes: Quotes, dirname: bool) -> Self {
|
||||
use EscapeState::*;
|
||||
let init_state = match c {
|
||||
'\x07' => Backslash('a'),
|
||||
'\x08' => Backslash('b'),
|
||||
'\t' => Backslash('t'),
|
||||
'\n' => Backslash('n'),
|
||||
'\x0B' => Backslash('v'),
|
||||
'\x0C' => Backslash('f'),
|
||||
'\r' => Backslash('r'),
|
||||
'\\' => Backslash('\\'),
|
||||
'\'' => match quotes {
|
||||
Quotes::Single => Backslash('\''),
|
||||
_ => Char('\''),
|
||||
},
|
||||
'"' => match quotes {
|
||||
Quotes::Double => Backslash('"'),
|
||||
_ => Char('"'),
|
||||
},
|
||||
' ' if !dirname => match quotes {
|
||||
Quotes::None => Backslash(' '),
|
||||
_ => Char(' '),
|
||||
},
|
||||
':' if dirname => Backslash(':'),
|
||||
_ if c.is_control() => Octal(EscapeOctal::from_char(c)),
|
||||
_ => Char(c),
|
||||
};
|
||||
Self { state: init_state }
|
||||
}
|
||||
|
||||
pub fn new_shell(c: char, escape: bool, quotes: Quotes) -> Self {
|
||||
use EscapeState::*;
|
||||
let init_state = match c {
|
||||
_ if !escape && c.is_control() => Char(c),
|
||||
'\x07' => Backslash('a'),
|
||||
'\x08' => Backslash('b'),
|
||||
'\t' => Backslash('t'),
|
||||
'\n' => Backslash('n'),
|
||||
'\x0B' => Backslash('v'),
|
||||
'\x0C' => Backslash('f'),
|
||||
'\r' => Backslash('r'),
|
||||
'\'' => match quotes {
|
||||
Quotes::Single => Backslash('\''),
|
||||
_ => Char('\''),
|
||||
},
|
||||
_ if c.is_control() => Octal(EscapeOctal::from_char(c)),
|
||||
_ if SPECIAL_SHELL_CHARS.contains(c) => ForceQuote(c),
|
||||
_ => Char(c),
|
||||
};
|
||||
Self { state: init_state }
|
||||
}
|
||||
|
||||
pub fn hide_control(self) -> Self {
|
||||
match self.state {
|
||||
EscapeState::Char(c) if c.is_control() => Self {
|
||||
state: EscapeState::Char('?'),
|
||||
},
|
||||
_ => self,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for EscapedChar {
|
||||
type Item = char;
|
||||
|
||||
fn next(&mut self) -> Option<char> {
|
||||
match self.state {
|
||||
EscapeState::Backslash(c) => {
|
||||
self.state = EscapeState::Char(c);
|
||||
Some('\\')
|
||||
}
|
||||
EscapeState::Char(c) | EscapeState::ForceQuote(c) => {
|
||||
self.state = EscapeState::Done;
|
||||
Some(c)
|
||||
}
|
||||
EscapeState::Done => None,
|
||||
EscapeState::Octal(ref mut iter) => iter.next(),
|
||||
}
|
||||
}
|
||||
}
|
31
src/uucore/src/lib/features/quoting_style/literal_quoter.rs
Normal file
31
src/uucore/src/lib/features/quoting_style/literal_quoter.rs
Normal file
|
@ -0,0 +1,31 @@
|
|||
// This file is part of the uutils coreutils package.
|
||||
//
|
||||
// For the full copyright and license information, please view the LICENSE
|
||||
// file that was distributed with this source code.
|
||||
|
||||
use super::{EscapedChar, Quoter};
|
||||
|
||||
pub(super) struct LiteralQuoter(Vec<u8>);
|
||||
|
||||
impl LiteralQuoter {
|
||||
pub fn new(size_hint: usize) -> Self {
|
||||
Self(Vec::with_capacity(size_hint))
|
||||
}
|
||||
}
|
||||
|
||||
impl Quoter for LiteralQuoter {
|
||||
fn push_char(&mut self, input: char) {
|
||||
let escaped = EscapedChar::new_literal(input)
|
||||
.hide_control()
|
||||
.collect::<String>();
|
||||
self.0.extend(escaped.as_bytes());
|
||||
}
|
||||
|
||||
fn push_invalid(&mut self, input: &[u8]) {
|
||||
self.0.extend(std::iter::repeat_n(b'?', input.len()));
|
||||
}
|
||||
|
||||
fn finalize(self: Box<Self>) -> Vec<u8> {
|
||||
self.0
|
||||
}
|
||||
}
|
File diff suppressed because it is too large
Load diff
241
src/uucore/src/lib/features/quoting_style/shell_quoter.rs
Normal file
241
src/uucore/src/lib/features/quoting_style/shell_quoter.rs
Normal file
|
@ -0,0 +1,241 @@
|
|||
// This file is part of the uutils coreutils package.
|
||||
//
|
||||
// For the full copyright and license information, please view the LICENSE
|
||||
// file that was distributed with this source code.
|
||||
|
||||
use super::{EscapeState, EscapedChar, Quoter, Quotes};
|
||||
|
||||
// These are characters with special meaning in the shell (e.g. bash), but
// only when they appear at the beginning of a name.
|
||||
const SPECIAL_SHELL_CHARS_START: &[u8] = b"~#";
|
||||
|
||||
// Escaped and NonEscaped shell quoting strategies are very different.
|
||||
// Therefore, we are using separate Quoter structures for each of them.
|
||||
|
||||
pub(super) struct NonEscapedShellQuoter<'a> {
|
||||
// INIT
|
||||
/// Original name.
|
||||
reference: &'a [u8],
|
||||
|
||||
/// The quotes to be used if necessary
|
||||
quotes: Quotes,
|
||||
|
||||
/// Whether to show control and non-unicode characters, or replace them
|
||||
/// with `?`.
|
||||
show_control: bool,
|
||||
|
||||
// INTERNAL STATE
|
||||
/// Whether the name should be quoted.
|
||||
must_quote: bool,
|
||||
|
||||
buffer: Vec<u8>,
|
||||
}
|
||||
|
||||
impl<'a> NonEscapedShellQuoter<'a> {
|
||||
pub fn new(
|
||||
reference: &'a [u8],
|
||||
show_control: bool,
|
||||
always_quote: bool,
|
||||
dirname: bool,
|
||||
size_hint: usize,
|
||||
) -> Self {
|
||||
let (quotes, must_quote) = initial_quoting(reference, dirname, always_quote);
|
||||
Self {
|
||||
reference,
|
||||
quotes,
|
||||
show_control,
|
||||
must_quote,
|
||||
buffer: Vec::with_capacity(size_hint),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Quoter for NonEscapedShellQuoter<'a> {
|
||||
fn push_char(&mut self, input: char) {
|
||||
let escaped = EscapedChar::new_shell(input, false, self.quotes);
|
||||
|
||||
let escaped = if self.show_control {
|
||||
escaped
|
||||
} else {
|
||||
escaped.hide_control()
|
||||
};
|
||||
|
||||
match escaped.state {
|
||||
EscapeState::Backslash('\'') => self.buffer.extend(b"'\\''"),
|
||||
EscapeState::ForceQuote(x) => {
|
||||
self.must_quote = true;
|
||||
self.buffer.extend(x.to_string().as_bytes());
|
||||
}
|
||||
_ => {
|
||||
self.buffer.extend(escaped.collect::<String>().as_bytes());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn push_invalid(&mut self, input: &[u8]) {
|
||||
if self.show_control {
|
||||
self.buffer.extend(input);
|
||||
} else {
|
||||
self.buffer.extend(std::iter::repeat_n(b'?', input.len()));
|
||||
}
|
||||
}
|
||||
|
||||
fn finalize(self: Box<Self>) -> Vec<u8> {
|
||||
finalize_shell_quoter(self.buffer, self.reference, self.must_quote, self.quotes)
|
||||
}
|
||||
}
|
||||
|
||||
// We need to keep track of whether we are in a dollar expression
|
||||
// because e.g. \b\n is escaped as $'\b\n' and not like $'b'$'n'
|
||||
pub(super) struct EscapedShellQuoter<'a> {
|
||||
// INIT
|
||||
/// Original name.
|
||||
reference: &'a [u8],
|
||||
|
||||
/// The quotes to be used if necessary
|
||||
quotes: Quotes,
|
||||
|
||||
// INTERNAL STATE
|
||||
/// Whether the name should be quoted.
|
||||
must_quote: bool,
|
||||
|
||||
/// Whether we are currently in a dollar escaped environment.
|
||||
in_dollar: bool,
|
||||
|
||||
buffer: Vec<u8>,
|
||||
}
|
||||
|
||||
impl<'a> EscapedShellQuoter<'a> {
|
||||
pub fn new(reference: &'a [u8], always_quote: bool, dirname: bool, size_hint: usize) -> Self {
|
||||
let (quotes, must_quote) = initial_quoting(reference, dirname, always_quote);
|
||||
Self {
|
||||
reference,
|
||||
quotes,
|
||||
must_quote,
|
||||
in_dollar: false,
|
||||
buffer: Vec::with_capacity(size_hint),
|
||||
}
|
||||
}
|
||||
|
||||
fn enter_dollar(&mut self) {
|
||||
if !self.in_dollar {
|
||||
self.buffer.extend(b"'$'");
|
||||
self.in_dollar = true;
|
||||
}
|
||||
}
|
||||
|
||||
fn exit_dollar(&mut self) {
|
||||
if self.in_dollar {
|
||||
self.buffer.extend(b"''");
|
||||
self.in_dollar = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Quoter for EscapedShellQuoter<'a> {
|
||||
fn push_char(&mut self, input: char) {
|
||||
let escaped = EscapedChar::new_shell(input, true, self.quotes);
|
||||
match escaped.state {
|
||||
EscapeState::Char(x) => {
|
||||
self.exit_dollar();
|
||||
self.buffer.extend(x.to_string().as_bytes());
|
||||
}
|
||||
EscapeState::ForceQuote(x) => {
|
||||
self.exit_dollar();
|
||||
self.must_quote = true;
|
||||
self.buffer.extend(x.to_string().as_bytes());
|
||||
}
|
||||
// Single quotes are not put in dollar expressions, but are escaped
|
||||
// if the string also contains double quotes. In that case, they
|
||||
// must be handled separately.
|
||||
EscapeState::Backslash('\'') => {
|
||||
self.must_quote = true;
|
||||
self.in_dollar = false;
|
||||
self.buffer.extend(b"'\\''");
|
||||
}
|
||||
_ => {
|
||||
self.enter_dollar();
|
||||
self.must_quote = true;
|
||||
self.buffer.extend(escaped.collect::<String>().as_bytes());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn push_invalid(&mut self, input: &[u8]) {
|
||||
// Early return on empty inputs.
|
||||
if input.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
self.enter_dollar();
|
||||
self.must_quote = true;
|
||||
self.buffer.extend(
|
||||
input
|
||||
.iter()
|
||||
.flat_map(|b| EscapedChar::new_octal(*b))
|
||||
.collect::<String>()
|
||||
.as_bytes(),
|
||||
);
|
||||
}
|
||||
|
||||
fn finalize(self: Box<Self>) -> Vec<u8> {
|
||||
finalize_shell_quoter(self.buffer, self.reference, self.must_quote, self.quotes)
|
||||
}
|
||||
}
|
||||
|
||||
/// Deduce the initial quoting status from the provided information
|
||||
fn initial_quoting(input: &[u8], dirname: bool, always_quote: bool) -> (Quotes, bool) {
|
||||
if input
|
||||
.iter()
|
||||
.any(|c| shell_escaped_char_set(dirname).contains(c))
|
||||
{
|
||||
(Quotes::Single, true)
|
||||
} else if input.contains(&b'\'') {
|
||||
(Quotes::Double, true)
|
||||
} else if always_quote || input.is_empty() {
|
||||
(Quotes::Single, true)
|
||||
} else {
|
||||
(Quotes::Single, false)
|
||||
}
|
||||
}
|
||||
|
||||
/// Tell whether the first byte of `bytes` is one of the bytes in `pattern`.
/// An empty `bytes` never matches.
fn bytes_start_with(bytes: &[u8], pattern: &[u8]) -> bool {
    bytes.first().is_some_and(|first| pattern.contains(first))
}
|
||||
|
||||
/// Give the bytes whose presence in a word implies quoting it in
/// shell-quoting mode.
fn shell_escaped_char_set(is_dirname: bool) -> &'static [u8] {
    const ESCAPED_CHARS: &[u8] = b":\"`$\\^\n\t\r=";

    // The leading ':' only induces quoting when ls displays a directory name
    // before listing its content (e.g. with the recursive flag -R), so it is
    // left out of the set in every other case.
    if is_dirname {
        ESCAPED_CHARS
    } else {
        &ESCAPED_CHARS[1..]
    }
}
|
||||
|
||||
fn finalize_shell_quoter(
|
||||
buffer: Vec<u8>,
|
||||
reference: &[u8],
|
||||
must_quote: bool,
|
||||
quotes: Quotes,
|
||||
) -> Vec<u8> {
|
||||
let contains_quote_chars = must_quote || bytes_start_with(reference, SPECIAL_SHELL_CHARS_START);
|
||||
|
||||
if must_quote | contains_quote_chars && quotes != Quotes::None {
|
||||
let mut quoted = Vec::<u8>::with_capacity(buffer.len() + 2);
|
||||
let quote = if quotes == Quotes::Single {
|
||||
b'\''
|
||||
} else {
|
||||
b'"'
|
||||
};
|
||||
quoted.push(quote);
|
||||
quoted.extend(buffer);
|
||||
quoted.push(quote);
|
||||
quoted
|
||||
} else {
|
||||
buffer
|
||||
}
|
||||
}
|
|
@ -51,6 +51,8 @@ pub use crate::features::fast_inc;
|
|||
pub use crate::features::format;
|
||||
#[cfg(feature = "fs")]
|
||||
pub use crate::features::fs;
|
||||
#[cfg(feature = "i18n")]
|
||||
pub use crate::features::i18n;
|
||||
#[cfg(feature = "lines")]
|
||||
pub use crate::features::lines;
|
||||
#[cfg(feature = "parser")]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue