mirror of
https://github.com/uutils/coreutils.git
synced 2025-12-23 08:47:37 +00:00
i18n: small refactor, add decimal stuff
This commit is contained in:
parent
f5a862c55d
commit
bb8744f115
7 changed files with 161 additions and 45 deletions
36
Cargo.lock
generated
36
Cargo.lock
generated
|
|
@ -990,6 +990,17 @@ dependencies = [
|
|||
"windows-sys 0.59.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fixed_decimal"
|
||||
version = "0.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "35943d22b2f19c0cb198ecf915910a8158e94541c89dcc63300d7799d46c2c5e"
|
||||
dependencies = [
|
||||
"displaydoc",
|
||||
"smallvec",
|
||||
"writeable",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "flate2"
|
||||
version = "1.1.2"
|
||||
|
|
@ -1290,6 +1301,29 @@ dependencies = [
|
|||
"zerovec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "icu_decimal"
|
||||
version = "2.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fec61c43fdc4e368a9f450272833123a8ef0d7083a44597660ce94d791b8a2e2"
|
||||
dependencies = [
|
||||
"displaydoc",
|
||||
"fixed_decimal",
|
||||
"icu_decimal_data",
|
||||
"icu_locale",
|
||||
"icu_locale_core",
|
||||
"icu_provider",
|
||||
"tinystr",
|
||||
"writeable",
|
||||
"zerovec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "icu_decimal_data"
|
||||
version = "2.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b70963bc35f9bdf1bc66a5c1f458f4991c1dc71760e00fa06016b2c76b2738d5"
|
||||
|
||||
[[package]]
|
||||
name = "icu_locale"
|
||||
version = "2.0.0"
|
||||
|
|
@ -3946,7 +3980,9 @@ dependencies = [
|
|||
"glob",
|
||||
"hex",
|
||||
"icu_collator",
|
||||
"icu_decimal",
|
||||
"icu_locale",
|
||||
"icu_provider",
|
||||
"itertools 0.14.0",
|
||||
"libc",
|
||||
"md-5",
|
||||
|
|
|
|||
|
|
@ -314,7 +314,9 @@ glob = "0.3.1"
|
|||
half = "2.4.1"
|
||||
hostname = "0.4"
|
||||
icu_collator = "2.0.0"
|
||||
icu_decimal = "2.0.0"
|
||||
icu_locale = "2.0.0"
|
||||
icu_provider = "2.0.0"
|
||||
indicatif = "0.18.0"
|
||||
itertools = "0.14.0"
|
||||
jiff = { version = "0.2.10", default-features = false, features = [
|
||||
|
|
|
|||
|
|
@ -27,10 +27,6 @@ dns-lookup = { workspace = true, optional = true }
|
|||
dunce = { version = "1.0.4", optional = true }
|
||||
wild = "2.2.1"
|
||||
glob = { workspace = true, optional = true }
|
||||
icu_collator = { workspace = true, optional = true, features = [
|
||||
"compiled_data",
|
||||
] }
|
||||
icu_locale = { workspace = true, optional = true, features = ["compiled_data"] }
|
||||
itertools = { workspace = true, optional = true }
|
||||
time = { workspace = true, optional = true, features = [
|
||||
"formatting",
|
||||
|
|
@ -59,6 +55,16 @@ bigdecimal = { workspace = true, optional = true }
|
|||
num-traits = { workspace = true, optional = true }
|
||||
selinux = { workspace = true, optional = true }
|
||||
|
||||
# icu stuff
|
||||
icu_collator = { workspace = true, optional = true, features = [
|
||||
"compiled_data",
|
||||
] }
|
||||
icu_decimal = { workspace = true, optional = true, features = [
|
||||
"compiled_data",
|
||||
] }
|
||||
icu_locale = { workspace = true, optional = true, features = ["compiled_data"] }
|
||||
icu_provider = { workspace = true, optional = true }
|
||||
|
||||
# Fluent dependencies
|
||||
fluent = { workspace = true }
|
||||
fluent-syntax = { workspace = true }
|
||||
|
|
@ -108,7 +114,9 @@ format = [
|
|||
"num-traits",
|
||||
"quoting-style",
|
||||
]
|
||||
i18n = ["icu_collator", "icu_locale"]
|
||||
i18n-all = ["i18n-decimal"]
|
||||
i18n-common = ["icu_locale", "icu_provider"]
|
||||
i18n-decimal = ["i18n-common", "icu_decimal", "icu_locale", "icu_provider"]
|
||||
mode = ["libc"]
|
||||
perms = ["entries", "libc", "walkdir"]
|
||||
buf-copy = []
|
||||
|
|
@ -116,7 +124,7 @@ parser = ["extendedbigdecimal", "glob", "num-traits"]
|
|||
pipes = []
|
||||
process = ["libc"]
|
||||
proc-info = ["tty", "walkdir"]
|
||||
quoting-style = ["i18n"]
|
||||
quoting-style = ["i18n-common"]
|
||||
ranges = []
|
||||
ringbuffer = []
|
||||
selinux = ["dep:selinux"]
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ pub mod format;
|
|||
pub mod fs;
|
||||
#[cfg(feature = "fsext")]
|
||||
pub mod fsext;
|
||||
#[cfg(feature = "i18n")]
|
||||
#[cfg(feature = "i18n-common")]
|
||||
pub mod i18n;
|
||||
#[cfg(feature = "lines")]
|
||||
pub mod lines;
|
||||
|
|
|
|||
51
src/uucore/src/lib/features/i18n/decimal.rs
Normal file
51
src/uucore/src/lib/features/i18n/decimal.rs
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
// This file is part of the uutils coreutils package.
|
||||
//
|
||||
// For the full copyright and license information, please view the LICENSE
|
||||
// file that was distributed with this source code.
|
||||
|
||||
use std::sync::OnceLock;
|
||||
|
||||
use icu_decimal::provider::DecimalSymbolsV1;
|
||||
use icu_locale::Locale;
|
||||
use icu_provider::prelude::*;
|
||||
|
||||
use crate::i18n::get_numeric_locale;
|
||||
|
||||
/// Return the decimal separator for the given locale.
///
/// Builds a locale-only data request, loads `DecimalSymbolsV1` from
/// icu_decimal's `Baked` provider, and returns the separator as an owned
/// `String`.
///
/// # Panics
///
/// Panics if the `Baked` provider fails to load the requested data, because
/// the load result is unwrapped.
|
||||
fn get_decimal_separator(loc: Locale) -> String {
|
||||
// The provider API works on `DataLocale`, so convert the `Locale` first.
let data_locale = DataLocale::from(loc);
|
||||
|
||||
// Request keyed only by locale, with default request metadata.
let request = DataRequest {
|
||||
id: DataIdentifierBorrowed::for_locale(&data_locale),
|
||||
metadata: DataRequestMetadata::default(),
|
||||
};
|
||||
|
||||
// NOTE(review): presumably the baked data always resolves (via fallback)
// for any locale, making this unwrap unreachable in practice -- confirm.
let response: DataResponse<DecimalSymbolsV1> =
|
||||
icu_decimal::provider::Baked.load(request).unwrap();
|
||||
|
||||
// Copy the separator out of the payload so the caller owns it.
response.payload.get().decimal_separator().to_string()
|
||||
}
|
||||
|
||||
/// Return the decimal separator for the numeric locale we're working with.
///
/// The value is computed on the first call from `get_numeric_locale()` and
/// cached in a process-wide `OnceLock`; later calls return the cached string.
///
|
||||
/// Example:
|
||||
/// Say we need to format 1000.5
|
||||
/// en_US: 1,000.5 -> decimal separator is '.'
|
||||
/// fr_FR: 1 000,5 -> decimal separator is ','
|
||||
pub fn locale_decimal_separator() -> &'static str {
|
||||
// Lazily-initialized cache; the `String` lives for the rest of the process.
static DECIMAL_SEP: OnceLock<String> = OnceLock::new();
|
||||
|
||||
// `.0` is the `Locale` half of the `(Locale, UEncoding)` pair; it is cloned
// because `get_decimal_separator` takes the locale by value.
DECIMAL_SEP.get_or_init(|| get_decimal_separator(get_numeric_locale().0.clone()))
|
||||
}
|
||||
|
||||
// Unit tests for the locale -> decimal separator lookup.
#[cfg(test)]
|
||||
mod tests {
|
||||
use icu_locale::locale;
|
||||
|
||||
use super::get_decimal_separator;
|
||||
|
||||
// Checks one locale using '.' and one using ',' as the decimal separator.
#[test]
|
||||
fn test_simple_separator() {
|
||||
assert_eq!(get_decimal_separator(locale!("en")), ".");
|
||||
assert_eq!(get_decimal_separator(locale!("fr")), ",");
|
||||
}
|
||||
}
|
||||
|
|
@ -1,7 +1,15 @@
|
|||
// This file is part of the uutils coreutils package.
|
||||
//
|
||||
// For the full copyright and license information, please view the LICENSE
|
||||
// file that was distributed with this source code.
|
||||
|
||||
use std::sync::OnceLock;
|
||||
|
||||
use icu_locale::{Locale, locale};
|
||||
|
||||
#[cfg(feature = "i18n-decimal")]
|
||||
pub mod decimal;
|
||||
|
||||
/// The encoding specified by the locale, if specified
|
||||
/// Currently only supports ASCII and UTF-8 for the sake of simplicity.
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||
|
|
@ -12,48 +20,59 @@ pub enum UEncoding {
|
|||
|
||||
const DEFAULT_LOCALE: Locale = locale!("en-US-posix");
|
||||
|
||||
/// Deduce the locale and its encoding from the current environment.
|
||||
/// Look at 3 environment variables in the following order
|
||||
///
|
||||
/// 1. LC_ALL
|
||||
/// 2. `locale_name`
|
||||
/// 3. LANG
|
||||
///
|
||||
/// Or fallback on Posix locale, with ASCII encoding.
///
/// The first variable that can be read is used. Its value is split on `.`
/// and `@`: the first piece is the locale name, and the piece right after it
/// is compared (case-insensitively) against "utf-8" to pick the encoding.
|
||||
fn get_locale_from_env(locale_name: &str) -> (Locale, UEncoding) {
|
||||
// NOTE(review): `std::env::var` yields `Ok("")` for a variable that is set
// but empty, so an empty `LC_ALL` ends the search here instead of falling
// through to the next variable. POSIX treats an empty value as unset --
// confirm whether that should be filtered out.
let locale_var = ["LC_ALL", locale_name, "LANG"]
|
||||
.iter()
|
||||
.find_map(|&key| std::env::var(key).ok());
|
||||
|
||||
if let Some(locale_var_str) = locale_var {
|
||||
// e.g. "fr_FR.UTF-8@euro" -> "fr_FR", "UTF-8", "euro"
let mut split = locale_var_str.split(&['.', '@']);
|
||||
|
||||
if let Some(simple) = split.next() {
|
||||
// Naively convert the locale name to BCP47 tag format.
|
||||
//
|
||||
// See https://en.wikipedia.org/wiki/IETF_language_tag
|
||||
let bcp47 = simple.replace("_", "-");
|
||||
// Names that do not parse fall back to DEFAULT_LOCALE.
let locale = Locale::try_from_str(&bcp47).unwrap_or(DEFAULT_LOCALE);
|
||||
|
||||
// Only honor the encoding part of the locale when the parsed locale
|
||||
// is not the default one. Treat the special case of the given locale being "C"
|
||||
// which becomes the default locale.
// In every other case the encoding is ASCII. Note that with a value
// such as "fr_FR@euro" the piece inspected here is the modifier
// ("euro"), which simply does not match "utf-8".
|
||||
let encoding = if (locale != DEFAULT_LOCALE || bcp47 == "C")
|
||||
&& split
|
||||
.next()
|
||||
.is_some_and(|enc| enc.to_lowercase() == "utf-8")
|
||||
{
|
||||
UEncoding::Utf8
|
||||
} else {
|
||||
UEncoding::Ascii
|
||||
};
|
||||
return (locale, encoding);
|
||||
}
|
||||
}
|
||||
// Default POSIX locale representing LC_ALL=C
|
||||
(DEFAULT_LOCALE, UEncoding::Ascii)
|
||||
}
|
||||
|
||||
/// Get the collating locale from the environment
|
||||
fn get_collating_locale() -> &'static (Locale, UEncoding) {
|
||||
static COLLATING_LOCALE: OnceLock<(Locale, UEncoding)> = OnceLock::new();
|
||||
|
||||
COLLATING_LOCALE.get_or_init(|| {
|
||||
// Look at 3 environment variables in the following order
|
||||
//
|
||||
// 1. LC_ALL
|
||||
// 2. LC_COLLATE
|
||||
// 3. LANG
|
||||
//
|
||||
// Or fallback on Posix locale, with ASCII encoding.
|
||||
COLLATING_LOCALE.get_or_init(|| {get_locale_from_env("LC_COLLATE")})
|
||||
}
|
||||
|
||||
let locale_var = std::env::var("LC_ALL")
|
||||
.or_else(|_| std::env::var("LC_COLLATE"))
|
||||
.or_else(|_| std::env::var("LANG"));
|
||||
/// Get the numeric locale from the environment
|
||||
pub fn get_numeric_locale() -> &'static (Locale, UEncoding) {
|
||||
static NUMERIC_LOCALE: OnceLock<(Locale, UEncoding)> = OnceLock::new();
|
||||
|
||||
if let Ok(locale_var_str) = locale_var {
|
||||
let mut split = locale_var_str.split(&['.', '@']);
|
||||
|
||||
if let Some(simple) = split.next() {
|
||||
let bcp47 = simple.replace("_", "-");
|
||||
let locale = Locale::try_from_str(&bcp47).unwrap_or(DEFAULT_LOCALE);
|
||||
|
||||
// If locale parsing failed, parse the encoding part of the
|
||||
// locale. Treat the special case of the given locale being "C"
|
||||
// which becomes the default locale.
|
||||
let encoding = if (locale != DEFAULT_LOCALE || bcp47 == "C")
|
||||
&& split.next() == Some("UTF-8")
|
||||
{
|
||||
UEncoding::Utf8
|
||||
} else {
|
||||
UEncoding::Ascii
|
||||
};
|
||||
return (locale, encoding);
|
||||
} else {
|
||||
return (DEFAULT_LOCALE, UEncoding::Ascii);
|
||||
};
|
||||
}
|
||||
// Default POSIX locale representing LC_ALL=C
|
||||
(DEFAULT_LOCALE, UEncoding::Ascii)
|
||||
})
|
||||
NUMERIC_LOCALE.get_or_init(|| {get_locale_from_env("LC_NUMERIC")})
|
||||
}
|
||||
|
||||
/// Return the encoding deduced from the locale environment variable.
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ pub use crate::features::fast_inc;
|
|||
pub use crate::features::format;
|
||||
#[cfg(feature = "fs")]
|
||||
pub use crate::features::fs;
|
||||
#[cfg(feature = "i18n")]
|
||||
#[cfg(feature = "i18n-common")]
|
||||
pub use crate::features::i18n;
|
||||
#[cfg(feature = "lines")]
|
||||
pub use crate::features::lines;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue