diff --git a/Cargo.lock b/Cargo.lock index d3a59a95d..5a9af63f9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -990,6 +990,17 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "fixed_decimal" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35943d22b2f19c0cb198ecf915910a8158e94541c89dcc63300d7799d46c2c5e" +dependencies = [ + "displaydoc", + "smallvec", + "writeable", +] + [[package]] name = "flate2" version = "1.1.2" @@ -1290,6 +1301,29 @@ dependencies = [ "zerovec", ] +[[package]] +name = "icu_decimal" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fec61c43fdc4e368a9f450272833123a8ef0d7083a44597660ce94d791b8a2e2" +dependencies = [ + "displaydoc", + "fixed_decimal", + "icu_decimal_data", + "icu_locale", + "icu_locale_core", + "icu_provider", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_decimal_data" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b70963bc35f9bdf1bc66a5c1f458f4991c1dc71760e00fa06016b2c76b2738d5" + [[package]] name = "icu_locale" version = "2.0.0" @@ -3946,7 +3980,9 @@ dependencies = [ "glob", "hex", "icu_collator", + "icu_decimal", "icu_locale", + "icu_provider", "itertools 0.14.0", "libc", "md-5", diff --git a/Cargo.toml b/Cargo.toml index 5d9479bc8..0ec32fb39 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -314,7 +314,9 @@ glob = "0.3.1" half = "2.4.1" hostname = "0.4" icu_collator = "2.0.0" +icu_decimal = "2.0.0" icu_locale = "2.0.0" +icu_provider = "2.0.0" indicatif = "0.18.0" itertools = "0.14.0" jiff = { version = "0.2.10", default-features = false, features = [ diff --git a/src/uucore/Cargo.toml b/src/uucore/Cargo.toml index 6ff74992c..5a73d9f0d 100644 --- a/src/uucore/Cargo.toml +++ b/src/uucore/Cargo.toml @@ -27,10 +27,6 @@ dns-lookup = { workspace = true, optional = true } dunce = { version = "1.0.4", optional = true } wild = "2.2.1" glob = { workspace = 
true, optional = true } -icu_collator = { workspace = true, optional = true, features = [ - "compiled_data", -] } -icu_locale = { workspace = true, optional = true, features = ["compiled_data"] } itertools = { workspace = true, optional = true } time = { workspace = true, optional = true, features = [ "formatting", @@ -59,6 +55,16 @@ bigdecimal = { workspace = true, optional = true } num-traits = { workspace = true, optional = true } selinux = { workspace = true, optional = true } +# icu stuff +icu_collator = { workspace = true, optional = true, features = [ + "compiled_data", +] } +icu_decimal = { workspace = true, optional = true, features = [ + "compiled_data", +] } +icu_locale = { workspace = true, optional = true, features = ["compiled_data"] } +icu_provider = { workspace = true, optional = true } + # Fluent dependencies fluent = { workspace = true } fluent-syntax = { workspace = true } @@ -108,7 +114,9 @@ format = [ "num-traits", "quoting-style", ] -i18n = ["icu_collator", "icu_locale"] +i18n-all = ["i18n-decimal"] +i18n-common = ["icu_locale", "icu_provider"] +i18n-decimal = ["i18n-common", "icu_decimal", "icu_locale", "icu_provider"] mode = ["libc"] perms = ["entries", "libc", "walkdir"] buf-copy = [] @@ -116,7 +124,7 @@ parser = ["extendedbigdecimal", "glob", "num-traits"] pipes = [] process = ["libc"] proc-info = ["tty", "walkdir"] -quoting-style = ["i18n"] +quoting-style = ["i18n-common"] ranges = [] ringbuffer = [] selinux = ["dep:selinux"] diff --git a/src/uucore/src/lib/features.rs b/src/uucore/src/lib/features.rs index fcc97b0f0..3a622cd68 100644 --- a/src/uucore/src/lib/features.rs +++ b/src/uucore/src/lib/features.rs @@ -26,7 +26,7 @@ pub mod format; pub mod fs; #[cfg(feature = "fsext")] pub mod fsext; -#[cfg(feature = "i18n")] +#[cfg(feature = "i18n-common")] pub mod i18n; #[cfg(feature = "lines")] pub mod lines; diff --git a/src/uucore/src/lib/features/i18n/decimal.rs b/src/uucore/src/lib/features/i18n/decimal.rs new file mode 100644 index 
000000000..9fa2d8d7b --- /dev/null +++ b/src/uucore/src/lib/features/i18n/decimal.rs @@ -0,0 +1,59 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +use std::sync::OnceLock; + +use icu_decimal::provider::DecimalSymbolsV1; +use icu_locale::Locale; +use icu_provider::prelude::*; + +use crate::i18n::get_numeric_locale; + +/// Return the decimal separator for the given locale +/// +/// # Panics +/// +/// Panics if `icu_decimal::provider::Baked` fails to load the decimal symbols +/// for `loc`. +fn get_decimal_separator(loc: Locale) -> String { + let data_locale = DataLocale::from(loc); + + let request = DataRequest { + id: DataIdentifierBorrowed::for_locale(&data_locale), + metadata: DataRequestMetadata::default(), + }; + + let response: DataResponse<DecimalSymbolsV1> = + icu_decimal::provider::Baked.load(request).unwrap(); + + response.payload.get().decimal_separator().to_string() +} + +/// Return the decimal separator from the language we're working with. +/// Example: +/// Say we need to format 1000.5 +/// en_US: 1,000.5 -> decimal separator is '.' +/// fr_FR: 1 000,5 -> decimal separator is ',' +/// +/// The separator is computed once, from [`get_numeric_locale`], and cached in a +/// static `OnceLock`. +pub fn locale_decimal_separator() -> &'static str { + static DECIMAL_SEP: OnceLock<String> = OnceLock::new(); + + DECIMAL_SEP.get_or_init(|| get_decimal_separator(get_numeric_locale().0.clone())) +} + +#[cfg(test)] +mod tests { + use icu_locale::locale; + + use super::get_decimal_separator; + + #[test] + fn test_simple_separator() { + assert_eq!(get_decimal_separator(locale!("en")), "."); + assert_eq!(get_decimal_separator(locale!("fr")), ","); + } +} diff --git a/src/uucore/src/lib/features/i18n/mod.rs b/src/uucore/src/lib/features/i18n/mod.rs index 5a7cf8ea3..dd4ac2d5e 100644 --- a/src/uucore/src/lib/features/i18n/mod.rs +++ b/src/uucore/src/lib/features/i18n/mod.rs @@ -1,7 +1,15 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. 
+ use std::sync::OnceLock; use icu_locale::{Locale, locale}; +#[cfg(feature = "i18n-decimal")] +pub mod decimal; + /// The encoding specified by the locale, if specified /// Currently only supports ASCII and UTF-8 for the sake of simplicity. #[derive(Debug, PartialEq, Eq, Clone, Copy)] @@ -12,48 +20,62 @@ pub enum UEncoding { const DEFAULT_LOCALE: Locale = locale!("en-US-posix"); -/// Deduce the locale from the current environment +/// Look at 3 environment variables in the following order +/// +/// 1. LC_ALL +/// 2. `locale_name` +/// 3. LANG +/// +/// Variables that are unset or set to the empty string are skipped (POSIX +/// treats an empty value as unset). +/// +/// Or fallback on Posix locale, with ASCII encoding. +fn get_locale_from_env(locale_name: &str) -> (Locale, UEncoding) { + let locale_var = ["LC_ALL", locale_name, "LANG"] + .iter() + .find_map(|&key| std::env::var(key).ok().filter(|value| !value.is_empty())); + + if let Some(locale_var_str) = locale_var { + let mut split = locale_var_str.split(&['.', '@']); + + if let Some(simple) = split.next() { + // Naively convert the locale name to BCP47 tag format. + // + // See https://en.wikipedia.org/wiki/IETF_language_tag + let bcp47 = simple.replace("_", "-"); + let locale = Locale::try_from_str(&bcp47).unwrap_or(DEFAULT_LOCALE); + + // Only parse the encoding part when the name yielded a non-default + // locale. Treat the special case of the given locale being "C" + // which becomes the default locale. + let encoding = if (locale != DEFAULT_LOCALE || bcp47 == "C") + && split + .next() + .is_some_and(|enc| enc.eq_ignore_ascii_case("utf-8")) + { + UEncoding::Utf8 + } else { + UEncoding::Ascii + }; + return (locale, encoding); + } + } + // Default POSIX locale representing LC_ALL=C + (DEFAULT_LOCALE, UEncoding::Ascii) +} + +/// Get the collating locale from the environment fn get_collating_locale() -> &'static (Locale, UEncoding) { static COLLATING_LOCALE: OnceLock<(Locale, UEncoding)> = OnceLock::new(); - COLLATING_LOCALE.get_or_init(|| { - // Look at 3 environment variables in the following order - // - // 1. LC_ALL - // 2. LC_COLLATE - // 3. 
LANG - // - // Or fallback on Posix locale, with ASCII encoding. + COLLATING_LOCALE.get_or_init(|| get_locale_from_env("LC_COLLATE")) +} - let locale_var = std::env::var("LC_ALL") - .or_else(|_| std::env::var("LC_COLLATE")) - .or_else(|_| std::env::var("LANG")); +/// Get the numeric locale from the environment +pub fn get_numeric_locale() -> &'static (Locale, UEncoding) { + static NUMERIC_LOCALE: OnceLock<(Locale, UEncoding)> = OnceLock::new(); - if let Ok(locale_var_str) = locale_var { - let mut split = locale_var_str.split(&['.', '@']); - - if let Some(simple) = split.next() { - let bcp47 = simple.replace("_", "-"); - let locale = Locale::try_from_str(&bcp47).unwrap_or(DEFAULT_LOCALE); - - // If locale parsing failed, parse the encoding part of the - // locale. Treat the special case of the given locale being "C" - // which becomes the default locale. - let encoding = if (locale != DEFAULT_LOCALE || bcp47 == "C") - && split.next() == Some("UTF-8") - { - UEncoding::Utf8 - } else { - UEncoding::Ascii - }; - return (locale, encoding); - } else { - return (DEFAULT_LOCALE, UEncoding::Ascii); - }; - } - // Default POSIX locale representing LC_ALL=C - (DEFAULT_LOCALE, UEncoding::Ascii) - }) + NUMERIC_LOCALE.get_or_init(|| get_locale_from_env("LC_NUMERIC")) } /// Return the encoding deduced from the locale environment variable. diff --git a/src/uucore/src/lib/lib.rs b/src/uucore/src/lib/lib.rs index 6a137b787..dafcdfca4 100644 --- a/src/uucore/src/lib/lib.rs +++ b/src/uucore/src/lib/lib.rs @@ -51,7 +51,7 @@ pub use crate::features::fast_inc; pub use crate::features::format; #[cfg(feature = "fs")] pub use crate::features::fs; -#[cfg(feature = "i18n")] +#[cfg(feature = "i18n-common")] pub use crate::features::i18n; #[cfg(feature = "lines")] pub use crate::features::lines;