From 8c565ece3eb0f3bb4fde4fa2d2dfc77899d6f23d Mon Sep 17 00:00:00 2001
From: Dorian Peron
Date: Fri, 4 Jul 2025 01:40:09 +0200
Subject: [PATCH] expr: add locale-aware length

---
Reviewer notes (text between `---` and the diffstat is ignored by `git am`):

  * locale_aware.rs: adds `locale_aware_length`, which matches on
    `get_locale_encoding()`. For `UEncoding::Utf8` it returns the number of
    chars when the whole input decodes as UTF-8, and the byte length of the
    input when decoding fails. For `UEncoding::Ascii` it returns the byte
    length.
  * syntax_tree.rs: the `length` computation in `impl AstNode` now calls
    `locale_aware_length` on the evaluated string instead of counting its
    elements with `.iter().count()`; the import list is extended accordingly.

 src/uu/expr/src/locale_aware.rs | 10 ++++++++++
 src/uu/expr/src/syntax_tree.rs  |  4 ++--
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/src/uu/expr/src/locale_aware.rs b/src/uu/expr/src/locale_aware.rs
index ebdc78e0c..cc07f9cc6 100644
--- a/src/uu/expr/src/locale_aware.rs
+++ b/src/uu/expr/src/locale_aware.rs
@@ -58,3 +58,13 @@ fn index_with_locale(
 pub(crate) fn locale_aware_index(left: &MaybeNonUtf8Str, right: &MaybeNonUtf8Str) -> usize {
     index_with_locale(left, right, get_locale_encoding())
 }
+
+/// Compute the length of a string according to the current locale. In a UTF-8
+/// locale, count chars when the whole input is valid UTF-8, and fall back to
+/// its byte length otherwise. In a non-UTF-8 locale, return the byte length.
+pub(crate) fn locale_aware_length(input: &MaybeNonUtf8Str) -> usize {
+    match get_locale_encoding() {
+        UEncoding::Utf8 => std::str::from_utf8(input).map_or(input.len(), |s| s.chars().count()),
+        UEncoding::Ascii => input.len(),
+    }
+}
diff --git a/src/uu/expr/src/syntax_tree.rs b/src/uu/expr/src/syntax_tree.rs
index 66e75044d..8a45c2362 100644
--- a/src/uu/expr/src/syntax_tree.rs
+++ b/src/uu/expr/src/syntax_tree.rs
@@ -13,7 +13,7 @@ use onig::{Regex, RegexOptions, Syntax};
 
 use crate::{
     ExprError, ExprResult,
-    locale_aware::{locale_aware_index, locale_comparison},
+    locale_aware::{locale_aware_index, locale_aware_length, locale_comparison},
 };
 
 pub(crate) type MaybeNonUtf8String = Vec<u8>;
@@ -583,7 +583,7 @@ impl AstNode {
                         continue;
                     };
 
-                    let length = string?.eval_as_string().iter().count();
+                    let length = locale_aware_length(&string?.eval_as_string());
                     result_stack.insert(node.id, Ok(length.into()));
                 }
             }