expr: add locale-aware length

This commit is contained in:
Dorian Peron 2025-07-04 01:40:09 +02:00
parent 7bed9b5a3b
commit 8c565ece3e
2 changed files with 12 additions and 2 deletions

View file

@ -58,3 +58,13 @@ fn index_with_locale(
pub(crate) fn locale_aware_index(left: &MaybeNonUtf8Str, right: &MaybeNonUtf8Str) -> usize {
    // Resolve the encoding of the current locale first, then hand both
    // operands over to the encoding-parameterised implementation.
    let encoding = get_locale_encoding();
    index_with_locale(left, right, encoding)
}
/// Compute the length of `input` according to the current locale encoding.
///
/// With a UTF-8 locale encoding, the result is the number of `char`s when
/// `input` is entirely valid UTF-8; if it is not valid UTF-8, the byte length
/// is returned instead. With the ASCII locale encoding, the byte length is
/// returned directly.
pub(crate) fn locale_aware_length(input: &MaybeNonUtf8Str) -> usize {
    let byte_len = input.len();
    match get_locale_encoding() {
        UEncoding::Ascii => byte_len,
        UEncoding::Utf8 => match std::str::from_utf8(input) {
            Ok(text) => text.chars().count(),
            // Not valid UTF-8 as a whole: fall back to counting bytes.
            Err(_) => byte_len,
        },
    }
}

View file

@ -13,7 +13,7 @@ use onig::{Regex, RegexOptions, Syntax};
use crate::{
ExprError, ExprResult,
locale_aware::{locale_aware_index, locale_comparison},
locale_aware::{locale_aware_index, locale_aware_length, locale_comparison},
};
/// Owned byte buffer (`Vec<u8>`). As the name suggests, this presumably holds
/// string data that is not guaranteed to be valid UTF-8 — confirm against callers.
pub(crate) type MaybeNonUtf8String = Vec<u8>;
@ -583,7 +583,7 @@ impl AstNode {
continue;
};
let length = string?.eval_as_string().iter().count();
let length = locale_aware_length(&string?.eval_as_string());
result_stack.insert(node.id, Ok(length.into()));
}
}