mirror of
https://github.com/ruuda/rcl.git
synced 2025-12-23 04:47:19 +00:00
This ensures that
foreach s: String: s.len() == s.chars().len()
which seems like a desirable property to me. In a low-level language
like Rust, len() counting the UTF-8 byte length makes sense, but in a
high-level configuration language, it makes more sense to me to count
code points. One could argue that if we go down this slippery slope,
it should count grapheme clusters instead, but the definition of those
is not stable over time and depends on geopolitical considerations of
$CURRENT_YEAR; I don't want to go there.
17 lines
312 B
Text
17 lines
312 B
Text
// Sample strings used to compare `len` with `chars`. The comment above each
// non-ASCII literal spells out its code points as escapes, because visually
// identical literals can consist of different code point sequences.
let strings = [
|
|
"example",
|
|
// The string "Z\u{00fc}rich", where U+00FC is a single precomposed
// u-with-diaeresis code point.
|
|
"Zürich",
|
|
// The string "Zu\u{0308}rich", where a plain "u" is followed by U+0308,
// the combining diaeresis; this is one code point more than the form above.
|
|
"Zürich",
|
|
// The string "\u{1f574}\u{fe0e}": U+1F574 followed by U+FE0E, a variation
// selector, so two code points in total.
|
|
"🕴︎",
|
|
];
|
|
// For every sample string, assert that `s.len()` equals the length of
// `s.chars()`, then yield `s.len()` as the list element. If the assertion
// fails, its message carries the offending string and its `chars()` result.
[
|
|
for s in strings:
|
|
assert s.len() == s.chars().len(), ["String.len counts chars.", s, s.chars()];
|
|
s.len()
|
|
]
|
|
|
|
# output:
|
|
[7, 6, 7, 2]
|