mirror of
https://github.com/roc-lang/roc.git
synced 2025-08-04 12:18:19 +00:00
with_ascii_lowercased zig builtin
This commit is contained in:
parent
809fe23afd
commit
8403f1ea19
36 changed files with 303 additions and 163 deletions
|
@ -211,6 +211,7 @@ comptime {
|
|||
exportStrFn(str.withCapacityC, "with_capacity");
|
||||
exportStrFn(str.strAllocationPtr, "allocation_ptr");
|
||||
exportStrFn(str.strReleaseExcessCapacity, "release_excess_capacity");
|
||||
exportStrFn(str.strWithAsciiLowercased, "with_ascii_lowercased");
|
||||
|
||||
for (INTEGERS) |T| {
|
||||
str.exportFromInt(T, ROC_BUILTINS ++ "." ++ STR ++ ".from_int.");
|
||||
|
|
|
@ -2,6 +2,7 @@ const utils = @import("utils.zig");
|
|||
const RocList = @import("list.zig").RocList;
|
||||
const UpdateMode = utils.UpdateMode;
|
||||
const std = @import("std");
|
||||
const ascii = std.ascii;
|
||||
const mem = std.mem;
|
||||
const unicode = std.unicode;
|
||||
const testing = std.testing;
|
||||
|
@ -374,7 +375,12 @@ pub const RocStr = extern struct {
|
|||
return 1;
|
||||
}
|
||||
|
||||
const ptr: [*]usize = @as([*]usize, @ptrCast(@alignCast(self.bytes)));
|
||||
const data_ptr = if (self.isSeamlessSlice())
|
||||
self.getAllocationPtr()
|
||||
else
|
||||
self.bytes;
|
||||
|
||||
const ptr: [*]usize = @as([*]usize, @ptrCast(@alignCast(data_ptr)));
|
||||
return (ptr - 1)[0];
|
||||
}
|
||||
|
||||
|
@ -611,16 +617,6 @@ fn initFromSmallStr(slice_bytes: [*]u8, len: usize, _: usize) RocStr {
|
|||
return RocStr.init(slice_bytes, len);
|
||||
}
|
||||
|
||||
// Wraps `slice_bytes[0..len]` as a seamless slice instead of copying to a new small str.
// The alloc_ptr must already be shifted to be ready for storing in a seamless slice.
fn initFromBigStr(slice_bytes: [*]u8, len: usize, alloc_ptr: usize) RocStr {
    // The length field carries SEAMLESS_SLICE_BIT alongside the byte count.
    const tagged_len = len | SEAMLESS_SLICE_BIT;

    return RocStr{
        .bytes = slice_bytes,
        .length = tagged_len,
        .capacity_or_alloc_ptr = alloc_ptr,
    };
}
|
||||
|
||||
fn strSplitOnHelp(array: [*]RocStr, string: RocStr, delimiter: RocStr) void {
|
||||
if (delimiter.len() == 0) {
|
||||
string.incref(1);
|
||||
|
@ -1968,6 +1964,66 @@ fn countTrailingWhitespaceBytes(string: RocStr) usize {
|
|||
return byte_count;
|
||||
}
|
||||
|
||||
// Str.with_ascii_lowercased
//
// Returns `string` with every ASCII uppercase byte replaced by its lowercase
// counterpart (via `ascii.toLower`); all other bytes are left untouched.
//
// If `string.isUnique()` holds, the bytes are rewritten in place and that same
// RocStr is returned. Otherwise the bytes are copied into a new RocStr with
// `RocStr.fromSlice`, this function's reference to the original is released,
// and the copy is lowercased and returned.
pub fn strWithAsciiLowercased(string: RocStr) callconv(.C) RocStr {
    var new_str = if (string.isUnique())
        string
    else blk: {
        // Copy while we still hold our reference and release it only afterwards,
        // so the source bytes are never read after the decref.
        const copy = RocStr.fromSlice(string.asSlice());
        string.decref();
        break :blk copy;
    };

    // Take the length from the string we are actually mutating.
    const new_str_bytes = new_str.asU8ptrMut()[0..new_str.len()];
    for (new_str_bytes) |*c| {
        c.* = ascii.toLower(c.*);
    }
    return new_str;
}
|
||||
|
||||
test "withAsciiLowercased: small str" {
    // The input must be a small string for this case to be meaningful.
    const input = RocStr.fromSlice("cOFFÉ");
    try expect(input.isSmallStr());

    // Only the ASCII letters change; the trailing "É" stays as it was.
    const want = RocStr.fromSlice("coffÉ");
    defer want.decref();

    const got = strWithAsciiLowercased(input);
    defer got.decref();

    try expect(got.isSmallStr());
    try expect(got.eq(want));
}
|
||||
|
||||
test "withAsciiLowercased: non small str" {
    // Long enough that the string is not a small string.
    const input = RocStr.fromSlice("cOFFÉ cOFFÉ cOFFÉ cOFFÉ cOFFÉ cOFFÉ");
    defer input.decref();
    try expect(!input.isSmallStr());

    // Only the ASCII letters change; every "É" stays as it was.
    const want = RocStr.fromSlice("coffÉ coffÉ coffÉ coffÉ coffÉ coffÉ");
    defer want.decref();

    const got = strWithAsciiLowercased(input);

    try expect(!got.isSmallStr());
    try expect(got.eq(want));
}
|
||||
|
||||
test "withAsciiLowercased: seamless slice" {
    // Take a substring of a long string that skips its first byte, and check
    // that the result is a seamless slice.
    const backing = RocStr.fromSlice("cOFFÉ cOFFÉ cOFFÉ cOFFÉ cOFFÉ cOFFÉ");
    const input = substringUnsafeC(backing, 1, backing.len() - 1);
    defer input.decref();

    try expect(input.isSeamlessSlice());

    // Same text as the backing string minus its first byte, ASCII-lowercased.
    const want = RocStr.fromSlice("offÉ coffÉ coffÉ coffÉ coffÉ coffÉ");
    defer want.decref();

    const got = strWithAsciiLowercased(input);

    try expect(!got.isSmallStr());
    try expect(got.eq(want));
}
|
||||
|
||||
fn rcNone(_: ?[*]u8) callconv(.C) void {}
|
||||
|
||||
fn decStr(ptr: ?[*]u8) callconv(.C) void {
|
||||
|
|
|
@ -369,6 +369,7 @@ module [
|
|||
contains,
|
||||
drop_prefix,
|
||||
drop_suffix,
|
||||
with_ascii_lowercased,
|
||||
]
|
||||
|
||||
import Bool exposing [Bool]
|
||||
|
@ -1092,3 +1093,18 @@ drop_suffix = |haystack, suffix|
|
|||
substring_unsafe(haystack, start, len)
|
||||
else
|
||||
haystack
|
||||
|
||||
## Returns a version of the string with all [ASCII characters](https://en.wikipedia.org/wiki/ASCII) lowercased. Non-ASCII characters are left unmodified. For example:
|
||||
##
|
||||
## ```roc
|
||||
## expect "CAFÉ".with_ascii_lowercased() == "cafÉ"
|
||||
## ```
|
||||
##
|
||||
## This function is useful for things like [command-line options](https://en.wikipedia.org/wiki/Command-line_interface#Command-line_option) and [environment variables](https://en.wikipedia.org/wiki/Environment_variable) where you know in advance that you're dealing with a hardcoded string containing only ASCII characters. It has better performance than lowercasing operations which take Unicode into account.
|
||||
##
|
||||
## That said, strings received from user input can always contain non-ASCII Unicode characters, and lowercasing [Unicode](https://unicode.org) works differently in different languages. For example, the string `"I"` lowercases to `"i"` in English and to `"ı"` (a [dotless i](https://en.wikipedia.org/wiki/Dotless_I)) in Turkish. These rules can also change in each [Unicode release](https://www.unicode.org/releases/), so we have a separate [`unicode` package](https://github.com/roc-lang/unicode) for Unicode capitalization that can be upgraded independently from the language's builtins.
|
||||
##
|
||||
## To do a case-insensitive comparison of the ASCII characters in a string, use [`caseless_ascii_equals`](#caseless_ascii_equals).
|
||||
with_ascii_lowercased: Str -> Str
|
||||
|
||||
# Only the ASCII letters are lowercased; the non-ASCII "É" is left unmodified.
expect Str.with_ascii_lowercased "cOFFÉ" == "coffÉ"
|
||||
|
|
|
@ -358,6 +358,7 @@ pub const STR_CLONE_TO: &str = "roc_builtins.str.clone_to";
|
|||
pub const STR_WITH_CAPACITY: &str = "roc_builtins.str.with_capacity";
|
||||
pub const STR_ALLOCATION_PTR: &str = "roc_builtins.str.allocation_ptr";
|
||||
pub const STR_RELEASE_EXCESS_CAPACITY: &str = "roc_builtins.str.release_excess_capacity";
|
||||
pub const STR_WITH_ASCII_LOWERCASED: &str = "roc_builtins.str.with_ascii_lowercased";
|
||||
|
||||
pub const LIST_MAP: &str = "roc_builtins.list.map";
|
||||
pub const LIST_MAP2: &str = "roc_builtins.list.map2";
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue