Use isValidUnicode in fromUtf8 to make it faster. Also fix an off-by-one bug.

This commit is contained in:
Brendan Hansknecht 2023-03-15 07:57:45 -07:00
parent f6677f3c1f
commit 7f3c67ade9
No known key found for this signature in database
GPG key ID: 0EA784685083E75B

View file

@ -1811,7 +1811,7 @@ pub fn fromUtf8RangeC(
pub fn fromUtf8Range(arg: RocList, start: usize, count: usize, update_mode: UpdateMode) FromUtf8Result {
const bytes = @ptrCast([*]const u8, arg.bytes)[start..count];
if (unicode.utf8ValidateSlice(bytes)) {
if (isValidUnicode(@ptrCast([*]const u8, bytes), bytes.len)) {
// the output will be correct. Now we need to clone the input
// TODO: rework this to properly take advantage of seamless slices.
@ -1877,8 +1877,8 @@ fn errorToProblem(bytes: [*]u8, length: usize) struct { index: usize, problem: U
unreachable;
}
pub fn isValidUnicode(ptr: [*]u8, len: usize) callconv(.C) bool {
const buf: []u8 = ptr[0..len];
pub fn isValidUnicode(ptr: [*]const u8, len: usize) callconv(.C) bool {
const buf: []const u8 = ptr[0..len];
const size = @sizeOf(u64);
// TODO: we should test changing the step on other platforms.
@ -1914,7 +1914,8 @@ pub fn isValidUnicode(ptr: [*]u8, len: usize) callconv(.C) bool {
}
}
while (buf[i] < 0b1000_0000) : (i += 1) {
while (buf[i] < 0b1000_0000) {
i += 1;
if (i == buf.len) return true;
}