mirror of
https://github.com/roc-lang/roc.git
synced 2025-09-26 13:29:12 +00:00
fix accessing bug
This commit is contained in:
parent
88a69a23a8
commit
0f708d7577
1 changed files with 7 additions and 8 deletions
|
@ -1811,7 +1811,7 @@ pub fn fromUtf8RangeC(
|
||||||
pub fn fromUtf8Range(arg: RocList, start: usize, count: usize, update_mode: UpdateMode) FromUtf8Result {
|
pub fn fromUtf8Range(arg: RocList, start: usize, count: usize, update_mode: UpdateMode) FromUtf8Result {
|
||||||
const bytes = @ptrCast([*]const u8, arg.bytes)[start..count];
|
const bytes = @ptrCast([*]const u8, arg.bytes)[start..count];
|
||||||
|
|
||||||
if (isValidUnicode(@ptrCast([*]const u8, bytes), bytes.len)) {
|
if (isValidUnicode(bytes)) {
|
||||||
// the output will be correct. Now we need to clone the input
|
// the output will be correct. Now we need to clone the input
|
||||||
|
|
||||||
// TODO: rework this to properly take advantage fo seamless slices.
|
// TODO: rework this to properly take advantage fo seamless slices.
|
||||||
|
@ -1877,9 +1877,7 @@ fn errorToProblem(bytes: [*]u8, length: usize) struct { index: usize, problem: U
|
||||||
unreachable;
|
unreachable;
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn isValidUnicode(ptr: [*]const u8, len: usize) callconv(.C) bool {
|
pub fn isValidUnicode(buf: []const u8) bool {
|
||||||
const buf: []const u8 = ptr[0..len];
|
|
||||||
|
|
||||||
const size = @sizeOf(u64);
|
const size = @sizeOf(u64);
|
||||||
// TODO: we should test changing the step on other platforms.
|
// TODO: we should test changing the step on other platforms.
|
||||||
// The general tradeoff is making extremely large strings potentially much faster
|
// The general tradeoff is making extremely large strings potentially much faster
|
||||||
|
@ -1887,7 +1885,7 @@ pub fn isValidUnicode(ptr: [*]const u8, len: usize) callconv(.C) bool {
|
||||||
const step = size;
|
const step = size;
|
||||||
var i: usize = 0;
|
var i: usize = 0;
|
||||||
while (i + step < buf.len) {
|
while (i + step < buf.len) {
|
||||||
var bytes: u64 = 0;
|
var bytes: u64 = undefined;
|
||||||
@memcpy(@ptrCast([*]u8, &bytes), @ptrCast([*]const u8, buf) + i, size);
|
@memcpy(@ptrCast([*]u8, &bytes), @ptrCast([*]const u8, buf) + i, size);
|
||||||
const unicode_bytes = bytes & 0x8080_8080_8080_8080;
|
const unicode_bytes = bytes & 0x8080_8080_8080_8080;
|
||||||
if (unicode_bytes == 0) {
|
if (unicode_bytes == 0) {
|
||||||
|
@ -1899,21 +1897,22 @@ pub fn isValidUnicode(ptr: [*]const u8, len: usize) callconv(.C) bool {
|
||||||
|
|
||||||
while (buf[i] >= 0b1000_0000) {
|
while (buf[i] >= 0b1000_0000) {
|
||||||
// This forces prefetching, otherwise the loop can run at about half speed.
|
// This forces prefetching, otherwise the loop can run at about half speed.
|
||||||
var small_buf = [4]u8{ 0, 0, 0, 0 };
|
var small_buf: [4]u8 = undefined;
|
||||||
@memcpy(&small_buf, @ptrCast([*]const u8, buf) + i, size);
|
@memcpy(&small_buf, @ptrCast([*]const u8, buf) + i, 4);
|
||||||
// TODO: Should we always inline these function calls below?
|
// TODO: Should we always inline these function calls below?
|
||||||
if (std.unicode.utf8ByteSequenceLength(small_buf[0])) |cp_len| {
|
if (std.unicode.utf8ByteSequenceLength(small_buf[0])) |cp_len| {
|
||||||
if (std.meta.isError(std.unicode.utf8Decode(small_buf[0..cp_len]))) {
|
if (std.meta.isError(std.unicode.utf8Decode(small_buf[0..cp_len]))) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
i += cp_len;
|
i += cp_len;
|
||||||
if (i == buf.len) return true;
|
if (i + 4 >= buf.len) break;
|
||||||
} else |_| {
|
} else |_| {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (i == buf.len) return true;
|
||||||
while (buf[i] < 0b1000_0000) {
|
while (buf[i] < 0b1000_0000) {
|
||||||
i += 1;
|
i += 1;
|
||||||
if (i == buf.len) return true;
|
if (i == buf.len) return true;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue