perf(ext/web): add op_encode_binary_string (#16352)

Add a new op to use in `reader.readAsBinaryString(blob)`.

```
File API binary string: 400b    35.12 µs/iter    (21.93 µs … 3.27 ms)  31.87 µs 131.95 µs 217.63 µs
File API binary string: 4kb     46.49 µs/iter    (29.36 µs … 4.42 ms)   42.5 µs 122.48 µs  155.1 µs
File API binary string: 2.2mb    4.17 ms/iter     (1.75 ms … 8.54 ms)   5.48 ms   7.39 ms   8.54 ms
```

**main**

```
benchmark                          time (avg)             (min … max)       p75       p99      p995
--------------------------------------------------------------------- -----------------------------
File API binary string: 400b    56.17 µs/iter  (43.09 µs … 784.52 µs)   49.6 µs 177.18 µs 241.23 µs
File API binary string: 4kb     277.2 µs/iter   (240.29 µs … 1.84 ms) 269.87 µs 649.79 µs 774.46 µs
File API binary string: 2.2mb  180.03 ms/iter (173.32 ms … 194.35 ms) 182.54 ms 194.35 ms 194.35 ms
```

It can also handle bigger files, when encoding a 200mb file, main
crashes with OOM

```
<--- Last few GCs --->

[132677:0x560504676550]     5012 ms: Scavenge 417.3 (434.6) -> 401.8 (434.6) MB, 0.1 / 0.0 ms  (average mu = 0.824, current mu = 0.825) allocation failure; 
[132677:0x560504676550]     5038 ms: Scavenge 417.3 (434.6) -> 401.8 (434.6) MB, 0.1 / 0.0 ms  (average mu = 0.824, current mu = 0.825) allocation failure; 
[132677:0x560504676550]     5064 ms: Scavenge 417.3 (434.6) -> 401.8 (434.6) MB, 0.1 / 0.0 ms  (average mu = 0.824, current mu = 0.825) allocation failure;
```
This commit is contained in:
Marcos Casagrande 2022-10-24 20:27:22 +02:00 committed by GitHub
parent 7a65b8e8da
commit ac5fcf626a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 66 additions and 10 deletions

View file

@ -247,6 +247,7 @@ Deno.test(function toStringShouldBeWebCompatibility() {
const decoder = new TextDecoder();
assertEquals(decoder.toString(), "[object TextDecoder]");
});
Deno.test(function textEncoderShouldCoerceToString() {
const encoder = new TextEncoder();
const fixutreText = "text";
@ -261,3 +262,60 @@ Deno.test(function textEncoderShouldCoerceToString() {
const decoded = decoder.decode(bytes);
assertEquals(decoded, fixutreText);
});
Deno.test(function binaryEncode() {
// @ts-ignore: Deno.core allowed
const ops = Deno.core.ops;
function asBinaryString(bytes: Uint8Array): string {
return Array.from(bytes).map(
(v: number) => String.fromCodePoint(v),
).join("");
}
function decodeBinary(binaryString: string) {
const chars: string[] = Array.from(binaryString);
return chars.map((v: string): number | undefined => v.codePointAt(0));
}
// invalid utf-8 code points
const invalid = new Uint8Array([0xC0]);
assertEquals(
ops.op_encode_binary_string(invalid),
asBinaryString(invalid),
);
const invalid2 = new Uint8Array([0xC1]);
assertEquals(
ops.op_encode_binary_string(invalid2),
asBinaryString(invalid2),
);
for (let i = 0, j = 255; i <= 255; i++, j--) {
const bytes = new Uint8Array([i, j]);
const binaryString = ops.op_encode_binary_string(bytes);
assertEquals(
binaryString,
asBinaryString(bytes),
);
assertEquals(Array.from(bytes), decodeBinary(binaryString));
}
const inputs = [
"σ😀",
"Кириллица is Cyrillic",
"𝓽𝓮𝔁𝓽",
"lone𝄞\ud888surrogate",
"\udc00\ud800",
"\ud800",
];
for (const input of inputs) {
const bytes = new TextEncoder().encode(input);
const binaryString = ops.op_encode_binary_string(bytes);
assertEquals(
binaryString,
asBinaryString(bytes),
);
assertEquals(Array.from(bytes), decodeBinary(binaryString));
}
});