feat(ops): Fast zero copy string arguments (#16777)

Uses SeqOneByteString optimization to do zero-copy `&str` arguments in
fast calls.

- [x] Depends on https://github.com/denoland/rusty_v8/pull/1129
- [x] Depends on
4036884
- [x] Disable in async ops
- [x] Make it work with owned `String` with an extra alloc in fast path.
- [x] Support `Cow<'_, str>`. Owned for slow case, Borrowed for fast
case

```rust
#[op]
fn op_string_len(s: &str) -> u32 { 
  str.len() as u32 
}
```
This commit is contained in:
Divy Srivastava 2022-12-01 21:29:15 -08:00 committed by GitHub
parent 075854e516
commit 9b2b8df927
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
34 changed files with 804 additions and 97 deletions

View file

@ -270,7 +270,7 @@ fn op_encoding_decode_single(
#[op]
fn op_encoding_new_decoder(
state: &mut OpState,
label: String,
label: &str,
fatal: bool,
ignore_bom: bool,
) -> Result<ResourceId, AnyError> {
@ -352,25 +352,43 @@ impl Resource for TextDecoderResource {
}
}
#[op(v8)]
#[op]
fn op_encoding_encode_into(
scope: &mut v8::HandleScope,
input: serde_v8::Value,
input: Cow<'_, str>,
buffer: &mut [u8],
out_buf: &mut [u32],
) -> Result<(), AnyError> {
let s = v8::Local::<v8::String>::try_from(input.v8_value)?;
) {
// Since `input` is already UTF-8, we can simply find the last UTF-8 code
// point boundary from input that fits in `buffer`, and copy the bytes up to
// that point.
let boundary = if buffer.len() >= input.len() {
input.len()
} else {
let mut boundary = buffer.len();
let mut nchars = 0;
out_buf[1] = s.write_utf8(
scope,
buffer,
Some(&mut nchars),
v8::WriteOptions::NO_NULL_TERMINATION
| v8::WriteOptions::REPLACE_INVALID_UTF8,
) as u32;
out_buf[0] = nchars as u32;
Ok(())
// The maximum length of a UTF-8 code point is 4 bytes.
for _ in 0..4 {
if input.is_char_boundary(boundary) {
break;
}
debug_assert!(boundary > 0);
boundary -= 1;
}
debug_assert!(input.is_char_boundary(boundary));
boundary
};
buffer[..boundary].copy_from_slice(input[..boundary].as_bytes());
// The `read` output parameter is measured in UTF-16 code units.
out_buf[0] = match input {
// Borrowed Cow strings are zero-copy views into the V8 heap.
// Thus, they are guarantee to be SeqOneByteString.
Cow::Borrowed(v) => v[..boundary].len() as u32,
Cow::Owned(v) => v[..boundary].encode_utf16().count() as u32,
};
out_buf[1] = boundary as u32;
}
#[op(v8)]