mirror of
https://github.com/denoland/deno.git
synced 2025-09-22 02:12:33 +00:00

Previously, Deno threw an error when creating a buffer via `Buffer.allocUnsafe` or `Buffer.allocUnsafeSlow` with a size of `2**31`, which is not the case in Node.js. These changes allow the following tests to pass: - [test-buffer-large-size-buffer-alloc-unsafe-slow.js](https://github.com/nodejs/node/blob/v24.2.0/test/pummel/test-buffer-large-size-buffer-alloc-unsafe-slow.js) - [test-buffer-large-size-buffer-alloc-unsafe.js](https://github.com/nodejs/node/blob/v24.2.0/test/pummel/test-buffer-large-size-buffer-alloc-unsafe.js) - [test-string-decoder-large-buffer.js](https://github.com/nodejs/node/blob/v24.2.0/test/pummel/test-string-decoder-large-buffer.js) <!-- Before submitting a PR, please read https://docs.deno.com/runtime/manual/references/contributing 1. Give the PR a descriptive title. Examples of good title: - fix(std/http): Fix race condition in server - docs(console): Update docstrings - feat(doc): Handle nested reexports Examples of bad title: - fix #7123 - update docs - fix bugs 2. Ensure there is a related issue and it is referenced in the PR text. 3. Ensure there are tests that cover the changes. 4. Ensure `cargo test` passes. 5. Ensure `./tools/format.js` passes without changing files. 6. Ensure `./tools/lint.js` passes. 7. Open as a draft PR if your work is still in progress. The CI won't run all steps, but you can add '[ci]' to a commit message to force it to. 8. If you would like to run the benchmarks on the CI, add the 'ci-bench' label. -->
195 lines
5.1 KiB
Rust
195 lines
5.1 KiB
Rust
// Copyright 2018-2025 the Deno authors. MIT license.
|
|
|
|
use deno_core::op2;
|
|
use deno_core::v8;
|
|
use deno_error::JsErrorBox;
|
|
|
|
#[op2(fast)]
|
|
pub fn op_is_ascii(#[buffer] buf: &[u8]) -> bool {
|
|
buf.is_ascii()
|
|
}
|
|
|
|
#[op2(fast)]
|
|
pub fn op_is_utf8(#[buffer] buf: &[u8]) -> bool {
|
|
std::str::from_utf8(buf).is_ok()
|
|
}
|
|
|
|
#[op2]
|
|
#[buffer]
|
|
pub fn op_transcode(
|
|
#[buffer] source: &[u8],
|
|
#[string] from_encoding: &str,
|
|
#[string] to_encoding: &str,
|
|
) -> Result<Vec<u8>, JsErrorBox> {
|
|
match (from_encoding, to_encoding) {
|
|
("utf8", "ascii") => Ok(utf8_to_ascii(source)),
|
|
("utf8", "latin1") => Ok(utf8_to_latin1(source)),
|
|
("utf8", "utf16le") => utf8_to_utf16le(source),
|
|
("utf16le", "utf8") => utf16le_to_utf8(source),
|
|
("latin1", "utf16le") | ("ascii", "utf16le") => {
|
|
Ok(latin1_ascii_to_utf16le(source))
|
|
}
|
|
(from, to) => Err(JsErrorBox::generic(format!(
|
|
"Unable to transcode Buffer {from}->{to}"
|
|
))),
|
|
}
|
|
}
|
|
|
|
/// Widens Latin-1 (or ASCII) bytes to UTF-16LE.
///
/// Each input byte becomes a two-byte little-endian code unit: the byte
/// itself followed by a zero high byte.
fn latin1_ascii_to_utf16le(source: &[u8]) -> Vec<u8> {
  let mut widened = Vec::with_capacity(source.len() * 2);
  widened.extend(source.iter().flat_map(|&low| [low, 0]));
  widened
}
|
|
|
|
fn utf16le_to_utf8(source: &[u8]) -> Result<Vec<u8>, JsErrorBox> {
|
|
let ucs2_vec: Vec<u16> = source
|
|
.chunks(2)
|
|
.map(|chunk| u16::from_le_bytes([chunk[0], chunk[1]]))
|
|
.collect();
|
|
String::from_utf16(&ucs2_vec)
|
|
.map(|utf8_string| utf8_string.into_bytes())
|
|
.map_err(|e| JsErrorBox::generic(format!("Invalid UTF-16 sequence: {}", e)))
|
|
}
|
|
|
|
fn utf8_to_utf16le(source: &[u8]) -> Result<Vec<u8>, JsErrorBox> {
|
|
let utf8_string =
|
|
std::str::from_utf8(source).map_err(JsErrorBox::from_err)?;
|
|
let ucs2_vec: Vec<u16> = utf8_string.encode_utf16().collect();
|
|
let bytes: Vec<u8> = ucs2_vec.iter().flat_map(|&x| x.to_le_bytes()).collect();
|
|
Ok(bytes)
|
|
}
|
|
|
|
/// Lossily converts UTF-8 bytes to Latin-1.
///
/// * ASCII bytes are copied through unchanged.
/// * Well-formed two-byte sequences are decoded; code points up to `0xFF`
///   become the matching Latin-1 byte, larger ones become `?`.
/// * Every other sequence (three/four-byte sequences and malformed input)
///   becomes a single `?`, and any continuation bytes that follow are
///   skipped.
///
/// A two-byte lead byte is only decoded when the next byte really is a
/// continuation byte (`10xxxxxx`). Otherwise the lead byte alone is replaced
/// by `?` and the following byte is processed normally instead of being
/// swallowed into a bogus character.
fn utf8_to_latin1(source: &[u8]) -> Vec<u8> {
  let mut latin1_bytes = Vec::with_capacity(source.len());
  let mut i = 0;
  while i < source.len() {
    let lead = source[i];
    // The next byte, but only if it is a valid UTF-8 continuation byte.
    let trail = source
      .get(i + 1)
      .copied()
      .filter(|next| (next & 0xC0) == 0x80);
    match (lead, trail) {
      (0x00..=0x7F, _) => {
        // ASCII character
        latin1_bytes.push(lead);
        i += 1;
      }
      (0xC2..=0xDF, Some(trail)) => {
        // Well-formed 2-byte UTF-8 sequence
        let codepoint = ((lead as u16 & 0x1F) << 6) | (trail as u16 & 0x3F);
        latin1_bytes.push(if codepoint <= 0xFF {
          codepoint as u8
        } else {
          b'?'
        });
        i += 2;
      }
      _ => {
        // 3-byte or 4-byte UTF-8 sequence, or invalid UTF-8
        latin1_bytes.push(b'?');
        // Skip to the next valid UTF-8 start byte
        i += 1;
        while i < source.len() && (source[i] & 0xC0) == 0x80 {
          i += 1;
        }
      }
    }
  }
  latin1_bytes
}
|
|
|
|
/// Lossily converts UTF-8 bytes to ASCII.
///
/// ASCII bytes are copied through unchanged. Any other byte is replaced by
/// a single `?`, after which all immediately following continuation bytes
/// (`10xxxxxx`) are dropped so that one multi-byte character yields one `?`.
fn utf8_to_ascii(source: &[u8]) -> Vec<u8> {
  let mut out = Vec::with_capacity(source.len());
  let mut bytes = source.iter().copied().peekable();
  while let Some(current) = bytes.next() {
    if current <= 0x7F {
      out.push(current);
      continue;
    }
    // Non-ASCII: emit one placeholder, then consume the rest of the sequence.
    out.push(b'?');
    while bytes.next_if(|next| (next & 0xC0) == 0x80).is_some() {}
  }
  out
}
|
|
|
|
#[op2]
|
|
pub fn op_node_decode_utf8<'a>(
|
|
scope: &mut v8::HandleScope<'a>,
|
|
buf: v8::Local<v8::ArrayBufferView>,
|
|
start: v8::Local<v8::Value>,
|
|
end: v8::Local<v8::Value>,
|
|
) -> Result<v8::Local<'a, v8::String>, JsErrorBox> {
|
|
let buf = buf.get_contents(&mut [0; v8::TYPED_ARRAY_MAX_SIZE_IN_HEAP]);
|
|
|
|
let start =
|
|
parse_array_index(scope, start, 0).map_err(JsErrorBox::from_err)?;
|
|
let mut end =
|
|
parse_array_index(scope, end, buf.len()).map_err(JsErrorBox::from_err)?;
|
|
|
|
if end < start {
|
|
end = start;
|
|
}
|
|
|
|
if end > buf.len() {
|
|
return Err(JsErrorBox::from_err(BufferError::OutOfRange));
|
|
}
|
|
|
|
let buffer = &buf[start..end];
|
|
|
|
if buffer.len() <= 256 && buffer.is_ascii() {
|
|
v8::String::new_from_one_byte(scope, buffer, v8::NewStringType::Normal)
|
|
.ok_or_else(|| JsErrorBox::from_err(BufferError::StringTooLong))
|
|
} else {
|
|
v8::String::new_from_utf8(scope, buffer, v8::NewStringType::Normal)
|
|
.ok_or_else(|| JsErrorBox::from_err(BufferError::StringTooLong))
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, thiserror::Error, deno_error::JsError)]
|
|
enum BufferError {
|
|
#[error(
|
|
"Cannot create a string longer than 0x{:x} characters",
|
|
v8::String::MAX_LENGTH
|
|
)]
|
|
#[class(generic)]
|
|
#[property("code" = "ERR_STRING_TOO_LONG")]
|
|
StringTooLong,
|
|
#[error("Invalid type")]
|
|
#[class(generic)]
|
|
InvalidType,
|
|
#[error("Index out of range")]
|
|
#[class(range)]
|
|
#[property("code" = "ERR_OUT_OF_RANGE")]
|
|
OutOfRange,
|
|
}
|
|
|
|
#[inline(always)]
|
|
fn parse_array_index(
|
|
scope: &mut v8::HandleScope,
|
|
arg: v8::Local<v8::Value>,
|
|
default: usize,
|
|
) -> Result<usize, BufferError> {
|
|
if arg.is_undefined() {
|
|
return Ok(default);
|
|
}
|
|
|
|
let Some(arg) = arg.integer_value(scope) else {
|
|
return Err(BufferError::InvalidType);
|
|
};
|
|
if arg < 0 {
|
|
return Err(BufferError::OutOfRange);
|
|
}
|
|
if arg > isize::MAX as i64 {
|
|
return Err(BufferError::OutOfRange);
|
|
}
|
|
Ok(arg as usize)
|
|
}
|