fix(ext/node): fix buffer.includes edge cases (#29591)

This commit is contained in:
Yoshiya Hinosawa 2025-06-04 14:39:27 +09:00 committed by GitHub
parent ff8160b594
commit 0f6d515c91
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 28 additions and 48 deletions

View file

@ -36,18 +36,6 @@ export function indexOfNeedle(
return -1; return -1;
} }
/**
 * Encodes a number as its big-endian byte sequence without leading zero bytes.
 *
 * The least significant byte is always emitted; more significant bytes are
 * prepended while the remaining value is at least 256. Values below 256
 * (including negative numbers and `NaN`) therefore yield exactly one byte:
 * the low 8 bits of the value after 32-bit integer coercion.
 *
 * Higher bytes are extracted with division rather than `>>>`, because the
 * shift operator coerces its operand to an unsigned 32-bit integer first and
 * would silently drop every byte above the fourth for values >= 2 ** 32.
 *
 * @param n The number to encode.
 * @returns The bytes of `n`, most significant byte first.
 */
export function numberToBytes(n: number): Uint8Array {
  if (n === 0) return new Uint8Array([0]);

  // `Infinity` has no byte representation and would never shrink under
  // division. Return the two zero bytes the shift-based version produced so
  // callers that inspect the length keep behaving the same.
  if (n === Infinity) return new Uint8Array([0, 0]);

  // `& 255` keeps the low 8 bits; for integers this is exact even beyond
  // 2 ** 32 because the 32-bit coercion is a modulo that preserves low bits.
  const bytes: number[] = [n & 255];

  let remaining = n;
  while (remaining >= 256) {
    // Dividing by a power of two is exact in floating point, so this drops
    // the lowest byte without the 32-bit truncation of `>>> 8`.
    remaining = Math.floor(remaining / 256);
    bytes.unshift(remaining & 255);
  }

  return new Uint8Array(bytes);
}
// TODO(Soremwar) // TODO(Soremwar)
// Check if offset or buffer can be transformed in order to just use std's lastIndexOf directly // Check if offset or buffer can be transformed in order to just use std's lastIndexOf directly
// This implementation differs from std's lastIndexOf in the fact that // This implementation differs from std's lastIndexOf in the fact that
@ -101,8 +89,6 @@ function findLastIndex(
return searchableBufferLastIndex - index; return searchableBufferLastIndex - index;
} }
// TODO(@bartlomieju):
// Take encoding into account when evaluating index
function indexOfBuffer( function indexOfBuffer(
targetBuffer: Uint8Array, targetBuffer: Uint8Array,
buffer: Uint8Array, buffer: Uint8Array,
@ -114,6 +100,14 @@ function indexOfBuffer(
throw new Error(`Unknown encoding code ${encoding}`); throw new Error(`Unknown encoding code ${encoding}`);
} }
// If the encoding is UCS2 and haystack or needle has a length less than 2, the search will always fail
// https://github.com/nodejs/node/blob/fbdfe9399cf6c660e67fd7d6ceabfb106e32d787/src/node_buffer.cc#L1067-L1069
if (encoding === Encodings.UCS2) {
if (buffer.length < 2 || targetBuffer.length < 2) {
return -1;
}
}
if (!forwardDirection) { if (!forwardDirection) {
// If negative the offset is calculated from the end of the buffer // If negative the offset is calculated from the end of the buffer
@ -137,23 +131,17 @@ function indexOfBuffer(
return indexOfNeedle(targetBuffer, buffer, byteOffset); return indexOfNeedle(targetBuffer, buffer, byteOffset);
} }
// TODO(Soremwar)
// Node's implementation is a very obscure algorithm that I haven't been able to crack just yet
function indexOfNumber( function indexOfNumber(
targetBuffer: Uint8Array, targetBuffer: Uint8Array,
number: number, number: number,
byteOffset: number, byteOffset: number,
forwardDirection: boolean, forwardDirection: boolean,
) { ) {
const bytes = numberToBytes(number);
if (bytes.length > 1) {
throw new Error("Multi byte number search is not supported");
}
return indexOfBuffer( return indexOfBuffer(
targetBuffer, targetBuffer,
numberToBytes(number), // Uses only the last 2 hex digits of the number
// https://github.com/nodejs/node/issues/7591#issuecomment-231178104
Uint8Array.from([number & 255]),
byteOffset, byteOffset,
Encodings.UTF8, Encodings.UTF8,
forwardDirection, forwardDirection,

View file

@ -24,7 +24,6 @@
"test-blocklist.js", "test-blocklist.js",
"test-buffer-arraybuffer.js", "test-buffer-arraybuffer.js",
"test-buffer-backing-arraybuffer.js", "test-buffer-backing-arraybuffer.js",
"test-buffer-includes.js",
"test-buffer-indexof.js", "test-buffer-indexof.js",
"test-buffer-tostring-range.js", "test-buffer-tostring-range.js",
"test-child-process-exec-abortcontroller-promisified.js", "test-child-process-exec-abortcontroller-promisified.js",

View file

@ -2,7 +2,7 @@
// deno-lint-ignore-file // deno-lint-ignore-file
// Copyright Joyent and Node contributors. All rights reserved. MIT license. // Copyright Joyent and Node contributors. All rights reserved. MIT license.
// Taken from Node 18.12.1 // Taken from Node 23.9.0
// This file is automatically generated by `tests/node_compat/runner/setup.ts`. Do not modify this file manually. // This file is automatically generated by `tests/node_compat/runner/setup.ts`. Do not modify this file manually.
'use strict'; 'use strict';
@ -81,9 +81,7 @@ assert(b.includes(Buffer.from('f'), 5));
assert(b.includes(Buffer.from('f'), -1)); assert(b.includes(Buffer.from('f'), -1));
assert(!b.includes(Buffer.from('f'), 6)); assert(!b.includes(Buffer.from('f'), 6));
// TODO(Soremwar) assert(!Buffer.from('ff').includes(Buffer.from('f'), 1, 'ucs2'));
// Enable again once encoding is taken into account when evaluating indexOf
// assert(!Buffer.from('ff').includes(Buffer.from('f'), 1, 'ucs2'));
// test hex encoding // test hex encoding
assert.strictEqual( assert.strictEqual(
@ -222,10 +220,7 @@ assert(!asciiString.includes('\x2061'));
assert(asciiString.includes('leb', 0)); assert(asciiString.includes('leb', 0));
// Search in string containing many non-ASCII chars. // Search in string containing many non-ASCII chars.
const allCodePoints = []; const allCharsString = Array.from({ length: 65536 }, (_, i) => String.fromCharCode(i)).join('');
for (let i = 0; i < 65534; i++) allCodePoints[i] = i;
const allCharsString = String.fromCharCode.apply(String, allCodePoints) +
String.fromCharCode(65534, 65535);
const allCharsBufferUtf8 = Buffer.from(allCharsString); const allCharsBufferUtf8 = Buffer.from(allCharsString);
const allCharsBufferUcs2 = Buffer.from(allCharsString, 'ucs2'); const allCharsBufferUcs2 = Buffer.from(allCharsString, 'ucs2');
@ -299,19 +294,17 @@ for (let lengthIndex = 0; lengthIndex < lengths.length; lengthIndex++) {
}); });
// Test truncation of Number arguments to uint8 // Test truncation of Number arguments to uint8
// TODO(Soremwar) {
// Enable once multi byte number search is available const buf = Buffer.from('this is a test');
// { assert.ok(buf.includes(0x6973));
// const buf = Buffer.from('this is a test'); assert.ok(buf.includes(0x697320));
// assert.ok(buf.includes(0x6973)); assert.ok(buf.includes(0x69732069));
// assert.ok(buf.includes(0x697320)); assert.ok(buf.includes(0x697374657374));
// assert.ok(buf.includes(0x69732069)); assert.ok(buf.includes(0x69737374));
// assert.ok(buf.includes(0x697374657374)); assert.ok(buf.includes(0x69737465));
// assert.ok(buf.includes(0x69737374)); assert.ok(buf.includes(0x69737465));
// assert.ok(buf.includes(0x69737465)); assert.ok(buf.includes(-140));
// assert.ok(buf.includes(0x69737465)); assert.ok(buf.includes(-152));
// assert.ok(buf.includes(-140)); assert.ok(!buf.includes(0xff));
// assert.ok(buf.includes(-152)); assert.ok(!buf.includes(0xffff));
// assert.ok(!buf.includes(0xff)); }
// assert.ok(!buf.includes(0xffff));
// }