mirror of
https://github.com/python/cpython.git
synced 2025-12-10 19:10:59 +00:00
gh-124621: Emscripten: Add support for async input devices (GH-136822)
This is useful for implementing a proper `input()`. It requires the JavaScript engine to support the wasm JSPI spec, which is now stage 4. It is supported in Chrome since version 137, and in Firefox and node behind a flag. We override the `__wasi_fd_read()` syscall with our own variant that checks whether the stream has a `readAsync` operation. If it does, we use our own async variant of `fd_read()`; otherwise we use the original `fd_read()`. We also add a variant of `FS.createDevice()` called `FS.createAsyncInputDevice()`. Finally, if JSPI is available, we wrap the `main()` symbol with `WebAssembly.promising()` so that we can stack switch from `fd_read()`. If JSPI is not available, attempting to read from an AsyncInputDevice will raise an `OSError`.
This commit is contained in:
parent
1ba23244f3
commit
7ae4749d06
4 changed files with 245 additions and 1 deletions
25
Lib/test/test_capi/test_emscripten.py
Normal file
25
Lib/test/test_capi/test_emscripten.py
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
import unittest
|
||||
from test.support import is_emscripten
|
||||
|
||||
if not is_emscripten:
|
||||
raise unittest.SkipTest("Emscripten-only test")
|
||||
|
||||
from _testinternalcapi import emscripten_set_up_async_input_device
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
class EmscriptenAsyncInputDeviceTest(unittest.TestCase):
    """Exercise reads from a device made with FS.createAsyncInputDevice()."""

    def test_emscripten_async_input_device(self):
        """Read lines from /dev/blah, or expect OSError without JSPI."""
        has_jspi = emscripten_set_up_async_input_device()
        device = Path("/dev/blah")
        self.addCleanup(device.unlink)

        with open(device, "r") as stream:
            if not has_jspi:
                # Reading an async input device without JSPI must fail.
                with self.assertRaises(OSError):
                    stream.readline()
                return

            # The JavaScript handler hands out its three chunks in rotation.
            for _ in range(10):
                for expected in ("ab", "fi", "xy"):
                    self.assertEqual(stream.readline().strip(), expected)
|
||||
|
|
@ -2345,6 +2345,37 @@ incref_decref_delayed(PyObject *self, PyObject *op)
|
|||
Py_RETURN_NONE;
|
||||
}
|
||||
|
||||
#ifdef __EMSCRIPTEN__
|
||||
#include "emscripten.h"
|
||||
|
||||
EM_JS(int, emscripten_set_up_async_input_device_js, (void), {
    // Test fixture: register /dev/blah as an async input device whose handler
    // waits 5 ms and then returns the next of three encoded lines, in rotation.
    const encoder = new TextEncoder();
    const chunks = ["ab\n", "fi\n", "xy\n"].map((line) => encoder.encode(line));
    const delay = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
    let next = 0;
    FS.createAsyncInputDevice("/dev", "blah", async () => {
        await delay(5);
        const chunk = chunks[next % 3];
        next += 1;
        return chunk;
    });
    // Nonzero when WebAssembly.promising exists in this JavaScript engine.
    return !!WebAssembly.promising;
});
|
||||
|
||||
// Set up the async test device through the JavaScript helper and return
// True when the helper reports a nonzero value, False otherwise.
static PyObject *
emscripten_set_up_async_input_device(PyObject *self, PyObject *Py_UNUSED(ignored))
{
    int jspi_supported = emscripten_set_up_async_input_device_js();
    return PyBool_FromLong(jspi_supported);
}
|
||||
#endif
|
||||
|
||||
static PyMethodDef module_functions[] = {
|
||||
{"get_configs", get_configs, METH_NOARGS},
|
||||
{"get_recursion_depth", get_recursion_depth, METH_NOARGS},
|
||||
|
|
@ -2447,6 +2478,9 @@ static PyMethodDef module_functions[] = {
|
|||
{"is_static_immortal", is_static_immortal, METH_O},
|
||||
{"incref_decref_delayed", incref_decref_delayed, METH_O},
|
||||
GET_NEXT_DICT_KEYS_VERSION_METHODDEF
|
||||
#ifdef __EMSCRIPTEN__
|
||||
{"emscripten_set_up_async_input_device", emscripten_set_up_async_input_device, METH_NOARGS},
|
||||
#endif
|
||||
{NULL, NULL} /* sentinel */
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -37,3 +37,178 @@ EM_JS(int, __syscall_umask_js, (int mask), {
|
|||
// umask syscall entry point: forwards the mask to the JavaScript
// implementation and returns whatever it returns.
int __syscall_umask(int mask) {
    int result = __syscall_umask_js(mask);
    return result;
}
|
||||
|
||||
#include <wasi/api.h>
|
||||
#include <errno.h>
|
||||
#undef errno
|
||||
|
||||
// Like EM_JS, but the body passes through one extra level of macro expansion
// first, so C macros used inside it (such as EIO or IOVEC_T_SIZE) are
// replaced by their values before EM_JS sees the text.
#define EM_JS_MACROS(ret, func_name, args, ...) \
    EM_JS(ret, func_name, args, __VA_ARGS__)
|
||||
|
||||
EM_JS_MACROS(void, _emscripten_promising_main_js, (void), {
    // Define FS.createAsyncInputDevice(). This is quite similar to
    // FS.createDevice() defined here:
    // https://github.com/emscripten-core/emscripten/blob/4.0.11/src/lib/libfs.js?plain=1#L1642
    // but instead of returning one byte at a time, the input() function should
    // return a Uint8Array. This makes the handler code simpler, the
    // `createAsyncInputDevice` simpler, and everything faster.
    FS.createAsyncInputDevice = function(parent, name, input) {
        parent = typeof parent == 'string' ? parent : FS.getPath(parent);
        var path = PATH.join2(parent, name);
        var mode = FS_getMode(true, false);
        FS.createDevice.major ||= 64;
        var dev = FS.makedev(FS.createDevice.major++, 0);

        // Await the next chunk from input() and store it in ops._dataBuf.
        // A rejected input() is reported as EIO; a missing or empty chunk is
        // reported as EAGAIN.
        async function getDataBuf() {
            var buf;
            try {
                buf = await input();
            } catch (e) {
                throw new FS.ErrnoError(EIO);
            }
            if (!buf?.byteLength) {
                throw new FS.ErrnoError(EAGAIN);
            }
            ops._dataBuf = buf;
        }

        var ops = {
            // Bytes returned by input() that no reader has consumed yet.
            _dataBuf: new Uint8Array(0),
            open(stream) {
                stream.seekable = false;
            },
            // Copy up to `length` bytes into `buffer` starting at `offset`,
            // fetching a new chunk first when nothing is pending. Resolves to
            // the number of bytes copied.
            async readAsync(stream, buffer, offset, length, pos /* ignored */) {
                buffer = buffer.subarray(offset, offset + length);
                if (!ops._dataBuf.byteLength) {
                    await getDataBuf();
                }
                var toRead = Math.min(ops._dataBuf.byteLength, buffer.byteLength);
                // Trim the source, not the destination: TypedArray.set()
                // throws a RangeError if the source is longer than the
                // target, which is the case whenever the pending chunk is
                // larger than the reader buffer.
                buffer.set(ops._dataBuf.subarray(0, toRead));
                // Keep the unread remainder for the next call.
                ops._dataBuf = ops._dataBuf.subarray(toRead);
                if (toRead) {
                    stream.node.atime = Date.now();
                }
                return toRead;
            },
        };
        FS.registerDevice(dev, ops);
        return FS.mkdev(path, mode, dev);
    };
    if (!WebAssembly.promising) {
        // No stack switching support =(
        return;
    }
    const origResolveGlobalSymbol = resolveGlobalSymbol;
    // Optional chaining does not protect against an undeclared identifier, so
    // a bare `process?.exit` throws a ReferenceError where `process` does not
    // exist. Probe with typeof instead.
    const proc = typeof process !== "undefined" ? process : undefined;
    if (!Module.onExit && proc?.exit) {
        Module.onExit = (code) => proc.exit(code);
    }
    // * wrap the main symbol with WebAssembly.promising,
    // * call exit_with_live_runtime() to prevent emscripten from shutting down
    //   the runtime before the promise resolves,
    // * call onExit / process.exit ourselves, since exit_with_live_runtime()
    //   prevented Emscripten from calling it normally.
    resolveGlobalSymbol = function (name, direct = false) {
        const orig = origResolveGlobalSymbol(name, direct);
        if (name === "main") {
            const main = WebAssembly.promising(orig.sym);
            orig.sym = (...args) => {
                (async () => {
                    const ret = await main(...args);
                    // Run the exit hook if one is installed, then make sure
                    // the process terminates when process.exit is available.
                    Module.onExit?.(ret);
                    proc?.exit?.(ret);
                })();
                _emscripten_exit_with_live_runtime();
            };
        }
        return orig;
    };
})
|
||||
|
||||
// Constructor function: its only job is to invoke the JavaScript setup
// routine _emscripten_promising_main_js().
__attribute__((constructor))
void _emscripten_promising_main(void)
{
    _emscripten_promising_main_js();
}
|
||||
|
||||
|
||||
#define IOVEC_T_BUF_OFFSET 0
|
||||
#define IOVEC_T_BUF_LEN_OFFSET 4
|
||||
#define IOVEC_T_SIZE 8
|
||||
_Static_assert(offsetof(__wasi_iovec_t, buf) == IOVEC_T_BUF_OFFSET,
|
||||
"Unexpected __wasi_iovec_t layout");
|
||||
_Static_assert(offsetof(__wasi_iovec_t, buf_len) == IOVEC_T_BUF_LEN_OFFSET,
|
||||
"Unexpected __wasi_iovec_t layout");
|
||||
_Static_assert(sizeof(__wasi_iovec_t) == IOVEC_T_SIZE,
|
||||
"Unexpected __wasi_iovec_t layout");
|
||||
|
||||
// If the stream has a readAsync handler, read to buffer defined in iovs, write
|
||||
// number of bytes read to *nread, and return a promise that resolves to the
|
||||
// errno. Otherwise, return null.
|
||||
EM_JS_MACROS(__externref_t, __maybe_fd_read_async, (
|
||||
__wasi_fd_t fd,
|
||||
const __wasi_iovec_t *iovs,
|
||||
size_t iovcnt,
|
||||
__wasi_size_t *nread
|
||||
), {
|
||||
var stream = SYSCALLS.getStreamFromFD(fd);
|
||||
if (!WebAssembly.promising) {
|
||||
return null;
|
||||
}
|
||||
if (!stream.stream_ops.readAsync) {
|
||||
// Not an async device. Fall back to __wasi_fd_read_orig().
|
||||
return null;
|
||||
}
|
||||
return (async () => {
|
||||
// This is the same as libwasi.js fd_read() and doReadv() except we use
|
||||
// readAsync and we await it.
|
||||
// https://github.com/emscripten-core/emscripten/blob/4.0.11/src/lib/libwasi.js?plain=1#L331
|
||||
// https://github.com/emscripten-core/emscripten/blob/4.0.11/src/lib/libwasi.js?plain=1#L197
|
||||
try {
|
||||
var ret = 0;
|
||||
for (var i = 0; i < iovcnt; i++) {
|
||||
var ptr = HEAP32[(iovs + IOVEC_T_BUF_OFFSET)/4];
|
||||
var len = HEAP32[(iovs + IOVEC_T_BUF_LEN_OFFSET)/4];
|
||||
iovs += IOVEC_T_SIZE;
|
||||
var curr = await stream.stream_ops.readAsync(stream, HEAP8, ptr, len);
|
||||
if (curr < 0) return -1;
|
||||
ret += curr;
|
||||
if (curr < len) break; // nothing more to read
|
||||
}
|
||||
HEAP32[nread/4] = ret;
|
||||
return 0;
|
||||
} catch (e) {
|
||||
if (e.name !== 'ErrnoError') {
|
||||
throw e;
|
||||
}
|
||||
return e.errno;
|
||||
}
|
||||
})();
|
||||
};
|
||||
);
|
||||
|
||||
// Bind original fd_read syscall to __wasi_fd_read_orig().
|
||||
__wasi_errno_t __wasi_fd_read_orig(__wasi_fd_t fd, const __wasi_iovec_t *iovs,
|
||||
size_t iovs_len, __wasi_size_t *nread)
|
||||
__attribute__((__import_module__("wasi_snapshot_preview1"),
|
||||
__import_name__("fd_read"), __warn_unused_result__));
|
||||
|
||||
// Take a promise that resolves to __wasi_errno_t and suspend until it resolves,
|
||||
// get the output.
|
||||
EM_JS(__wasi_errno_t, __block_for_errno, (__externref_t p), {
|
||||
return p;
|
||||
}
|
||||
if (WebAssembly.Suspending) {
|
||||
__block_for_errno = new WebAssembly.Suspending(__block_for_errno);
|
||||
}
|
||||
)
|
||||
|
||||
// Replacement for fd_read syscall. Call __maybe_fd_read_async. If it returned
|
||||
// null, delegate back to __wasi_fd_read_orig. Otherwise, use __block_for_errno
|
||||
// to get the result.
|
||||
__wasi_errno_t __wasi_fd_read(__wasi_fd_t fd, const __wasi_iovec_t *iovs,
|
||||
size_t iovs_len, __wasi_size_t *nread) {
|
||||
__externref_t p = __maybe_fd_read_async(fd, iovs, iovs_len, nread);
|
||||
if (__builtin_wasm_ref_is_null_extern(p)) {
|
||||
return __wasi_fd_read_orig(fd, iovs, iovs_len, nread);
|
||||
}
|
||||
__wasi_errno_t res = __block_for_errno(p);
|
||||
return res;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -263,10 +263,20 @@ def configure_emscripten_python(context, working_dir):
|
|||
REALPATH=abs_path
|
||||
fi
|
||||
|
||||
# Before node 24, --experimental-wasm-jspi uses different API,
|
||||
# After node 24 JSPI is on by default.
|
||||
ARGS=$({host_runner} -e "$(cat <<"EOF"
|
||||
const major_version = Number(process.version.split(".")[0].slice(1));
|
||||
if (major_version === 24) {{
|
||||
process.stdout.write("--experimental-wasm-jspi");
|
||||
}}
|
||||
EOF
|
||||
)")
|
||||
|
||||
# We compute our own path, not following symlinks and pass it in so that
|
||||
# node_entry.mjs can set sys.executable correctly.
|
||||
# Intentionally allow word splitting on NODEFLAGS.
|
||||
exec {host_runner} $NODEFLAGS {node_entry} --this-program="$($REALPATH "$0")" "$@"
|
||||
exec {host_runner} $NODEFLAGS $ARGS {node_entry} --this-program="$($REALPATH "$0")" "$@"
|
||||
"""
|
||||
)
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue