Implement experimental WebGPU support (#1238)

* Web gpu execution MVP
  - Ready infrastructure for wgpu experimentation
  - Start implementing simple gpu test case
  - Fix Extract Node not working with nested networks
  - Convert inputs for extracted node to network inputs
  - Fix missing cors headers
  - Feature gate gcore to make it once again no-std compatible
  - Add skeleton structure gpu shader
  - Work on gpu node graph output saving
  - Fix Get and Set nodes
  - Fix storage nodes
  - Fix shader construction errors -> spirv errors
  - Add unsafe version
  - Add once cell node
  - Web gpu execution MVP
2025-08-31 02:07:21 +00:00 · 2023-05-27 19:27:46 +02:00 · 2023-05-27 19:27:46 +02:00 · 0586d52f3a
commit 0586d52f3a
parent 4bd9fbd073
33 changed files with 1080 additions and 239 deletions
--- a/node-graph/gpu-executor/src/lib.rs
+++ b/node-graph/gpu-executor/src/lib.rs
@@ -1,13 +1,15 @@
+use bytemuck::{Pod, Zeroable};
 use graph_craft::proto::ProtoNetwork;
 use graphene_core::*;

 use anyhow::Result;
-use dyn_any::StaticType;
+use dyn_any::{StaticType, StaticTypeSized};
 use futures::Future;
 use glam::UVec3;
 use serde::{Deserialize, Serialize};
 use std::borrow::Cow;
 use std::pin::Pin;
+use std::sync::Arc;

 type ReadBackFuture = Pin<Box<dyn Future<Output = Result<Vec<u8>>>>>;

@@ -20,18 +22,18 @@ pub trait GpuExecutor {
 	fn create_uniform_buffer<T: ToUniformBuffer>(&self, data: T) -> Result<ShaderInput<Self::BufferHandle>>;
 	fn create_storage_buffer<T: ToStorageBuffer>(&self, data: T, options: StorageBufferOptions) -> Result<ShaderInput<Self::BufferHandle>>;
 	fn create_output_buffer(&self, len: usize, ty: Type, cpu_readable: bool) -> Result<ShaderInput<Self::BufferHandle>>;
-	fn create_compute_pass(&self, layout: &PipelineLayout<Self>, read_back: Option<ShaderInput<Self::BufferHandle>>, instances: u32) -> Result<Self::CommandBuffer>;
+	fn create_compute_pass(&self, layout: &PipelineLayout<Self>, read_back: Option<Arc<ShaderInput<Self::BufferHandle>>>, instances: u32) -> Result<Self::CommandBuffer>;
 	fn execute_compute_pipeline(&self, encoder: Self::CommandBuffer) -> Result<()>;
-	fn read_output_buffer(&self, buffer: ShaderInput<Self::BufferHandle>) -> Result<ReadBackFuture>;
+	fn read_output_buffer(&self, buffer: Arc<ShaderInput<Self::BufferHandle>>) -> ReadBackFuture;
 }

 pub trait SpirVCompiler {
-	fn compile(&self, network: ProtoNetwork, io: &ShaderIO) -> Result<Shader>;
+	fn compile(&self, network: &[ProtoNetwork], io: &ShaderIO) -> Result<Shader>;
 }

 #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
 pub struct CompileRequest {
-	pub network: ProtoNetwork,
+	pub networks: Vec<ProtoNetwork>,
 	pub io: ShaderIO,
 }

@@ -101,6 +103,10 @@ impl<BufferHandle> ShaderInput<BufferHandle> {
 			ShaderInput::ReadBackBuffer(_, ty) => ty.clone(),
 		}
 	}
+
+	pub fn is_output(&self) -> bool {
+		matches!(self, ShaderInput::OutputBuffer(_, _))
+	}
 }

 pub struct Shader<'a> {
@@ -119,6 +125,7 @@ pub struct StorageBufferOptions {
 	pub cpu_writable: bool,
 	pub gpu_writable: bool,
 	pub cpu_readable: bool,
+	pub storage: bool,
 }

 pub trait ToUniformBuffer: StaticType {
@@ -127,13 +134,22 @@ pub trait ToUniformBuffer: StaticType {
 }

 pub trait ToStorageBuffer: StaticType {
-	type StorageBufferHandle;
 	fn to_bytes(&self) -> Cow<[u8]>;
+	fn ty(&self) -> Type;
+}
+
+impl<T: Pod + Zeroable + StaticTypeSized> ToStorageBuffer for Vec<T> {
+	fn to_bytes(&self) -> Cow<[u8]> {
+		Cow::Borrowed(bytemuck::cast_slice(self.as_slice()))
+	}
+	fn ty(&self) -> Type {
+		concrete!(T)
+	}
 }

 /// Collection of all arguments that are passed to the shader.
 pub struct Bindgroup<E: GpuExecutor + ?Sized> {
-	pub buffers: Vec<ShaderInput<E::BufferHandle>>,
+	pub buffers: Vec<Arc<ShaderInput<E::BufferHandle>>>,
 }

 /// A struct representing a compute pipeline.
@@ -141,7 +157,7 @@ pub struct PipelineLayout<E: GpuExecutor + ?Sized> {
 	pub shader: E::ShaderHandle,
 	pub entry_point: String,
 	pub bind_group: Bindgroup<E>,
-	pub output_buffer: ShaderInput<E::BufferHandle>,
+	pub output_buffer: Arc<ShaderInput<E::BufferHandle>>,
 }

 /// Extracts arguments from the function arguments and wraps them in a node.
@@ -185,6 +201,7 @@ fn storage_node<T: ToStorageBuffer, E: GpuExecutor>(data: T, executor: &'input E
 				cpu_writable: false,
 				gpu_writable: true,
 				cpu_readable: false,
+				storage: true,
 			},
 		)
 		.unwrap()
@@ -216,8 +233,8 @@ pub struct CreateComputePassNode<Executor, Output, Instances> {
 }

 #[node_macro::node_fn(CreateComputePassNode)]
-fn create_compute_pass_node<E: GpuExecutor>(layout: PipelineLayout<E>, executor: &'input E, output: ShaderInput<E::BufferHandle>, instances: u32) -> E::CommandBuffer {
-	executor.create_compute_pass(&layout, Some(output), instances).unwrap()
+fn create_compute_pass_node<E: GpuExecutor + 'input>(layout: PipelineLayout<E>, executor: &'input E, output: ShaderInput<E::BufferHandle>, instances: u32) -> E::CommandBuffer {
+	executor.create_compute_pass(&layout, Some(output.into()), instances).unwrap()
 }

 pub struct CreatePipelineLayoutNode<_E, EntryPoint, Bindgroup, OutputBuffer> {
@@ -228,7 +245,7 @@ pub struct CreatePipelineLayoutNode<_E, EntryPoint, Bindgroup, OutputBuffer> {
 }

 #[node_macro::node_fn(CreatePipelineLayoutNode<_E>)]
-fn create_pipeline_layout_node<_E: GpuExecutor>(shader: _E::ShaderHandle, entry_point: String, bind_group: Bindgroup<_E>, output_buffer: ShaderInput<_E::BufferHandle>) -> PipelineLayout<_E> {
+fn create_pipeline_layout_node<_E: GpuExecutor>(shader: _E::ShaderHandle, entry_point: String, bind_group: Bindgroup<_E>, output_buffer: Arc<ShaderInput<_E::BufferHandle>>) -> PipelineLayout<_E> {
 	PipelineLayout {
 		shader,
 		entry_point,