Implement experimental WebGPU support (#1238)

* Web gpu execution MVP

Ready infrastructure for wgpu experimentation

Start implementing simple gpu test case

Fix Extract Node not working with nested networks

Convert inputs for extracted node to network inputs

Fix missing cors headers

Feature-gate gcore to make it `no_std`-compatible once again

Add skeleton structure for the GPU shader

Work on gpu node graph output saving

Fix Get and Set nodes

Fix storage nodes

Fix shader construction errors -> spirv errors

Add unsafe version

Add once cell node

Web gpu execution MVP
This commit is contained in:
Dennis Kobert 2023-05-27 19:27:46 +02:00 committed by Keavon Chambers
parent 4bd9fbd073
commit 0586d52f3a
33 changed files with 1080 additions and 239 deletions

View file

@ -1,13 +1,15 @@
use bytemuck::{Pod, Zeroable};
use graph_craft::proto::ProtoNetwork;
use graphene_core::*;
use anyhow::Result;
use dyn_any::StaticType;
use dyn_any::{StaticType, StaticTypeSized};
use futures::Future;
use glam::UVec3;
use serde::{Deserialize, Serialize};
use std::borrow::Cow;
use std::pin::Pin;
use std::sync::Arc;
/// Pinned, boxed future whose output is `Result<Vec<u8>>`.
/// `GpuExecutor::read_output_buffer` hands one of these back to the caller.
type ReadBackFuture = Pin<Box<dyn Future<Output = Result<Vec<u8>>>>>;
@ -20,18 +22,18 @@ pub trait GpuExecutor {
fn create_uniform_buffer<T: ToUniformBuffer>(&self, data: T) -> Result<ShaderInput<Self::BufferHandle>>;
fn create_storage_buffer<T: ToStorageBuffer>(&self, data: T, options: StorageBufferOptions) -> Result<ShaderInput<Self::BufferHandle>>;
fn create_output_buffer(&self, len: usize, ty: Type, cpu_readable: bool) -> Result<ShaderInput<Self::BufferHandle>>;
fn create_compute_pass(&self, layout: &PipelineLayout<Self>, read_back: Option<ShaderInput<Self::BufferHandle>>, instances: u32) -> Result<Self::CommandBuffer>;
fn create_compute_pass(&self, layout: &PipelineLayout<Self>, read_back: Option<Arc<ShaderInput<Self::BufferHandle>>>, instances: u32) -> Result<Self::CommandBuffer>;
fn execute_compute_pipeline(&self, encoder: Self::CommandBuffer) -> Result<()>;
fn read_output_buffer(&self, buffer: ShaderInput<Self::BufferHandle>) -> Result<ReadBackFuture>;
fn read_output_buffer(&self, buffer: Arc<ShaderInput<Self::BufferHandle>>) -> ReadBackFuture;
}
/// Compiler interface that turns proto network(s) plus a `ShaderIO` description into a `Shader`.
/// NOTE(review): the trait name suggests SPIR-V output, but nothing in the trait itself fixes the format — confirm.
pub trait SpirVCompiler {
// NOTE(review): two `compile` signatures are listed back-to-back (one network by value, then a slice
// of networks). A trait can hold only one; the slice form looks like the current one — confirm.
fn compile(&self, network: ProtoNetwork, io: &ShaderIO) -> Result<Shader>;
fn compile(&self, network: &[ProtoNetwork], io: &ShaderIO) -> Result<Shader>;
}
/// Serializable bundle of what a shader compile needs: the proto network(s) and the `ShaderIO` description.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct CompileRequest {
// NOTE(review): both a single `network` and a `networks` list appear here; this looks like
// before/after residue rather than two live fields — confirm which one is current.
pub network: ProtoNetwork,
pub networks: Vec<ProtoNetwork>,
// Shader input/output description that accompanies the network(s).
pub io: ShaderIO,
}
@ -101,6 +103,10 @@ impl<BufferHandle> ShaderInput<BufferHandle> {
ShaderInput::ReadBackBuffer(_, ty) => ty.clone(),
}
}
/// Returns `true` only when this input is the `OutputBuffer` variant.
pub fn is_output(&self) -> bool {
    // The payload is irrelevant here, so match on the variant alone.
    matches!(self, ShaderInput::OutputBuffer(..))
}
}
pub struct Shader<'a> {
@ -119,6 +125,7 @@ pub struct StorageBufferOptions {
pub cpu_writable: bool,
pub gpu_writable: bool,
pub cpu_readable: bool,
pub storage: bool,
}
pub trait ToUniformBuffer: StaticType {
@ -127,13 +134,22 @@ pub trait ToUniformBuffer: StaticType {
}
/// Data that can be handed to `create_storage_buffer`: it exposes its raw bytes and a `Type` descriptor.
pub trait ToStorageBuffer: StaticType {
// NOTE(review): the `Vec<T>` impl in this file does not define this associated type,
// so this is presumably a removed line — confirm.
type StorageBufferHandle;
/// Raw bytes of the data; `Cow` lets an implementor either borrow or allocate.
fn to_bytes(&self) -> Cow<[u8]>;
/// `Type` descriptor for the data (the `Vec<T>` impl reports `concrete!(T)`).
fn ty(&self) -> Type;
}
/// Lets a `Vec<T>` of `Pod` elements be used as storage-buffer data.
impl<T> ToStorageBuffer for Vec<T>
where
    T: Pod + Zeroable + StaticTypeSized,
{
    /// Reinterprets the vector's elements as bytes and returns them borrowed (no copy).
    fn to_bytes(&self) -> Cow<[u8]> {
        let raw: &[u8] = bytemuck::cast_slice(self.as_slice());
        Cow::Borrowed(raw)
    }

    /// Builds the `Type` for the element type `T` via `concrete!`.
    fn ty(&self) -> Type {
        concrete!(T)
    }
}
/// Collection of all arguments that are passed to the shader.
///
/// Generic over the executor `E`; each entry is a `ShaderInput` over that executor's `BufferHandle`.
pub struct Bindgroup<E: GpuExecutor + ?Sized> {
// NOTE(review): `buffers` is listed twice (plain elements, then `Arc`-wrapped elements); this looks
// like before/after residue — the `Arc` form matches the `Arc`-taking executor methods, confirm.
pub buffers: Vec<ShaderInput<E::BufferHandle>>,
pub buffers: Vec<Arc<ShaderInput<E::BufferHandle>>>,
}
/// A struct representing a compute pipeline.
@ -141,7 +157,7 @@ pub struct PipelineLayout<E: GpuExecutor + ?Sized> {
pub shader: E::ShaderHandle,
pub entry_point: String,
pub bind_group: Bindgroup<E>,
pub output_buffer: ShaderInput<E::BufferHandle>,
pub output_buffer: Arc<ShaderInput<E::BufferHandle>>,
}
/// Extracts arguments from the function arguments and wraps them in a node.
@ -185,6 +201,7 @@ fn storage_node<T: ToStorageBuffer, E: GpuExecutor>(data: T, executor: &'input E
cpu_writable: false,
gpu_writable: true,
cpu_readable: false,
storage: true,
},
)
.unwrap()
@ -216,8 +233,8 @@ pub struct CreateComputePassNode<Executor, Output, Instances> {
}
// Node wrapper around `GpuExecutor::create_compute_pass`: forwards `layout`, passes `output` as the
// read-back buffer, forwards `instances`, and panics via `unwrap` if the executor returns an error.
// NOTE(review): two signature/body pairs are listed back-to-back under one closing brace. The second
// pair converts `output` with `.into()`, which fits the `Arc`-taking `create_compute_pass` — presumably
// the current form; confirm.
#[node_macro::node_fn(CreateComputePassNode)]
fn create_compute_pass_node<E: GpuExecutor>(layout: PipelineLayout<E>, executor: &'input E, output: ShaderInput<E::BufferHandle>, instances: u32) -> E::CommandBuffer {
executor.create_compute_pass(&layout, Some(output), instances).unwrap()
fn create_compute_pass_node<E: GpuExecutor + 'input>(layout: PipelineLayout<E>, executor: &'input E, output: ShaderInput<E::BufferHandle>, instances: u32) -> E::CommandBuffer {
executor.create_compute_pass(&layout, Some(output.into()), instances).unwrap()
}
pub struct CreatePipelineLayoutNode<_E, EntryPoint, Bindgroup, OutputBuffer> {
@ -228,7 +245,7 @@ pub struct CreatePipelineLayoutNode<_E, EntryPoint, Bindgroup, OutputBuffer> {
}
#[node_macro::node_fn(CreatePipelineLayoutNode<_E>)]
fn create_pipeline_layout_node<_E: GpuExecutor>(shader: _E::ShaderHandle, entry_point: String, bind_group: Bindgroup<_E>, output_buffer: ShaderInput<_E::BufferHandle>) -> PipelineLayout<_E> {
fn create_pipeline_layout_node<_E: GpuExecutor>(shader: _E::ShaderHandle, entry_point: String, bind_group: Bindgroup<_E>, output_buffer: Arc<ShaderInput<_E::BufferHandle>>) -> PipelineLayout<_E> {
PipelineLayout {
shader,
entry_point,