Lay groundwork for directly rendering to the canvas without a cpu roundrip (#1291)

* Add Texture handle type * Add Texture View to shader inputs * Implement basic rendering pipeline * Render first texture using render pipeline * Fix output color space * Precompute the rendering pipeline * Move gpu context creation to editor api * Port gpu-executor nodes to node registry * Fix canvas nodes and make code compile for non wasm targets * Pin wasm-bindgen version * Disable miri temoporarily for better ci times * Fix formatting * Remove unsafe block * Bump wasm-pack version * Bump wasm-bindgen version * Add gpu feature guard for push node * Make Into node async
2025-08-28 17:04:05 +00:00 · 2023-06-07 17:13:21 +02:00 · 2023-06-07 17:13:21 +02:00 · 45b04f4eb9
commit 45b04f4eb9
parent 0c93a62d55
33 changed files with 1574 additions and 339 deletions
--- a/node-graph/wgpu-executor/src/lib.rs
+++ b/node-graph/wgpu-executor/src/lib.rs
@ -2,36 +2,117 @@ mod context;
 mod executor;

 pub use context::Context;
+use dyn_any::{DynAny, StaticType};
 pub use executor::GpuExecutor;
-use gpu_executor::{ComputePassDimensions, Shader, ShaderInput, StorageBufferOptions, ToStorageBuffer, ToUniformBuffer};
+use gpu_executor::{ComputePassDimensions, Shader, ShaderInput, StorageBufferOptions, TextureBufferOptions, TextureBufferType, ToStorageBuffer, ToUniformBuffer};
 use graph_craft::Type;

 use anyhow::{bail, Result};
 use futures::Future;
+use graphene_core::application_io::{ApplicationIo, EditorApi, SurfaceHandle};
+
 use std::pin::Pin;
 use std::sync::Arc;
-use wgpu::util::DeviceExt;
-use wgpu::{Buffer, BufferDescriptor, CommandBuffer, ShaderModule};

-#[derive(Debug, Clone)]
-pub struct NewExecutor {
+use wgpu::util::DeviceExt;
+use wgpu::{Buffer, BufferDescriptor, CommandBuffer, ShaderModule, Texture, TextureView};
+
+#[cfg(target_arch = "wasm32")]
+use web_sys::HtmlCanvasElement;
+
+#[derive(Debug, dyn_any::DynAny)]
+pub struct WgpuExecutor {
 	context: Context,
+	render_configuration: RenderConfiguration,
 }

-impl gpu_executor::GpuExecutor for NewExecutor {
-	type ShaderHandle = ShaderModule;
+impl<'a, T: ApplicationIo<Executor = WgpuExecutor>> From<EditorApi<'a, T>> for &'a WgpuExecutor {
+	fn from(editor_api: EditorApi<'a, T>) -> Self {
+		editor_api.application_io.gpu_executor().unwrap()
+	}
+}
+
+#[repr(C)]
+#[derive(Copy, Clone, Debug, bytemuck::Pod, bytemuck::Zeroable)]
+struct Vertex {
+	position: [f32; 3],
+	tex_coords: [f32; 2],
+}
+
+impl Vertex {
+	fn desc() -> wgpu::VertexBufferLayout<'static> {
+		use std::mem;
+		wgpu::VertexBufferLayout {
+			array_stride: mem::size_of::<Vertex>() as wgpu::BufferAddress,
+			step_mode: wgpu::VertexStepMode::Vertex,
+			attributes: &[
+				wgpu::VertexAttribute {
+					offset: 0,
+					shader_location: 0,
+					format: wgpu::VertexFormat::Float32x3,
+				},
+				wgpu::VertexAttribute {
+					offset: mem::size_of::<[f32; 3]>() as wgpu::BufferAddress,
+					shader_location: 1,
+					format: wgpu::VertexFormat::Float32x2,
+				},
+			],
+		}
+	}
+}
+
+const VERTICES: &[Vertex] = &[
+	Vertex {
+		position: [-1., 1., 0.0],
+		tex_coords: [0., 0.],
+	}, // A
+	Vertex {
+		position: [-1., -1., 0.0],
+		tex_coords: [0., 1.],
+	}, // B
+	Vertex {
+		position: [1., 1., 0.0],
+		tex_coords: [1., 0.],
+	}, // C
+	Vertex {
+		position: [1., -1., 0.0],
+		tex_coords: [1., 1.],
+	}, // D
+];
+
+const INDICES: &[u16] = &[0, 1, 2, 2, 1, 3];
+
+type WgpuShaderInput = ShaderInput<WgpuExecutor>;
+
+#[derive(Debug, DynAny)]
+#[repr(transparent)]
+pub struct CommandBufferWrapper(CommandBuffer);
+
+#[derive(Debug, DynAny)]
+#[repr(transparent)]
+pub struct ShaderModuleWrapper(ShaderModule);
+
+impl gpu_executor::GpuExecutor for WgpuExecutor {
+	type ShaderHandle = ShaderModuleWrapper;
 	type BufferHandle = Buffer;
-	type CommandBuffer = CommandBuffer;
+	type TextureHandle = Texture;
+	type TextureView = TextureView;
+	type CommandBuffer = CommandBufferWrapper;
+	type Surface = wgpu::Surface;
+	#[cfg(target_arch = "wasm32")]
+	type Window = HtmlCanvasElement;
+	#[cfg(not(target_arch = "wasm32"))]
+	type Window = winit::window::Window;

 	fn load_shader(&self, shader: Shader) -> Result<Self::ShaderHandle> {
 		let shader_module = self.context.device.create_shader_module(wgpu::ShaderModuleDescriptor {
 			label: Some(shader.name),
 			source: wgpu::ShaderSource::SpirV(shader.source),
 		});
-		Ok(shader_module)
+		Ok(ShaderModuleWrapper(shader_module))
 	}

-	fn create_uniform_buffer<T: ToUniformBuffer>(&self, data: T) -> Result<ShaderInput<Self::BufferHandle>> {
+	fn create_uniform_buffer<T: ToUniformBuffer>(&self, data: T) -> Result<WgpuShaderInput> {
 		let bytes = data.to_bytes();
 		let buffer = self.context.device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
 			label: None,
@ -41,7 +122,7 @@ impl gpu_executor::GpuExecutor for NewExecutor {
 		Ok(ShaderInput::UniformBuffer(buffer, Type::new::<T>()))
 	}

-	fn create_storage_buffer<T: ToStorageBuffer>(&self, data: T, options: StorageBufferOptions) -> Result<ShaderInput<Self::BufferHandle>> {
+	fn create_storage_buffer<T: ToStorageBuffer>(&self, data: T, options: StorageBufferOptions) -> Result<WgpuShaderInput> {
 		let bytes = data.to_bytes();
 		let mut usage = wgpu::BufferUsages::empty();

@ -66,8 +147,44 @@ impl gpu_executor::GpuExecutor for NewExecutor {
 		});
 		Ok(ShaderInput::StorageBuffer(buffer, data.ty()))
 	}
+	fn create_texture_buffer<T: gpu_executor::ToTextureBuffer>(&self, data: T, options: TextureBufferOptions) -> Result<WgpuShaderInput> {
+		let bytes = data.to_bytes();
+		let usage = match options {
+			TextureBufferOptions::Storage => wgpu::TextureUsages::STORAGE_BINDING | wgpu::TextureUsages::COPY_DST | wgpu::TextureUsages::COPY_SRC,
+			TextureBufferOptions::Texture => wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST,
+			TextureBufferOptions::Surface => wgpu::TextureUsages::RENDER_ATTACHMENT,
+		};
+		let format = match T::format() {
+			TextureBufferType::Rgba32Float => wgpu::TextureFormat::Rgba32Float,
+			TextureBufferType::Rgba8Srgb => wgpu::TextureFormat::Rgba8UnormSrgb,
+		};

-	fn create_output_buffer(&self, len: usize, ty: Type, cpu_readable: bool) -> Result<ShaderInput<Self::BufferHandle>> {
+		let buffer = self.context.device.create_texture_with_data(
+			self.context.queue.as_ref(),
+			&wgpu::TextureDescriptor {
+				label: None,
+				size: wgpu::Extent3d {
+					width: data.size().0,
+					height: data.size().1,
+					depth_or_array_layers: 1,
+				},
+				mip_level_count: 1,
+				sample_count: 1,
+				dimension: wgpu::TextureDimension::D2,
+				format,
+				usage,
+				view_formats: &[format],
+			},
+			bytes.as_ref(),
+		);
+		match options {
+			TextureBufferOptions::Storage => Ok(ShaderInput::StorageTextureBuffer(buffer, T::ty())),
+			TextureBufferOptions::Texture => Ok(ShaderInput::TextureBuffer(buffer, T::ty())),
+			TextureBufferOptions::Surface => Ok(ShaderInput::TextureBuffer(buffer, T::ty())),
+		}
+	}
+
+	fn create_output_buffer(&self, len: usize, ty: Type, cpu_readable: bool) -> Result<WgpuShaderInput> {
 		log::debug!("Creating output buffer with len: {}", len);
 		let create_buffer = |usage| {
 			Ok::<_, anyhow::Error>(self.context.device.create_buffer(&BufferDescriptor {
@ -83,11 +200,11 @@ impl gpu_executor::GpuExecutor for NewExecutor {
 		};
 		Ok(buffer)
 	}
-	fn create_compute_pass(&self, layout: &gpu_executor::PipelineLayout<Self>, read_back: Option<Arc<ShaderInput<Self::BufferHandle>>>, instances: ComputePassDimensions) -> Result<CommandBuffer> {
+	fn create_compute_pass(&self, layout: &gpu_executor::PipelineLayout<Self>, read_back: Option<Arc<WgpuShaderInput>>, instances: ComputePassDimensions) -> Result<Self::CommandBuffer> {
 		let compute_pipeline = self.context.device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
 			label: None,
 			layout: None,
-			module: &layout.shader,
+			module: &layout.shader.0,
 			entry_point: layout.entry_point.as_str(),
 		});
 		let bind_group_layout = compute_pipeline.get_bind_group_layout(0);
@ -97,11 +214,15 @@ impl gpu_executor::GpuExecutor for NewExecutor {
 			.buffers
 			.iter()
 			.chain(std::iter::once(&layout.output_buffer))
-			.flat_map(|input| input.buffer())
+			.flat_map(|input| input.binding())
 			.enumerate()
 			.map(|(i, buffer)| wgpu::BindGroupEntry {
 				binding: i as u32,
-				resource: buffer.as_entire_binding(),
+				resource: match buffer {
+					gpu_executor::BindingType::UniformBuffer(buf) => buf.as_entire_binding(),
+					gpu_executor::BindingType::StorageBuffer(buf) => buf.as_entire_binding(),
+					gpu_executor::BindingType::TextureView(buf) => wgpu::BindingResource::TextureView(buf),
+				},
 			})
 			.collect::<Vec<_>>();

@ -123,27 +244,77 @@ impl gpu_executor::GpuExecutor for NewExecutor {
 		// Sets adds copy operation to command encoder.
 		// Will copy data from storage buffer on GPU to staging buffer on CPU.
 		if let Some(buffer) = read_back {
-			let ShaderInput::ReadBackBuffer(output, ty) = buffer.as_ref() else {
+			let ShaderInput::ReadBackBuffer(output, _ty) = buffer.as_ref() else {
 				bail!("Tried to read back from a non read back buffer");
 			};
 			let size = output.size();
-			assert_eq!(size, layout.output_buffer.buffer().unwrap().size());
+			let ShaderInput::OutputBuffer(output_buffer, ty) = layout.output_buffer.as_ref() else {
+				bail!("Tried to read back from a non output buffer");
+			};
+			assert_eq!(size, output_buffer.size());
 			assert_eq!(ty, &layout.output_buffer.ty());
-			encoder.copy_buffer_to_buffer(
-				layout.output_buffer.buffer().ok_or_else(|| anyhow::anyhow!("Tried to use an non buffer as the shader output"))?,
-				0,
-				output,
-				0,
-				size,
-			);
+			encoder.copy_buffer_to_buffer(output_buffer, 0, output, 0, size);
 		}

 		// Submits command encoder for processing
-		Ok(encoder.finish())
+		Ok(CommandBufferWrapper(encoder.finish()))
+	}
+
+	fn create_render_pass(&self, texture: Arc<ShaderInput<Self>>, canvas: Arc<SurfaceHandle<wgpu::Surface>>) -> Result<()> {
+		let texture = texture.texture().expect("Expected texture input");
+		let texture_view = texture.create_view(&wgpu::TextureViewDescriptor::default());
+		let output = canvas.as_ref().surface.get_current_texture()?;
+		let view = output.texture.create_view(&wgpu::TextureViewDescriptor {
+			format: Some(wgpu::TextureFormat::Rgba8Unorm),
+			..Default::default()
+		});
+		let output_texture_bind_group = self.context.device.create_bind_group(&wgpu::BindGroupDescriptor {
+			layout: &self.render_configuration.texture_bind_group_layout,
+			entries: &[
+				wgpu::BindGroupEntry {
+					binding: 0,
+					resource: wgpu::BindingResource::TextureView(&texture_view),
+				},
+				wgpu::BindGroupEntry {
+					binding: 1,
+					resource: wgpu::BindingResource::Sampler(&self.render_configuration.sampler),
+				},
+			],
+			label: Some("output_texture_bind_group"),
+		});
+
+		let mut encoder = self.context.device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: Some("Render Encoder") });
+
+		{
+			let mut render_pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
+				label: Some("Render Pass"),
+				color_attachments: &[Some(wgpu::RenderPassColorAttachment {
+					view: &view,
+					resolve_target: None,
+					ops: wgpu::Operations {
+						load: wgpu::LoadOp::Clear(wgpu::Color::GREEN),
+						store: true,
+					},
+				})],
+				depth_stencil_attachment: None,
+			});
+
+			render_pass.set_pipeline(&self.render_configuration.render_pipeline);
+			render_pass.set_bind_group(0, &output_texture_bind_group, &[]);
+			render_pass.set_vertex_buffer(0, self.render_configuration.vertex_buffer.slice(..));
+			render_pass.set_index_buffer(self.render_configuration.index_buffer.slice(..), wgpu::IndexFormat::Uint16);
+			render_pass.draw_indexed(0..self.render_configuration.num_indices, 0, 0..1);
+		}
+
+		let encoder = encoder.finish();
+		self.context.queue.submit(Some(encoder));
+		output.present();
+
+		Ok(())
 	}

 	fn execute_compute_pipeline(&self, encoder: Self::CommandBuffer) -> Result<()> {
-		self.context.queue.submit(Some(encoder));
+		self.context.queue.submit(Some(encoder.0));

 		// Poll the device in a blocking manner so that our future resolves.
 		// In an actual application, `device.poll(...)` should
@ -152,7 +323,7 @@ impl gpu_executor::GpuExecutor for NewExecutor {
 		Ok(())
 	}

-	fn read_output_buffer(&self, buffer: Arc<ShaderInput<Self::BufferHandle>>) -> Pin<Box<dyn Future<Output = Result<Vec<u8>>>>> {
+	fn read_output_buffer(&self, buffer: Arc<ShaderInput<Self>>) -> Pin<Box<dyn Future<Output = Result<Vec<u8>>>>> {
 		Box::pin(async move {
 			if let ShaderInput::ReadBackBuffer(buffer, _) = buffer.as_ref() {
 				let buffer_slice = buffer.slice(..);
@ -187,13 +358,168 @@ impl gpu_executor::GpuExecutor for NewExecutor {
 			} else {
 				bail!("Tried to read a non readback buffer")
 			}
-		}) as _
+		})
+	}
+
+	fn create_texture_view(&self, texture: ShaderInput<Self>) -> Result<ShaderInput<Self>> {
+		//Ok(ShaderInput::TextureView(texture.create_view(&wgpu::TextureViewDescriptor::default()), ) )
+		let ShaderInput::TextureBuffer(texture, ty) = &texture else {
+			bail!("Tried to create a texture view from a non texture");
+		};
+		let view = texture.create_view(&wgpu::TextureViewDescriptor::default());
+		Ok(ShaderInput::TextureView(view, ty.clone()))
+	}
+
+	#[cfg(target_arch = "wasm32")]
+	fn create_surface(&self, canvas: graphene_core::WasmSurfaceHandle) -> Result<SurfaceHandle<wgpu::Surface>> {
+		let surface = self.context.instance.create_surface_from_canvas(canvas.surface)?;
+
+		let surface_caps = surface.get_capabilities(&self.context.adapter);
+		let surface_format = wgpu::TextureFormat::Rgba8Unorm;
+		let config = wgpu::SurfaceConfiguration {
+			usage: wgpu::TextureUsages::RENDER_ATTACHMENT,
+			format: surface_format,
+			width: 1920,
+			height: 1080,
+			present_mode: surface_caps.present_modes[0],
+			alpha_mode: wgpu::CompositeAlphaMode::PreMultiplied,
+			view_formats: vec![wgpu::TextureFormat::Rgba8UnormSrgb],
+		};
+		surface.configure(&self.context.device, &config);
+		Ok(SurfaceHandle {
+			surface_id: canvas.surface_id,
+			surface,
+		})
+	}
+	#[cfg(not(target_arch = "wasm32"))]
+	fn create_surface(&self, window: SurfaceHandle<winit::window::Window>) -> Result<SurfaceHandle<wgpu::Surface>> {
+		let surface = unsafe { self.context.instance.create_surface(&window.surface) }?;
+		let surface_id = window.surface_id;
+		Ok(SurfaceHandle { surface_id, surface })
 	}
 }

-impl NewExecutor {
+impl WgpuExecutor {
 	pub async fn new() -> Option<Self> {
 		let context = Context::new().await?;
-		Some(Self { context })
+
+		let texture_bind_group_layout = context.device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
+			entries: &[
+				wgpu::BindGroupLayoutEntry {
+					binding: 0,
+					visibility: wgpu::ShaderStages::FRAGMENT,
+					ty: wgpu::BindingType::Texture {
+						multisampled: false,
+						view_dimension: wgpu::TextureViewDimension::D2,
+						sample_type: wgpu::TextureSampleType::Float { filterable: false },
+					},
+					count: None,
+				},
+				wgpu::BindGroupLayoutEntry {
+					binding: 1,
+					visibility: wgpu::ShaderStages::FRAGMENT,
+					ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::NonFiltering),
+					count: None,
+				},
+			],
+			label: Some("texture_bind_group_layout"),
+		});
+
+		let sampler = context.device.create_sampler(&wgpu::SamplerDescriptor {
+			address_mode_u: wgpu::AddressMode::ClampToEdge,
+			address_mode_v: wgpu::AddressMode::ClampToEdge,
+			address_mode_w: wgpu::AddressMode::ClampToEdge,
+			mag_filter: wgpu::FilterMode::Nearest,
+			min_filter: wgpu::FilterMode::Nearest,
+			mipmap_filter: wgpu::FilterMode::Nearest,
+			..Default::default()
+		});
+
+		let shader = context.device.create_shader_module(wgpu::ShaderModuleDescriptor {
+			label: Some("Shader"),
+			source: wgpu::ShaderSource::Wgsl(include_str!("shader.wgsl").into()),
+		});
+
+		let render_pipeline_layout = context.device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
+			label: Some("Render Pipeline Layout"),
+			bind_group_layouts: &[&texture_bind_group_layout],
+			push_constant_ranges: &[],
+		});
+
+		let render_pipeline = context.device.create_render_pipeline(&wgpu::RenderPipelineDescriptor {
+			label: Some("Render Pipeline"),
+			layout: Some(&render_pipeline_layout),
+			vertex: wgpu::VertexState {
+				module: &shader,
+				entry_point: "vs_main",
+				buffers: &[Vertex::desc()],
+			},
+			fragment: Some(wgpu::FragmentState {
+				module: &shader,
+				entry_point: "fs_main",
+				targets: &[Some(wgpu::ColorTargetState {
+					format: wgpu::TextureFormat::Rgba8Unorm,
+					blend: Some(wgpu::BlendState {
+						color: wgpu::BlendComponent::REPLACE,
+						alpha: wgpu::BlendComponent::REPLACE,
+					}),
+					write_mask: wgpu::ColorWrites::ALL,
+				})],
+			}),
+			primitive: wgpu::PrimitiveState {
+				topology: wgpu::PrimitiveTopology::TriangleList,
+				strip_index_format: None,
+				front_face: wgpu::FrontFace::Ccw,
+				cull_mode: None,
+				// Setting this to anything other than Fill requires Features::POLYGON_MODE_LINE
+				// or Features::POLYGON_MODE_POINT
+				polygon_mode: wgpu::PolygonMode::Fill,
+				// Requires Features::DEPTH_CLIP_CONTROL
+				unclipped_depth: false,
+				// Requires Features::CONSERVATIVE_RASTERIZATION
+				conservative: false,
+			},
+			depth_stencil: None,
+			multisample: wgpu::MultisampleState {
+				count: 1,
+				mask: !0,
+				alpha_to_coverage_enabled: false,
+			},
+			// If the pipeline will be used with a multiview render pass, this
+			// indicates how many array layers the attachments will have.
+			multiview: None,
+		});
+
+		let vertex_buffer = context.device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
+			label: Some("Vertex Buffer"),
+			contents: bytemuck::cast_slice(VERTICES),
+			usage: wgpu::BufferUsages::VERTEX,
+		});
+		let index_buffer = context.device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
+			label: Some("Index Buffer"),
+			contents: bytemuck::cast_slice(INDICES),
+			usage: wgpu::BufferUsages::INDEX,
+		});
+		let num_indices = INDICES.len() as u32;
+		let render_configuration = RenderConfiguration {
+			vertex_buffer,
+			index_buffer,
+			num_indices,
+			render_pipeline,
+			texture_bind_group_layout,
+			sampler,
+		};
+
+		Some(Self { context, render_configuration })
 	}
 }
+
+#[derive(Debug)]
+struct RenderConfiguration {
+	vertex_buffer: wgpu::Buffer,
+	index_buffer: wgpu::Buffer,
+	num_indices: u32,
+	render_pipeline: wgpu::RenderPipeline,
+	texture_bind_group_layout: wgpu::BindGroupLayout,
+	sampler: wgpu::Sampler,
+}