Lay groundwork for directly rendering to the canvas without a cpu roundrip (#1291)

* Add Texture handle type

* Add Texture View to shader inputs

* Implement basic rendering pipeline

* Render first texture using render pipeline

* Fix output color space

* Precompute the rendering pipeline

* Move gpu context creation to editor api

* Port gpu-executor nodes to node registry

* Fix canvas nodes and make code compile for non wasm targets

* Pin wasm-bindgen version

* Disable miri temoporarily for better ci times

* Fix formatting

* Remove unsafe block

* Bump wasm-pack version

* Bump wasm-bindgen version

* Add gpu feature guard for push node

* Make Into node async
This commit is contained in:
Dennis Kobert 2023-06-07 17:13:21 +02:00 committed by Keavon Chambers
parent 0c93a62d55
commit 45b04f4eb9
33 changed files with 1574 additions and 339 deletions

View file

@ -2,36 +2,117 @@ mod context;
mod executor;
pub use context::Context;
use dyn_any::{DynAny, StaticType};
pub use executor::GpuExecutor;
use gpu_executor::{ComputePassDimensions, Shader, ShaderInput, StorageBufferOptions, ToStorageBuffer, ToUniformBuffer};
use gpu_executor::{ComputePassDimensions, Shader, ShaderInput, StorageBufferOptions, TextureBufferOptions, TextureBufferType, ToStorageBuffer, ToUniformBuffer};
use graph_craft::Type;
use anyhow::{bail, Result};
use futures::Future;
use graphene_core::application_io::{ApplicationIo, EditorApi, SurfaceHandle};
use std::pin::Pin;
use std::sync::Arc;
use wgpu::util::DeviceExt;
use wgpu::{Buffer, BufferDescriptor, CommandBuffer, ShaderModule};
#[derive(Debug, Clone)]
pub struct NewExecutor {
use wgpu::util::DeviceExt;
use wgpu::{Buffer, BufferDescriptor, CommandBuffer, ShaderModule, Texture, TextureView};
#[cfg(target_arch = "wasm32")]
use web_sys::HtmlCanvasElement;
#[derive(Debug, dyn_any::DynAny)]
pub struct WgpuExecutor {
context: Context,
render_configuration: RenderConfiguration,
}
impl gpu_executor::GpuExecutor for NewExecutor {
type ShaderHandle = ShaderModule;
impl<'a, T: ApplicationIo<Executor = WgpuExecutor>> From<EditorApi<'a, T>> for &'a WgpuExecutor {
fn from(editor_api: EditorApi<'a, T>) -> Self {
editor_api.application_io.gpu_executor().unwrap()
}
}
#[repr(C)]
#[derive(Copy, Clone, Debug, bytemuck::Pod, bytemuck::Zeroable)]
struct Vertex {
position: [f32; 3],
tex_coords: [f32; 2],
}
impl Vertex {
fn desc() -> wgpu::VertexBufferLayout<'static> {
use std::mem;
wgpu::VertexBufferLayout {
array_stride: mem::size_of::<Vertex>() as wgpu::BufferAddress,
step_mode: wgpu::VertexStepMode::Vertex,
attributes: &[
wgpu::VertexAttribute {
offset: 0,
shader_location: 0,
format: wgpu::VertexFormat::Float32x3,
},
wgpu::VertexAttribute {
offset: mem::size_of::<[f32; 3]>() as wgpu::BufferAddress,
shader_location: 1,
format: wgpu::VertexFormat::Float32x2,
},
],
}
}
}
const VERTICES: &[Vertex] = &[
Vertex {
position: [-1., 1., 0.0],
tex_coords: [0., 0.],
}, // A
Vertex {
position: [-1., -1., 0.0],
tex_coords: [0., 1.],
}, // B
Vertex {
position: [1., 1., 0.0],
tex_coords: [1., 0.],
}, // C
Vertex {
position: [1., -1., 0.0],
tex_coords: [1., 1.],
}, // D
];
const INDICES: &[u16] = &[0, 1, 2, 2, 1, 3];
type WgpuShaderInput = ShaderInput<WgpuExecutor>;
#[derive(Debug, DynAny)]
#[repr(transparent)]
pub struct CommandBufferWrapper(CommandBuffer);
#[derive(Debug, DynAny)]
#[repr(transparent)]
pub struct ShaderModuleWrapper(ShaderModule);
impl gpu_executor::GpuExecutor for WgpuExecutor {
type ShaderHandle = ShaderModuleWrapper;
type BufferHandle = Buffer;
type CommandBuffer = CommandBuffer;
type TextureHandle = Texture;
type TextureView = TextureView;
type CommandBuffer = CommandBufferWrapper;
type Surface = wgpu::Surface;
#[cfg(target_arch = "wasm32")]
type Window = HtmlCanvasElement;
#[cfg(not(target_arch = "wasm32"))]
type Window = winit::window::Window;
fn load_shader(&self, shader: Shader) -> Result<Self::ShaderHandle> {
let shader_module = self.context.device.create_shader_module(wgpu::ShaderModuleDescriptor {
label: Some(shader.name),
source: wgpu::ShaderSource::SpirV(shader.source),
});
Ok(shader_module)
Ok(ShaderModuleWrapper(shader_module))
}
fn create_uniform_buffer<T: ToUniformBuffer>(&self, data: T) -> Result<ShaderInput<Self::BufferHandle>> {
fn create_uniform_buffer<T: ToUniformBuffer>(&self, data: T) -> Result<WgpuShaderInput> {
let bytes = data.to_bytes();
let buffer = self.context.device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
label: None,
@ -41,7 +122,7 @@ impl gpu_executor::GpuExecutor for NewExecutor {
Ok(ShaderInput::UniformBuffer(buffer, Type::new::<T>()))
}
fn create_storage_buffer<T: ToStorageBuffer>(&self, data: T, options: StorageBufferOptions) -> Result<ShaderInput<Self::BufferHandle>> {
fn create_storage_buffer<T: ToStorageBuffer>(&self, data: T, options: StorageBufferOptions) -> Result<WgpuShaderInput> {
let bytes = data.to_bytes();
let mut usage = wgpu::BufferUsages::empty();
@ -66,8 +147,44 @@ impl gpu_executor::GpuExecutor for NewExecutor {
});
Ok(ShaderInput::StorageBuffer(buffer, data.ty()))
}
fn create_texture_buffer<T: gpu_executor::ToTextureBuffer>(&self, data: T, options: TextureBufferOptions) -> Result<WgpuShaderInput> {
let bytes = data.to_bytes();
let usage = match options {
TextureBufferOptions::Storage => wgpu::TextureUsages::STORAGE_BINDING | wgpu::TextureUsages::COPY_DST | wgpu::TextureUsages::COPY_SRC,
TextureBufferOptions::Texture => wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST,
TextureBufferOptions::Surface => wgpu::TextureUsages::RENDER_ATTACHMENT,
};
let format = match T::format() {
TextureBufferType::Rgba32Float => wgpu::TextureFormat::Rgba32Float,
TextureBufferType::Rgba8Srgb => wgpu::TextureFormat::Rgba8UnormSrgb,
};
fn create_output_buffer(&self, len: usize, ty: Type, cpu_readable: bool) -> Result<ShaderInput<Self::BufferHandle>> {
let buffer = self.context.device.create_texture_with_data(
self.context.queue.as_ref(),
&wgpu::TextureDescriptor {
label: None,
size: wgpu::Extent3d {
width: data.size().0,
height: data.size().1,
depth_or_array_layers: 1,
},
mip_level_count: 1,
sample_count: 1,
dimension: wgpu::TextureDimension::D2,
format,
usage,
view_formats: &[format],
},
bytes.as_ref(),
);
match options {
TextureBufferOptions::Storage => Ok(ShaderInput::StorageTextureBuffer(buffer, T::ty())),
TextureBufferOptions::Texture => Ok(ShaderInput::TextureBuffer(buffer, T::ty())),
TextureBufferOptions::Surface => Ok(ShaderInput::TextureBuffer(buffer, T::ty())),
}
}
fn create_output_buffer(&self, len: usize, ty: Type, cpu_readable: bool) -> Result<WgpuShaderInput> {
log::debug!("Creating output buffer with len: {}", len);
let create_buffer = |usage| {
Ok::<_, anyhow::Error>(self.context.device.create_buffer(&BufferDescriptor {
@ -83,11 +200,11 @@ impl gpu_executor::GpuExecutor for NewExecutor {
};
Ok(buffer)
}
fn create_compute_pass(&self, layout: &gpu_executor::PipelineLayout<Self>, read_back: Option<Arc<ShaderInput<Self::BufferHandle>>>, instances: ComputePassDimensions) -> Result<CommandBuffer> {
fn create_compute_pass(&self, layout: &gpu_executor::PipelineLayout<Self>, read_back: Option<Arc<WgpuShaderInput>>, instances: ComputePassDimensions) -> Result<Self::CommandBuffer> {
let compute_pipeline = self.context.device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
label: None,
layout: None,
module: &layout.shader,
module: &layout.shader.0,
entry_point: layout.entry_point.as_str(),
});
let bind_group_layout = compute_pipeline.get_bind_group_layout(0);
@ -97,11 +214,15 @@ impl gpu_executor::GpuExecutor for NewExecutor {
.buffers
.iter()
.chain(std::iter::once(&layout.output_buffer))
.flat_map(|input| input.buffer())
.flat_map(|input| input.binding())
.enumerate()
.map(|(i, buffer)| wgpu::BindGroupEntry {
binding: i as u32,
resource: buffer.as_entire_binding(),
resource: match buffer {
gpu_executor::BindingType::UniformBuffer(buf) => buf.as_entire_binding(),
gpu_executor::BindingType::StorageBuffer(buf) => buf.as_entire_binding(),
gpu_executor::BindingType::TextureView(buf) => wgpu::BindingResource::TextureView(buf),
},
})
.collect::<Vec<_>>();
@ -123,27 +244,77 @@ impl gpu_executor::GpuExecutor for NewExecutor {
// Sets adds copy operation to command encoder.
// Will copy data from storage buffer on GPU to staging buffer on CPU.
if let Some(buffer) = read_back {
let ShaderInput::ReadBackBuffer(output, ty) = buffer.as_ref() else {
let ShaderInput::ReadBackBuffer(output, _ty) = buffer.as_ref() else {
bail!("Tried to read back from a non read back buffer");
};
let size = output.size();
assert_eq!(size, layout.output_buffer.buffer().unwrap().size());
let ShaderInput::OutputBuffer(output_buffer, ty) = layout.output_buffer.as_ref() else {
bail!("Tried to read back from a non output buffer");
};
assert_eq!(size, output_buffer.size());
assert_eq!(ty, &layout.output_buffer.ty());
encoder.copy_buffer_to_buffer(
layout.output_buffer.buffer().ok_or_else(|| anyhow::anyhow!("Tried to use an non buffer as the shader output"))?,
0,
output,
0,
size,
);
encoder.copy_buffer_to_buffer(output_buffer, 0, output, 0, size);
}
// Submits command encoder for processing
Ok(encoder.finish())
Ok(CommandBufferWrapper(encoder.finish()))
}
fn create_render_pass(&self, texture: Arc<ShaderInput<Self>>, canvas: Arc<SurfaceHandle<wgpu::Surface>>) -> Result<()> {
let texture = texture.texture().expect("Expected texture input");
let texture_view = texture.create_view(&wgpu::TextureViewDescriptor::default());
let output = canvas.as_ref().surface.get_current_texture()?;
let view = output.texture.create_view(&wgpu::TextureViewDescriptor {
format: Some(wgpu::TextureFormat::Rgba8Unorm),
..Default::default()
});
let output_texture_bind_group = self.context.device.create_bind_group(&wgpu::BindGroupDescriptor {
layout: &self.render_configuration.texture_bind_group_layout,
entries: &[
wgpu::BindGroupEntry {
binding: 0,
resource: wgpu::BindingResource::TextureView(&texture_view),
},
wgpu::BindGroupEntry {
binding: 1,
resource: wgpu::BindingResource::Sampler(&self.render_configuration.sampler),
},
],
label: Some("output_texture_bind_group"),
});
let mut encoder = self.context.device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: Some("Render Encoder") });
{
let mut render_pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
label: Some("Render Pass"),
color_attachments: &[Some(wgpu::RenderPassColorAttachment {
view: &view,
resolve_target: None,
ops: wgpu::Operations {
load: wgpu::LoadOp::Clear(wgpu::Color::GREEN),
store: true,
},
})],
depth_stencil_attachment: None,
});
render_pass.set_pipeline(&self.render_configuration.render_pipeline);
render_pass.set_bind_group(0, &output_texture_bind_group, &[]);
render_pass.set_vertex_buffer(0, self.render_configuration.vertex_buffer.slice(..));
render_pass.set_index_buffer(self.render_configuration.index_buffer.slice(..), wgpu::IndexFormat::Uint16);
render_pass.draw_indexed(0..self.render_configuration.num_indices, 0, 0..1);
}
let encoder = encoder.finish();
self.context.queue.submit(Some(encoder));
output.present();
Ok(())
}
fn execute_compute_pipeline(&self, encoder: Self::CommandBuffer) -> Result<()> {
self.context.queue.submit(Some(encoder));
self.context.queue.submit(Some(encoder.0));
// Poll the device in a blocking manner so that our future resolves.
// In an actual application, `device.poll(...)` should
@ -152,7 +323,7 @@ impl gpu_executor::GpuExecutor for NewExecutor {
Ok(())
}
fn read_output_buffer(&self, buffer: Arc<ShaderInput<Self::BufferHandle>>) -> Pin<Box<dyn Future<Output = Result<Vec<u8>>>>> {
fn read_output_buffer(&self, buffer: Arc<ShaderInput<Self>>) -> Pin<Box<dyn Future<Output = Result<Vec<u8>>>>> {
Box::pin(async move {
if let ShaderInput::ReadBackBuffer(buffer, _) = buffer.as_ref() {
let buffer_slice = buffer.slice(..);
@ -187,13 +358,168 @@ impl gpu_executor::GpuExecutor for NewExecutor {
} else {
bail!("Tried to read a non readback buffer")
}
}) as _
})
}
fn create_texture_view(&self, texture: ShaderInput<Self>) -> Result<ShaderInput<Self>> {
//Ok(ShaderInput::TextureView(texture.create_view(&wgpu::TextureViewDescriptor::default()), ) )
let ShaderInput::TextureBuffer(texture, ty) = &texture else {
bail!("Tried to create a texture view from a non texture");
};
let view = texture.create_view(&wgpu::TextureViewDescriptor::default());
Ok(ShaderInput::TextureView(view, ty.clone()))
}
#[cfg(target_arch = "wasm32")]
fn create_surface(&self, canvas: graphene_core::WasmSurfaceHandle) -> Result<SurfaceHandle<wgpu::Surface>> {
let surface = self.context.instance.create_surface_from_canvas(canvas.surface)?;
let surface_caps = surface.get_capabilities(&self.context.adapter);
let surface_format = wgpu::TextureFormat::Rgba8Unorm;
let config = wgpu::SurfaceConfiguration {
usage: wgpu::TextureUsages::RENDER_ATTACHMENT,
format: surface_format,
width: 1920,
height: 1080,
present_mode: surface_caps.present_modes[0],
alpha_mode: wgpu::CompositeAlphaMode::PreMultiplied,
view_formats: vec![wgpu::TextureFormat::Rgba8UnormSrgb],
};
surface.configure(&self.context.device, &config);
Ok(SurfaceHandle {
surface_id: canvas.surface_id,
surface,
})
}
#[cfg(not(target_arch = "wasm32"))]
fn create_surface(&self, window: SurfaceHandle<winit::window::Window>) -> Result<SurfaceHandle<wgpu::Surface>> {
let surface = unsafe { self.context.instance.create_surface(&window.surface) }?;
let surface_id = window.surface_id;
Ok(SurfaceHandle { surface_id, surface })
}
}
impl NewExecutor {
impl WgpuExecutor {
pub async fn new() -> Option<Self> {
let context = Context::new().await?;
Some(Self { context })
let texture_bind_group_layout = context.device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
entries: &[
wgpu::BindGroupLayoutEntry {
binding: 0,
visibility: wgpu::ShaderStages::FRAGMENT,
ty: wgpu::BindingType::Texture {
multisampled: false,
view_dimension: wgpu::TextureViewDimension::D2,
sample_type: wgpu::TextureSampleType::Float { filterable: false },
},
count: None,
},
wgpu::BindGroupLayoutEntry {
binding: 1,
visibility: wgpu::ShaderStages::FRAGMENT,
ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::NonFiltering),
count: None,
},
],
label: Some("texture_bind_group_layout"),
});
let sampler = context.device.create_sampler(&wgpu::SamplerDescriptor {
address_mode_u: wgpu::AddressMode::ClampToEdge,
address_mode_v: wgpu::AddressMode::ClampToEdge,
address_mode_w: wgpu::AddressMode::ClampToEdge,
mag_filter: wgpu::FilterMode::Nearest,
min_filter: wgpu::FilterMode::Nearest,
mipmap_filter: wgpu::FilterMode::Nearest,
..Default::default()
});
let shader = context.device.create_shader_module(wgpu::ShaderModuleDescriptor {
label: Some("Shader"),
source: wgpu::ShaderSource::Wgsl(include_str!("shader.wgsl").into()),
});
let render_pipeline_layout = context.device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
label: Some("Render Pipeline Layout"),
bind_group_layouts: &[&texture_bind_group_layout],
push_constant_ranges: &[],
});
let render_pipeline = context.device.create_render_pipeline(&wgpu::RenderPipelineDescriptor {
label: Some("Render Pipeline"),
layout: Some(&render_pipeline_layout),
vertex: wgpu::VertexState {
module: &shader,
entry_point: "vs_main",
buffers: &[Vertex::desc()],
},
fragment: Some(wgpu::FragmentState {
module: &shader,
entry_point: "fs_main",
targets: &[Some(wgpu::ColorTargetState {
format: wgpu::TextureFormat::Rgba8Unorm,
blend: Some(wgpu::BlendState {
color: wgpu::BlendComponent::REPLACE,
alpha: wgpu::BlendComponent::REPLACE,
}),
write_mask: wgpu::ColorWrites::ALL,
})],
}),
primitive: wgpu::PrimitiveState {
topology: wgpu::PrimitiveTopology::TriangleList,
strip_index_format: None,
front_face: wgpu::FrontFace::Ccw,
cull_mode: None,
// Setting this to anything other than Fill requires Features::POLYGON_MODE_LINE
// or Features::POLYGON_MODE_POINT
polygon_mode: wgpu::PolygonMode::Fill,
// Requires Features::DEPTH_CLIP_CONTROL
unclipped_depth: false,
// Requires Features::CONSERVATIVE_RASTERIZATION
conservative: false,
},
depth_stencil: None,
multisample: wgpu::MultisampleState {
count: 1,
mask: !0,
alpha_to_coverage_enabled: false,
},
// If the pipeline will be used with a multiview render pass, this
// indicates how many array layers the attachments will have.
multiview: None,
});
let vertex_buffer = context.device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
label: Some("Vertex Buffer"),
contents: bytemuck::cast_slice(VERTICES),
usage: wgpu::BufferUsages::VERTEX,
});
let index_buffer = context.device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
label: Some("Index Buffer"),
contents: bytemuck::cast_slice(INDICES),
usage: wgpu::BufferUsages::INDEX,
});
let num_indices = INDICES.len() as u32;
let render_configuration = RenderConfiguration {
vertex_buffer,
index_buffer,
num_indices,
render_pipeline,
texture_bind_group_layout,
sampler,
};
Some(Self { context, render_configuration })
}
}
#[derive(Debug)]
struct RenderConfiguration {
vertex_buffer: wgpu::Buffer,
index_buffer: wgpu::Buffer,
num_indices: u32,
render_pipeline: wgpu::RenderPipeline,
texture_bind_group_layout: wgpu::BindGroupLayout,
sampler: wgpu::Sampler,
}