Only dispatch once per workgroup, not once per pixel

2025-08-03 21:08:18 +00:00 · 2023-06-26 21:08:43 +02:00 · 2023-06-26 21:08:43 +02:00 · edb33e0c82
commit edb33e0c82
parent 215589ff49
8 changed files with 1076 additions and 75 deletions
--- a/node-graph/gcore/src/raster/adjustments.rs
+++ b/node-graph/gcore/src/raster/adjustments.rs
@@ -335,11 +335,11 @@ fn hue_shift_color_node(color: Color, hue_shift: f32, saturation_shift: f32, lig
 	let [hue, saturation, lightness, alpha] = color.to_hsla();

 	let color = Color::from_hsla(
-		(hue + hue_shift as f32 / 360.) % 1.,
+		(hue + hue_shift / 360.) % 1.,
 		// TODO: Improve the way saturation works (it's slightly off)
-		(saturation + saturation_shift as f32 / 100.).clamp(0., 1.),
+		(saturation + saturation_shift / 100.).clamp(0., 1.),
 		// TODO: Fix the way lightness works (it's very off)
-		(lightness + lightness_shift as f32 / 100.).clamp(0., 1.),
+		(lightness + lightness_shift / 100.).clamp(0., 1.),
 		alpha,
 	);

--- a/node-graph/gpu-compiler/src/lib.rs
+++ b/node-graph/gpu-compiler/src/lib.rs
@@ -202,7 +202,7 @@ pub fn serialize_gpu(networks: &[ProtoNetwork], io: &ShaderIO) -> anyhow::Result
 	context.insert("input_nodes", &input_nodes);
 	context.insert("output_nodes", &output_nodes);
 	context.insert("nodes", &nodes);
-	context.insert("compute_threads", &64);
+	context.insert("compute_threads", "12, 8");
 	Ok(tera.render("spirv", &context)?)
 }

--- a/node-graph/gpu-executor/src/lib.rs
+++ b/node-graph/gpu-executor/src/lib.rs
@@ -357,12 +357,23 @@ where

 /// A struct representing a compute pipeline.
 pub struct PipelineLayout<E: GpuExecutor + ?Sized> {
-	pub shader: E::ShaderHandle,
+	pub shader: Arc<E::ShaderHandle>,
 	pub entry_point: String,
-	pub bind_group: Bindgroup<E>,
+	pub bind_group: Arc<Bindgroup<E>>,
 	pub output_buffer: Arc<ShaderInput<E>>,
 }

+impl<E: GpuExecutor + ?Sized> Clone for PipelineLayout<E> {
+	fn clone(&self) -> Self {
+		Self {
+			shader: self.shader.clone(),
+			entry_point: self.entry_point.clone(),
+			bind_group: self.bind_group.clone(),
+			output_buffer: self.output_buffer.clone(),
+		}
+	}
+}
+
 unsafe impl<E: GpuExecutor + ?Sized + StaticType> StaticType for PipelineLayout<E>
 where
 	E::Static: GpuExecutor,
@@ -457,9 +468,9 @@ pub struct CreatePipelineLayoutNode<_E, EntryPoint, Bindgroup, OutputBuffer> {
 #[node_macro::node_fn(CreatePipelineLayoutNode<_E>)]
 async fn create_pipeline_layout_node<_E: GpuExecutor>(shader: _E::ShaderHandle, entry_point: String, bind_group: Bindgroup<_E>, output_buffer: Arc<ShaderInput<_E>>) -> PipelineLayout<_E> {
 	PipelineLayout {
-		shader,
+		shader: shader.into(),
 		entry_point,
-		bind_group,
+		bind_group: bind_group.into(),
 		output_buffer,
 	}
 }
--- a/node-graph/graphene-cli/src/main.rs
+++ b/node-graph/graphene-cli/src/main.rs
@@ -44,7 +44,7 @@ async fn main() -> Result<(), Box<dyn Error>> {

 	let device = application_io.gpu_executor().unwrap().context.device.clone();
 	std::thread::spawn(move || loop {
-		std::thread::sleep(std::time::Duration::from_millis(1));
+		std::thread::sleep(std::time::Duration::from_nanos(1));
 		device.poll(wgpu::Maintain::Poll);
 	});

--- a/node-graph/graphene-cli/test_files/gray_no_cache.graphite
+++ b/node-graph/graphene-cli/test_files/gray_no_cache.graphite
@@ -310,7 +310,7 @@
                                                        ],
                                                        "outputs": [
                                                            {
-                                                                "node_id": 2,
+                                                                "node_id": 1,
                                                                "node_output_index": 0
                                                            }
                                                        ],
@@ -341,39 +341,6 @@
                                                                },
                                                                "path": null
                                                            },
-                                                            "2": {
-                                                                "name": "Cache",
-                                                                "inputs": [
-                                                                    {
-                                                                        "ShortCircut": {
-                                                                            "Concrete": {
-                                                                                "name": "()",
-                                                                                "size": 0,
-                                                                                "align": 1
-                                                                            }
-                                                                        }
-                                                                    },
-                                                                    {
-                                                                        "Node": {
-                                                                            "node_id": 1,
-                                                                            "output_index": 0,
-                                                                            "lambda": false
-                                                                        }
-                                                                    }
-                                                                ],
-                                                                "implementation": {
-                                                                    "Unresolved": {
-                                                                        "name": "graphene_core::memo::MemoNode<_, _>"
-                                                                    }
-                                                                },
-                                                                "metadata": {
-                                                                    "position": [
-                                                                        0,
-                                                                        0
-                                                                    ]
-                                                                },
-                                                                "path": null
-                                                            },
                                                            "1": {
                                                                "name": "Upload Texture",
                                                                "inputs": [
--- a/node-graph/graphene-cli/test_files/id_no_cache.graphite
+++ b/node-graph/graphene-cli/test_files/id_no_cache.graphite
--- a/node-graph/gstd/src/gpu_nodes.rs
+++ b/node-graph/gstd/src/gpu_nodes.rs
@@ -8,6 +8,8 @@ use graphene_core::raster::*;
 use graphene_core::*;
 use wgpu_executor::WgpuExecutor;

+use std::cell::RefCell;
+use std::collections::HashMap;
 use std::sync::Arc;

 use crate::wasm_application_io::WasmApplicationIo;
@@ -42,11 +44,69 @@ async fn compile_gpu(node: &'input DocumentNode, mut typing_context: TypingConte
 pub struct MapGpuNode<Node, EditorApi> {
 	node: Node,
 	editor_api: EditorApi,
+	cache: RefCell<HashMap<String, ComputePass<WgpuExecutor>>>,
 }

-#[node_macro::node_fn(MapGpuNode)]
+struct ComputePass<T: GpuExecutor> {
+	pipeline_layout: PipelineLayout<T>,
+	readback_buffer: Option<Arc<ShaderInput<T>>>,
+}
+
+impl<T: GpuExecutor> Clone for ComputePass<T> {
+	fn clone(&self) -> Self {
+		Self {
+			pipeline_layout: self.pipeline_layout.clone(),
+			readback_buffer: self.readback_buffer.clone(),
+		}
+	}
+}
+
+#[node_macro::node_impl(MapGpuNode)]
 async fn map_gpu<'a: 'input>(image: ImageFrame<Color>, node: DocumentNode, editor_api: graphene_core::application_io::EditorApi<'a, WasmApplicationIo>) -> ImageFrame<Color> {
 	log::debug!("Executing gpu node");
+	let executor = &editor_api.application_io.gpu_executor.as_ref().unwrap();
+
+	// TODO: The cache should be based on the network topology not the node name
+	let compute_pass_descriptor = self
+		.cache
+		.borrow_mut()
+		.entry(node.name.clone())
+		.or_insert_with(|| futures::executor::block_on(create_compute_pass_descriptor(node, &image, executor)))
+		.clone();
+
+	let compute_pass = executor
+		.create_compute_pass(
+			&compute_pass_descriptor.pipeline_layout,
+			compute_pass_descriptor.readback_buffer.clone(),
+			ComputePassDimensions::XY(image.image.width / 12 + 1, image.image.height / 8 + 1),
+		)
+		.unwrap();
+	executor.execute_compute_pipeline(compute_pass).unwrap();
+	log::error!("executed pipeline");
+	log::debug!("reading buffer");
+	let result = executor.read_output_buffer(compute_pass_descriptor.readback_buffer.clone().unwrap()).await.unwrap();
+	let colors = bytemuck::pod_collect_to_vec::<u8, Color>(result.as_slice());
+	ImageFrame {
+		image: Image {
+			data: colors,
+			width: image.image.width,
+			height: image.image.height,
+		},
+		transform: image.transform,
+	}
+}
+
+impl<Node, EditorApi> MapGpuNode<Node, EditorApi> {
+	pub fn new(node: Node, editor_api: EditorApi) -> Self {
+		Self {
+			node,
+			editor_api,
+			cache: RefCell::new(HashMap::new()),
+		}
+	}
+}
+
+async fn create_compute_pass_descriptor(node: DocumentNode, image: &ImageFrame<Color>, executor: &&WgpuExecutor) -> ComputePass<WgpuExecutor> {
 	let compiler = graph_craft::graphene_compiler::Compiler {};
 	let inner_network = NodeNetwork::value_network(node);

@@ -68,7 +128,7 @@ async fn map_gpu<'a: 'input>(image: ImageFrame<Color>, node: DocumentNode, edito
 				implementation: DocumentNodeImplementation::Unresolved("graphene_core::value::CopiedNode".into()),
 				..Default::default()
 			},*/
-			/*
+				/*
 			DocumentNode {
 				name: "GetNode".into(),
 				inputs: vec![NodeInput::node(1, 0), NodeInput::node(0, 0)],
@@ -124,8 +184,6 @@ async fn map_gpu<'a: 'input>(image: ImageFrame<Color>, node: DocumentNode, edito
 	//return ImageFrame::empty();
 	let len: usize = image.image.data.len();

-	let executor = &editor_api.application_io.gpu_executor.as_ref().unwrap();
-
 	/*
 	let canvas = editor_api.application_io.create_surface();

@@ -175,36 +233,18 @@ async fn map_gpu<'a: 'input>(image: ImageFrame<Color>, node: DocumentNode, edito
 	let shader = executor.load_shader(shader).unwrap();
 	log::debug!("loaded shader");
 	let pipeline = PipelineLayout {
-		shader,
+		shader: shader.into(),
 		entry_point: "eval".to_string(),
-		bind_group,
+		bind_group: bind_group.into(),
 		output_buffer: output_buffer.clone(),
 	};
 	log::debug!("created pipeline");
-	let compute_pass = executor
-		.create_compute_pass(&pipeline, Some(readback_buffer.clone()), ComputePassDimensions::XY(image.image.width, image.image.height))
-		.unwrap();
-	executor.execute_compute_pipeline(compute_pass).unwrap();
-	log::debug!("executed pipeline");
-	log::debug!("reading buffer");
-	let result = executor.read_output_buffer(readback_buffer).await.unwrap();
-	let colors = bytemuck::pod_collect_to_vec::<u8, Color>(result.as_slice());
-	ImageFrame {
-		image: Image {
-			data: colors,
-			width: image.image.width,
-			height: image.image.height,
-		},
-		transform: image.transform,
-	}

-	/*
-	let executor: GpuExecutor = GpuExecutor::new(Context::new().await.unwrap(), shader.into(), "gpu::eval".into()).unwrap();
-	let data: Vec<_> = input.into_iter().collect();
-	let result = executor.execute(Box::new(data)).unwrap();
-	let result = dyn_any::downcast::<Vec<_O>>(result).unwrap();
-	*result
-	*/
+	let compute_pass_descriptor = ComputePass {
+		pipeline_layout: pipeline,
+		readback_buffer: Some(readback_buffer.clone()),
+	};
+	compute_pass_descriptor
 }
 /*
 #[node_macro::node_fn(MapGpuNode)]
@@ -414,9 +454,9 @@ async fn blend_gpu_image(foreground: ImageFrame<Color>, background: ImageFrame<C
 	let shader = executor.load_shader(shader).unwrap();
 	log::debug!("loaded shader");
 	let pipeline = PipelineLayout {
-		shader,
+		shader: shader.into(),
 		entry_point: "eval".to_string(),
-		bind_group,
+		bind_group: bind_group.into(),
 		output_buffer: output_buffer.clone(),
 	};
 	log::debug!("created pipeline");
--- a/node-graph/wgpu-executor/src/lib.rs
+++ b/node-graph/wgpu-executor/src/lib.rs
@@ -253,7 +253,7 @@ impl gpu_executor::GpuExecutor for WgpuExecutor {
 			entries: entries.as_slice(),
 		});

-		let mut encoder = self.context.device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
+		let mut encoder = self.context.device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: Some("compute encoder") });
 		{
 			let dimensions = instances.get();
 			let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { label: None });