New node: Merge by Distance (#2307)

* add `merge_by_distance` node

* remove unneeded features of petgraph

Also re-arrange the module slightly: because functions are no longer
async, there was name shadowing with the old arrangement.

* remove region if start or end segment is removed

* remove stuff I ended up not using

* "algos" -> "algorithms"

* Code review pass

---------

Co-authored-by: Keavon Chambers <keavon@keavon.com>
Richard Dodd 2025-03-13 03:40:44 +00:00 committed by GitHub
parent 2d18378823
commit 0570edc463
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 291 additions and 12 deletions

Cargo.lock (generated): 24 changes
View file

@@ -1751,6 +1751,12 @@ version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80"
[[package]]
name = "fixedbitset"
version = "0.5.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99"
[[package]]
name = "flate2"
version = "1.1.0"
@@ -2482,8 +2488,10 @@ dependencies = [
"node-macro",
"num-derive",
"num-traits",
"petgraph 0.7.1",
"rand 0.9.0",
"rand_chacha 0.9.0",
"rustc-hash 2.1.1",
"rustybuzz 0.20.1",
"serde",
"serde_json",
@@ -3916,7 +3924,7 @@ dependencies = [
"hexf-parse",
"indexmap 2.7.1",
"log",
"petgraph",
"petgraph 0.6.5",
"rustc-hash 1.1.0",
"spirv",
"termcolor",
@@ -4659,7 +4667,17 @@ version = "0.6.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db"
dependencies = [
"fixedbitset",
"fixedbitset 0.4.2",
"indexmap 2.7.1",
]
[[package]]
name = "petgraph"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772"
dependencies = [
"fixedbitset 0.5.7",
"indexmap 2.7.1",
]
@@ -7217,7 +7235,7 @@ dependencies = [
"memchr",
"nom",
"once_cell",
"petgraph",
"petgraph 0.6.5",
]
[[package]]

View file

@@ -95,6 +95,9 @@ syn = { version = "2.0", default-features = false, features = [
"derive",
] }
kurbo = { version = "0.11.0", features = ["serde"] }
petgraph = { version = "0.7.1", default-features = false, features = [
"graphmap",
] }
[profile.dev]
opt-level = 1

View file

@@ -50,6 +50,11 @@ glam = { workspace = true, default-features = false, features = [
"scalar-math",
] }
serde_json = { workspace = true }
petgraph = { workspace = true, default-features = false, features = [
"graphmap",
] }
rustc-hash = { workspace = true }
math-parser = { path = "../../libraries/math-parser" }
# Required dependencies
half = { version = "2.4.1", default-features = false, features = ["bytemuck"] }
@@ -77,7 +82,6 @@ web-sys = { workspace = true, optional = true, features = [
image = { workspace = true, optional = true, default-features = false, features = [
"png",
] }
math-parser = { path = "../../libraries/math-parser" }
[dev-dependencies]
# Workspace dependencies

View file

@@ -0,0 +1,95 @@
use crate::vector::{PointId, VectorData, VectorDataIndex};
use glam::DVec2;
use petgraph::prelude::UnGraphMap;
use rustc_hash::FxHashSet;
impl VectorData {
/// Collapse all points with edges shorter than the specified distance
pub(crate) fn merge_by_distance(&mut self, distance: f64) {
// Treat self as an undirected graph
let indices = VectorDataIndex::build_from(self);
// Graph containing only short edges, referencing the data graph
let mut short_edges = UnGraphMap::new();
for segment_id in self.segment_ids().iter().copied() {
let length = indices.segment_chord_length(segment_id);
if length < distance {
let [start, end] = indices.segment_ends(segment_id);
let start = indices.point_graph.node_weight(start).unwrap().id;
let end = indices.point_graph.node_weight(end).unwrap().id;
short_edges.add_node(start);
short_edges.add_node(end);
short_edges.add_edge(start, end, segment_id);
}
}
// Group connected segments to collapse them into a single point
// TODO: there are a few possible algorithms for this - perhaps test empirically to find fastest
let collapse: Vec<FxHashSet<PointId>> = petgraph::algo::tarjan_scc(&short_edges).into_iter().map(|connected| connected.into_iter().collect()).collect();
let average_position = collapse
.iter()
.map(|collapse_set| {
let sum: DVec2 = collapse_set.iter().map(|&id| indices.point_position(id, self)).sum();
sum / collapse_set.len() as f64
})
.collect::<Vec<_>>();
// Collect points and segments to delete at the end to avoid invalidating indices
let mut points_to_delete = FxHashSet::default();
let mut segments_to_delete = FxHashSet::default();
for (mut collapse_set, average_pos) in collapse.into_iter().zip(average_position.into_iter()) {
// Remove any segments where both endpoints are in the collapse set
segments_to_delete.extend(self.segment_domain.iter().filter_map(|(id, start_offset, end_offset, _)| {
let start = self.point_domain.ids()[start_offset];
let end = self.point_domain.ids()[end_offset];
if collapse_set.contains(&start) && collapse_set.contains(&end) { Some(id) } else { None }
}));
// Delete all points but the first, set its position to the average, and update segments
let first_id = collapse_set.iter().copied().next().unwrap();
collapse_set.remove(&first_id);
let first_offset = indices.point_to_offset[&first_id];
// Look for segments with endpoints in `collapse_set` and replace them with the point we are collapsing to
for (_, start_offset, end_offset, handles) in self.segment_domain.iter_mut() {
let start_id = self.point_domain.ids()[*start_offset];
let end_id = self.point_domain.ids()[*end_offset];
// Update Bezier handles for moved points
if start_id == first_id {
let point_position = self.point_domain.position[*start_offset];
handles.move_start(average_pos - point_position);
}
if end_id == first_id {
let point_position = self.point_domain.position[*end_offset];
handles.move_end(average_pos - point_position);
}
// Replace removed points with the collapsed point
if collapse_set.contains(&start_id) {
let point_position = self.point_domain.position[*start_offset];
*start_offset = first_offset;
handles.move_start(average_pos - point_position);
}
if collapse_set.contains(&end_id) {
let point_position = self.point_domain.position[*end_offset];
*end_offset = first_offset;
handles.move_end(average_pos - point_position);
}
}
// Update the position of the collapsed point
self.point_domain.position[first_offset] = average_pos;
points_to_delete.extend(collapse_set)
}
// Remove faces whose start or end segments are removed
// TODO: Adjust faces and only delete if all (or all but one) segments are removed
self.region_domain
.retain_with_region(|_, segment_range| !segments_to_delete.contains(segment_range.start()) && !segments_to_delete.contains(segment_range.end()));
self.segment_domain.retain(|id| !segments_to_delete.contains(id), usize::MAX);
self.point_domain.retain(&mut self.segment_domain, |id| !points_to_delete.contains(id));
}
}
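
For context, a minimal standalone sketch (not part of the commit) of the grouping step above: only the short edges are added to an undirected petgraph graph, and `tarjan_scc` then yields its connected components, since on an undirected graph every strongly connected component is simply a connected component. Plain `u32` values stand in for Graphite's `PointId`, and the chord length is used directly as the edge weight.

use petgraph::algo::tarjan_scc;
use petgraph::prelude::UnGraphMap;

// Sketch: group points joined by edges shorter than `distance`.
// Each returned group is one set of points that would collapse into a single point.
fn group_short_edges(edges: &[(u32, u32, f64)], distance: f64) -> Vec<Vec<u32>> {
    let mut short_edges: UnGraphMap<u32, f64> = UnGraphMap::new();
    for &(start, end, length) in edges {
        if length < distance {
            short_edges.add_node(start);
            short_edges.add_node(end);
            short_edges.add_edge(start, end, length);
        }
    }
    // On an undirected graph, Tarjan's SCCs are exactly the connected components.
    tarjan_scc(&short_edges)
}

fn main() {
    // Edges (1, 2) and (2, 3) are short, so points 1, 2, and 3 form one collapse group;
    // the long edge (3, 4) is skipped entirely, leaving point 4 untouched.
    let groups = group_short_edges(&[(1, 2, 0.5), (2, 3, 0.8), (3, 4, 50.0)], 1.0);
    assert_eq!(groups.len(), 1);
    println!("{groups:?}");
}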

View file

@@ -0,0 +1 @@
mod merge_by_distance;

View file

@@ -1,3 +1,4 @@
mod algorithms;
pub mod brush_stroke;
pub mod generator_nodes;
pub mod misc;

View file

@@ -1,4 +1,5 @@
mod attributes;
mod indexed;
mod modification;
use super::style::{PathStyle, Stroke};
@@ -9,6 +10,7 @@ use bezier_rs::ManipulatorGroup;
use core::borrow::Borrow;
use dyn_any::DynAny;
use glam::{DAffine2, DVec2};
pub use indexed::VectorDataIndex;
pub use modification::*;
use std::collections::HashMap;
@@ -68,6 +70,8 @@ pub type VectorDataTable = Instances<VectorData>;
/// [VectorData] is passed between nodes.
/// It contains a list of subpaths (that may be open or closed), a transform, and some style information.
///
/// Segments are connected if they share endpoints.
#[derive(Clone, Debug, PartialEq, DynAny)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct VectorData {
@@ -265,6 +269,11 @@ impl VectorData {
index.flat_map(|index| self.segment_domain.connected_points(index).map(|index| self.point_domain.ids()[index]))
}
/// Get an array slice of all segment IDs.
pub fn segment_ids(&self) -> &[SegmentId] {
self.segment_domain.ids()
}
/// Enumerate all segments that start at the point.
pub fn start_connected(&self, point: PointId) -> impl Iterator<Item = SegmentId> + '_ {
let index = [self.point_domain.resolve_id(point)].into_iter().flatten();

View file

@@ -1,4 +1,6 @@
use crate::vector::vector_data::{HandleId, VectorData};
use bezier_rs::BezierHandles;
use core::iter::zip;
use dyn_any::DynAny;
use glam::{DAffine2, DVec2};
use std::collections::HashMap;
@@ -80,7 +82,7 @@ impl core::hash::BuildHasher for NoHashBuilder {
pub struct PointDomain {
id: Vec<PointId>,
#[serde(alias = "positions")]
position: Vec<DVec2>,
pub(crate) position: Vec<DVec2>,
}
impl core::hash::Hash for PointDomain {
@@ -112,7 +114,8 @@ impl PointDomain {
id_map.push(new_index);
new_index += 1;
} else {
id_map.push(usize::MAX); // A placeholder for invalid ids. This is checked after the segment domain is modified.
// A placeholder for invalid IDs. This is checked after the segment domain is modified.
id_map.push(usize::MAX);
}
}
@@ -176,6 +179,11 @@ impl PointDomain {
*pos = transform.transform_point2(*pos);
}
}
/// Iterate over point IDs and positions
pub fn iter(&self) -> impl Iterator<Item = (PointId, DVec2)> + '_ {
self.ids().iter().copied().zip(self.positions().iter().copied())
}
}
#[derive(Clone, Debug, Default, PartialEq, Hash, DynAny)]
@@ -343,6 +351,7 @@ impl SegmentDomain {
})
}
/// Get index from ID by linear search. Takes `O(n)` time.
fn id_to_index(&self, id: SegmentId) -> Option<usize> {
debug_assert_eq!(self.id.len(), self.handles.len());
debug_assert_eq!(self.id.len(), self.start_point.len());
@@ -397,11 +406,34 @@ impl SegmentDomain {
pub(crate) fn connected_count(&self, point: usize) -> usize {
self.all_connected(point).count()
}
/// Iterates over segments in the domain.
///
/// Tuple is: (id, start point, end point, handles)
pub(crate) fn iter(&self) -> impl Iterator<Item = (SegmentId, usize, usize, BezierHandles)> + '_ {
let ids = self.id.iter().copied();
let start_point = self.start_point.iter().copied();
let end_point = self.end_point.iter().copied();
let handles = self.handles.iter().copied();
zip(ids, zip(start_point, zip(end_point, handles))).map(|(id, (start_point, (end_point, handles)))| (id, start_point, end_point, handles))
}
/// Iterates over segments in the domain, mutably.
///
/// Tuple is: (id, start point, end point, handles)
pub(crate) fn iter_mut(&mut self) -> impl Iterator<Item = (&mut SegmentId, &mut usize, &mut usize, &mut BezierHandles)> + '_ {
let ids = self.id.iter_mut();
let start_point = self.start_point.iter_mut();
let end_point = self.end_point.iter_mut();
let handles = self.handles.iter_mut();
zip(ids, zip(start_point, zip(end_point, handles))).map(|(id, (start_point, (end_point, handles)))| (id, start_point, end_point, handles))
}
}
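
As an aside (not from the commit), the nested `zip` used by the new `iter` and `iter_mut` pairs the four parallel columns up as `(id, (start, (end, handles)))`, which the closure then flattens into a plain 4-tuple. A tiny self-contained illustration with placeholder column types:

use core::iter::zip;

fn main() {
    // Four parallel columns, mirroring SegmentDomain's struct-of-vectors layout.
    let ids = vec![10_u64, 11];
    let starts = vec![0_usize, 1];
    let ends = vec![1_usize, 2];
    let handles = vec!['a', 'b'];

    // `zip` nests to the right, so each item is (id, (start, (end, handle))).
    let rows: Vec<(u64, usize, usize, char)> = zip(ids, zip(starts, zip(ends, handles)))
        .map(|(id, (start, (end, handle)))| (id, start, end, handle))
        .collect();

    assert_eq!(rows, vec![(10, 0, 1, 'a'), (11, 1, 2, 'b')]);
}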
#[derive(Clone, Debug, Default, PartialEq, Hash, DynAny)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
/// Stores data which is per-region. A region is an enclosed area composed of a range of segments from the [`SegmentDomain`] that can be given a fill. In future this will be extendable at runtime with custom attributes.
/// Stores data which is per-region. A region is an enclosed area composed of a range of segments from the
/// [`SegmentDomain`] that can be given a fill. In future this will be extendable at runtime with custom attributes.
pub struct RegionDomain {
#[serde(alias = "ids")]
id: Vec<RegionId>,
@@ -432,6 +464,19 @@ impl RegionDomain {
self.id.retain(&f);
}
/// Like [`Self::retain`] but also gives the function access to the segment range.
///
/// Note that this function requires an allocation that `retain` avoids.
pub fn retain_with_region(&mut self, f: impl Fn(&RegionId, &core::ops::RangeInclusive<SegmentId>) -> bool) {
let keep = self.id.iter().zip(self.segment_range.iter()).map(|(id, range)| f(id, range)).collect::<Vec<_>>();
let mut iter = keep.iter().copied();
self.segment_range.retain(|_| iter.next().unwrap());
let mut iter = keep.iter().copied();
self.fill.retain(|_| iter.next().unwrap());
let mut iter = keep.iter().copied();
self.id.retain(|_| iter.next().unwrap());
}
pub fn push(&mut self, id: RegionId, segment_range: core::ops::RangeInclusive<SegmentId>, fill: FillId) {
if self.id.contains(&id) {
warn!("Duplicate region");

View file

@@ -0,0 +1,90 @@
use super::{PointId, SegmentId, VectorData};
use glam::DVec2;
use petgraph::graph::{EdgeIndex, NodeIndex, UnGraph};
use rustc_hash::FxHashMap;
/// All the fixed fields of a point from the point domain.
pub struct Point {
pub id: PointId,
pub position: DVec2,
}
/// Useful indexes to speed up various operations on `VectorData`.
///
/// Important: It is the user's responsibility to ensure the indexes remain valid after mutations to the data.
pub struct VectorDataIndex {
/// Points and segments form a graph. Store it here in a form amenable to graph algorithms.
///
/// Currently, segment data is not stored as it is not used, but it could easily be added.
pub(crate) point_graph: UnGraph<Point, ()>,
pub(crate) segment_to_edge: FxHashMap<SegmentId, EdgeIndex>,
/// Get the offset from the point ID.
pub(crate) point_to_offset: FxHashMap<PointId, usize>,
// TODO: faces
}
impl VectorDataIndex {
/// Construct a [`VectorDataIndex`] by building indexes from the given [`VectorData`]. Takes `O(n)` time.
pub fn build_from(data: &VectorData) -> Self {
let point_to_offset = data.point_domain.ids().iter().copied().enumerate().map(|(a, b)| (b, a)).collect::<FxHashMap<_, _>>();
let mut point_to_node = FxHashMap::default();
let mut segment_to_edge = FxHashMap::default();
let mut graph = UnGraph::new_undirected();
for (point_id, position) in data.point_domain.iter() {
let idx = graph.add_node(Point { id: point_id, position });
point_to_node.insert(point_id, idx);
}
for (segment_id, start_offset, end_offset, ..) in data.segment_domain.iter() {
let start_id = data.point_domain.ids()[start_offset];
let end_id = data.point_domain.ids()[end_offset];
let edge = graph.add_edge(point_to_node[&start_id], point_to_node[&end_id], ());
segment_to_edge.insert(segment_id, edge);
}
Self {
point_graph: graph,
segment_to_edge,
point_to_offset,
}
}
/// Fetch the length of given segment's chord. Takes `O(1)` time.
///
/// # Panics
///
/// Will panic if no segment with the given ID is found.
pub fn segment_chord_length(&self, id: SegmentId) -> f64 {
let edge_idx = self.segment_to_edge[&id];
let (start, end) = self.point_graph.edge_endpoints(edge_idx).unwrap();
let start_position = self.point_graph.node_weight(start).unwrap().position;
let end_position = self.point_graph.node_weight(end).unwrap().position;
(start_position - end_position).length()
}
/// Get the ends of a segment. Takes `O(1)` time.
///
/// The IDs will be ordered [smallest, largest] so they can be used to find other segments with the same endpoints, regardless of direction.
///
/// # Panics
///
/// This function will panic if the ID is not present.
pub fn segment_ends(&self, id: SegmentId) -> [NodeIndex; 2] {
let (start, end) = self.point_graph.edge_endpoints(self.segment_to_edge[&id]).unwrap();
if start < end { [start, end] } else { [end, start] }
}
/// Get the physical location of a point. Takes `O(1)` time.
///
/// # Panics
///
/// Will panic if `id` isn't in the data.
pub fn point_position(&self, id: PointId, data: &VectorData) -> DVec2 {
let offset = self.point_to_offset[&id];
data.point_domain.positions()[offset]
}
}
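
To illustrate why `segment_ends` orders its result (a sketch, not part of the commit): once an endpoint pair is normalized to `[smallest, largest]`, two segments joining the same pair of points compare equal regardless of direction, so coincident segments can be spotted with an ordinary set. The `u32` IDs below are placeholders for `NodeIndex` values.

use std::collections::HashSet;

// Sketch: normalize an endpoint pair so segment direction no longer matters.
fn ordered(a: u32, b: u32) -> [u32; 2] {
    if a < b { [a, b] } else { [b, a] }
}

fn main() {
    // The second segment joins the same two points as the first, just reversed.
    let segments = [(3_u32, 7_u32), (7, 3), (1, 2)];
    let mut seen = HashSet::new();
    let duplicates: Vec<(u32, u32)> = segments.iter().copied().filter(|&(a, b)| !seen.insert(ordered(a, b))).collect();
    assert_eq!(duplicates, vec![(7, 3)]);
}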

View file

@@ -886,9 +886,9 @@ fn bevel_algorithm(mut vector_data: VectorData, vector_data_transform: DAffine2,
bezier.split(bezier_rs::TValue::Parametric(parametric))[1]
}
/// Produces a list that correspons with the point id. The value is how many segments are connected.
fn segments_connected_count(vector_data: &VectorData) -> Vec<u8> {
// Count the number of segments connectign to each point.
/// Produces a list that corresponds with the point ID. The value is how many segments are connected.
fn segments_connected_count(vector_data: &VectorData) -> Vec<usize> {
// Count the number of segments connecting to each point.
let mut segments_connected_count = vec![0; vector_data.point_domain.ids().len()];
for &point_index in vector_data.segment_domain.start_point().iter().chain(vector_data.segment_domain.end_point()) {
segments_connected_count[point_index] += 1;
@@ -904,7 +904,7 @@ fn bevel_algorithm(mut vector_data: VectorData, vector_data_transform: DAffine2,
}
/// Updates the index so that it points at a point with the position. If nobody else will look at the index, the original point is updated. Otherwise a new point is created.
fn create_or_modify_point(point_domain: &mut PointDomain, segments_connected_count: &mut [u8], pos: DVec2, index: &mut usize, next_id: &mut PointId, new_segments: &mut Vec<[usize; 2]>) {
fn create_or_modify_point(point_domain: &mut PointDomain, segments_connected_count: &mut [usize], pos: DVec2, index: &mut usize, next_id: &mut PointId, new_segments: &mut Vec<[usize; 2]>) {
segments_connected_count[*index] -= 1;
if segments_connected_count[*index] == 0 {
// If nobody else is going to look at this point, we're alright to modify it
@@ -922,7 +922,7 @@ fn bevel_algorithm(mut vector_data: VectorData, vector_data_transform: DAffine2,
}
}
fn update_existing_segments(vector_data: &mut VectorData, vector_data_transform: DAffine2, distance: f64, segments_connected: &mut [u8]) -> Vec<[usize; 2]> {
fn update_existing_segments(vector_data: &mut VectorData, vector_data_transform: DAffine2, distance: f64, segments_connected: &mut [usize]) -> Vec<[usize; 2]> {
let mut next_id = vector_data.point_domain.next_id();
let mut new_segments = Vec::new();
@@ -995,6 +995,19 @@ fn bevel(_: impl Ctx, source: VectorDataTable, #[default(10.)] distance: Length)
result
}
#[node_macro::node(name("Merge by Distance"), category("Vector"), path(graphene_core::vector))]
fn merge_by_distance(_: impl Ctx, source: VectorDataTable, #[default(10.)] distance: Length) -> VectorDataTable {
let source_transform = source.transform();
let mut source = source.one_instance().instance.clone();
source.merge_by_distance(distance);
let mut result = VectorDataTable::new(source);
*result.transform_mut() = source_transform;
result
}
#[node_macro::node(category("Vector"), path(graphene_core::vector))]
async fn area(ctx: impl Ctx + CloneVarArgs + ExtractAll, vector_data: impl Node<Context<'static>, Output = VectorDataTable>) -> f64 {
let new_ctx = OwnedContextImpl::from(ctx).with_footprint(Footprint::default()).into_context();