New node: Merge by Distance (#2307)

* add `merge_by_distance` node

* remove unneeded features of petgraph

Also re-arrange the module slightly: because functions are no longer
async, there was name shadowing with the old arrangement.

* remove region if start or end segment is removed

* remove stuff I ended up not using

* "algos" -> "algorithms"

* Code review pass

---------

Co-authored-by: Keavon Chambers <keavon@keavon.com>
Richard Dodd 2025-03-13 03:40:44 +00:00 committed by GitHub
parent 2d18378823
commit 0570edc463
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 291 additions and 12 deletions

Cargo.lock (generated): 24 changes
View file

@@ -1751,6 +1751,12 @@ version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80"
[[package]]
name = "fixedbitset"
version = "0.5.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99"
[[package]]
name = "flate2"
version = "1.1.0"
@@ -2482,8 +2488,10 @@ dependencies = [
"node-macro",
"num-derive",
"num-traits",
"petgraph 0.7.1",
"rand 0.9.0",
"rand_chacha 0.9.0",
"rustc-hash 2.1.1",
"rustybuzz 0.20.1",
"serde",
"serde_json",
@@ -3916,7 +3924,7 @@ dependencies = [
"hexf-parse",
"indexmap 2.7.1",
"log",
"petgraph",
"petgraph 0.6.5",
"rustc-hash 1.1.0",
"spirv",
"termcolor",
@@ -4659,7 +4667,17 @@ version = "0.6.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db"
dependencies = [
"fixedbitset",
"fixedbitset 0.4.2",
"indexmap 2.7.1",
]
[[package]]
name = "petgraph"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772"
dependencies = [
"fixedbitset 0.5.7",
"indexmap 2.7.1",
]
@@ -7217,7 +7235,7 @@ dependencies = [
"memchr",
"nom",
"once_cell",
"petgraph",
"petgraph 0.6.5",
]
[[package]]

View file

@@ -95,6 +95,9 @@ syn = { version = "2.0", default-features = false, features = [
"derive",
] }
kurbo = { version = "0.11.0", features = ["serde"] }
petgraph = { version = "0.7.1", default-features = false, features = [
"graphmap",
] }
[profile.dev]
opt-level = 1

View file

@@ -50,6 +50,11 @@ glam = { workspace = true, default-features = false, features = [
"scalar-math",
] }
serde_json = { workspace = true }
petgraph = { workspace = true, default-features = false, features = [
"graphmap",
] }
rustc-hash = { workspace = true }
math-parser = { path = "../../libraries/math-parser" }
# Required dependencies
half = { version = "2.4.1", default-features = false, features = ["bytemuck"] }
@@ -77,7 +82,6 @@ web-sys = { workspace = true, optional = true, features = [
image = { workspace = true, optional = true, default-features = false, features = [
"png",
] }
math-parser = { path = "../../libraries/math-parser" }
[dev-dependencies]
# Workspace dependencies

View file

@@ -0,0 +1,95 @@
use crate::vector::{PointId, VectorData, VectorDataIndex};
use glam::DVec2;
use petgraph::prelude::UnGraphMap;
use rustc_hash::FxHashSet;
impl VectorData {
/// Collapse all points with edges shorter than the specified distance
pub(crate) fn merge_by_distance(&mut self, distance: f64) {
// Treat self as an undirected graph
let indices = VectorDataIndex::build_from(self);
// Graph containing only short edges, referencing the data graph
let mut short_edges = UnGraphMap::new();
for segment_id in self.segment_ids().iter().copied() {
let length = indices.segment_chord_length(segment_id);
if length < distance {
let [start, end] = indices.segment_ends(segment_id);
let start = indices.point_graph.node_weight(start).unwrap().id;
let end = indices.point_graph.node_weight(end).unwrap().id;
short_edges.add_node(start);
short_edges.add_node(end);
short_edges.add_edge(start, end, segment_id);
}
}
// Group connected segments to collapse them into a single point
// TODO: there are a few possible algorithms for this - perhaps test empirically to find fastest
let collapse: Vec<FxHashSet<PointId>> = petgraph::algo::tarjan_scc(&short_edges).into_iter().map(|connected| connected.into_iter().collect()).collect();
let average_position = collapse
.iter()
.map(|collapse_set| {
let sum: DVec2 = collapse_set.iter().map(|&id| indices.point_position(id, self)).sum();
sum / collapse_set.len() as f64
})
.collect::<Vec<_>>();
// Collect points and segments to delete at the end to avoid invalidating indices
let mut points_to_delete = FxHashSet::default();
let mut segments_to_delete = FxHashSet::default();
for (mut collapse_set, average_pos) in collapse.into_iter().zip(average_position.into_iter()) {
// Remove any segments where both endpoints are in the collapse set
segments_to_delete.extend(self.segment_domain.iter().filter_map(|(id, start_offset, end_offset, _)| {
let start = self.point_domain.ids()[start_offset];
let end = self.point_domain.ids()[end_offset];
if collapse_set.contains(&start) && collapse_set.contains(&end) { Some(id) } else { None }
}));
// Delete all points but the first, set its position to the average, and update segments
let first_id = collapse_set.iter().copied().next().unwrap();
collapse_set.remove(&first_id);
let first_offset = indices.point_to_offset[&first_id];
// Look for segments with endpoints in `collapse_set` and replace them with the point we are collapsing to
for (_, start_offset, end_offset, handles) in self.segment_domain.iter_mut() {
let start_id = self.point_domain.ids()[*start_offset];
let end_id = self.point_domain.ids()[*end_offset];
// Update Bezier handles for moved points
if start_id == first_id {
let point_position = self.point_domain.position[*start_offset];
handles.move_start(average_pos - point_position);
}
if end_id == first_id {
let point_position = self.point_domain.position[*end_offset];
handles.move_end(average_pos - point_position);
}
// Replace removed points with the collapsed point
if collapse_set.contains(&start_id) {
let point_position = self.point_domain.position[*start_offset];
*start_offset = first_offset;
handles.move_start(average_pos - point_position);
}
if collapse_set.contains(&end_id) {
let point_position = self.point_domain.position[*end_offset];
*end_offset = first_offset;
handles.move_end(average_pos - point_position);
}
}
// Update the position of the collapsed point
self.point_domain.position[first_offset] = average_pos;
points_to_delete.extend(collapse_set)
}
// Remove faces whose start or end segments are removed
// TODO: Adjust faces and only delete if all (or all but one) segments are removed
self.region_domain
.retain_with_region(|_, segment_range| !segments_to_delete.contains(segment_range.start()) && !segments_to_delete.contains(segment_range.end()));
self.segment_domain.retain(|id| !segments_to_delete.contains(id), usize::MAX);
self.point_domain.retain(&mut self.segment_domain, |id| !points_to_delete.contains(id));
}
}
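
For context, a minimal standalone sketch (not part of the commit) of the grouping step above: only the short edges are added to an undirected petgraph graph, and `tarjan_scc` then yields its connected components, since on an undirected graph every strongly connected component is simply a connected component. Plain `u32` values stand in for Graphite's `PointId`, and the chord length is used directly as the edge weight.

use petgraph::algo::tarjan_scc;
use petgraph::prelude::UnGraphMap;

// Sketch: group points joined by edges shorter than `distance`.
// Each returned group is one set of points that would collapse into a single point.
fn group_short_edges(edges: &[(u32, u32, f64)], distance: f64) -> Vec<Vec<u32>> {
    let mut short_edges: UnGraphMap<u32, f64> = UnGraphMap::new();
    for &(start, end, length) in edges {
        if length < distance {
            short_edges.add_node(start);
            short_edges.add_node(end);
            short_edges.add_edge(start, end, length);
        }
    }
    // On an undirected graph, Tarjan's SCCs are exactly the connected components.
    tarjan_scc(&short_edges)
}

fn main() {
    // Edges (1, 2) and (2, 3) are short, so points 1, 2, and 3 form one collapse group;
    // the long edge (3, 4) is skipped entirely, leaving point 4 untouched.
    let groups = group_short_edges(&[(1, 2, 0.5), (2, 3, 0.8), (3, 4, 50.0)], 1.0);
    assert_eq!(groups.len(), 1);
    println!("{groups:?}");
}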

View file

@@ -0,0 +1 @@
mod merge_by_distance;

View file

@@ -1,3 +1,4 @@
mod algorithms;
pub mod brush_stroke;
pub mod generator_nodes;
pub mod misc;

View file

@@ -1,4 +1,5 @@
mod attributes;
mod indexed;
mod modification;
use super::style::{PathStyle, Stroke};
@@ -9,6 +10,7 @@ use bezier_rs::ManipulatorGroup;
use core::borrow::Borrow;
use dyn_any::DynAny;
use glam::{DAffine2, DVec2};
pub use indexed::VectorDataIndex;
pub use modification::*;
use std::collections::HashMap;
@@ -68,6 +70,8 @@ pub type VectorDataTable = Instances<VectorData>;
/// [VectorData] is passed between nodes.
/// It contains a list of subpaths (that may be open or closed), a transform, and some style information.
///
/// Segments are connected if they share endpoints.
#[derive(Clone, Debug, PartialEq, DynAny)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct VectorData {
@@ -265,6 +269,11 @@ impl VectorData {
index.flat_map(|index| self.segment_domain.connected_points(index).map(|index| self.point_domain.ids()[index]))
}
/// Get an array slice of all segment IDs.
pub fn segment_ids(&self) -> &[SegmentId] {
self.segment_domain.ids()
}
/// Enumerate all segments that start at the point.
pub fn start_connected(&self, point: PointId) -> impl Iterator<Item = SegmentId> + '_ {
let index = [self.point_domain.resolve_id(point)].into_iter().flatten();

View file

@@ -1,4 +1,6 @@
use crate::vector::vector_data::{HandleId, VectorData};
use bezier_rs::BezierHandles;
use core::iter::zip;
use dyn_any::DynAny;
use glam::{DAffine2, DVec2};
use std::collections::HashMap;
@@ -80,7 +82,7 @@ impl core::hash::BuildHasher for NoHashBuilder {
pub struct PointDomain {
id: Vec<PointId>,
#[serde(alias = "positions")]
position: Vec<DVec2>,
pub(crate) position: Vec<DVec2>,
}
impl core::hash::Hash for PointDomain {
@@ -112,7 +114,8 @@ impl PointDomain {
id_map.push(new_index);
new_index += 1;
} else {
id_map.push(usize::MAX); // A placeholder for invalid ids. This is checked after the segment domain is modified.
// A placeholder for invalid IDs. This is checked after the segment domain is modified.
id_map.push(usize::MAX);
}
}
@@ -176,6 +179,11 @@ impl PointDomain {
*pos = transform.transform_point2(*pos);
}
}
/// Iterate over point IDs and positions
pub fn iter(&self) -> impl Iterator<Item = (PointId, DVec2)> + '_ {
self.ids().iter().copied().zip(self.positions().iter().copied())
}
}
#[derive(Clone, Debug, Default, PartialEq, Hash, DynAny)]
@@ -343,6 +351,7 @@ impl SegmentDomain {
})
}
/// Get index from ID by linear search. Takes `O(n)` time.
fn id_to_index(&self, id: SegmentId) -> Option<usize> {
debug_assert_eq!(self.id.len(), self.handles.len());
debug_assert_eq!(self.id.len(), self.start_point.len());
@@ -397,11 +406,34 @@ impl SegmentDomain {
pub(crate) fn connected_count(&self, point: usize) -> usize {
self.all_connected(point).count()
}
/// Iterates over segments in the domain.
///
/// Tuple is: (id, start point, end point, handles)
pub(crate) fn iter(&self) -> impl Iterator<Item = (SegmentId, usize, usize, BezierHandles)> + '_ {
let ids = self.id.iter().copied();
let start_point = self.start_point.iter().copied();
let end_point = self.end_point.iter().copied();
let handles = self.handles.iter().copied();
zip(ids, zip(start_point, zip(end_point, handles))).map(|(id, (start_point, (end_point, handles)))| (id, start_point, end_point, handles))
}
/// Iterates over segments in the domain, mutably.
///
/// Tuple is: (id, start point, end point, handles)
pub(crate) fn iter_mut(&mut self) -> impl Iterator<Item = (&mut SegmentId, &mut usize, &mut usize, &mut BezierHandles)> + '_ {
let ids = self.id.iter_mut();
let start_point = self.start_point.iter_mut();
let end_point = self.end_point.iter_mut();
let handles = self.handles.iter_mut();
zip(ids, zip(start_point, zip(end_point, handles))).map(|(id, (start_point, (end_point, handles)))| (id, start_point, end_point, handles))
}
}
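
As an aside (not from the commit), the nested `zip` used by the new `iter` and `iter_mut` pairs the four parallel columns up as `(id, (start, (end, handles)))`, which the closure then flattens into a plain 4-tuple. A tiny self-contained illustration with placeholder column types:

use core::iter::zip;

fn main() {
    // Four parallel columns, mirroring SegmentDomain's struct-of-vectors layout.
    let ids = vec![10_u64, 11];
    let starts = vec![0_usize, 1];
    let ends = vec![1_usize, 2];
    let handles = vec!['a', 'b'];

    // `zip` nests to the right, so each item is (id, (start, (end, handle))).
    let rows: Vec<(u64, usize, usize, char)> = zip(ids, zip(starts, zip(ends, handles)))
        .map(|(id, (start, (end, handle)))| (id, start, end, handle))
        .collect();

    assert_eq!(rows, vec![(10, 0, 1, 'a'), (11, 1, 2, 'b')]);
}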
#[derive(Clone, Debug, Default, PartialEq, Hash, DynAny)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
/// Stores data which is per-region. A region is an enclosed area composed of a range of segments from the [`SegmentDomain`] that can be given a fill. In future this will be extendable at runtime with custom attributes.
/// Stores data which is per-region. A region is an enclosed area composed of a range of segments from the
/// [`SegmentDomain`] that can be given a fill. In future this will be extendable at runtime with custom attributes.
pub struct RegionDomain {
#[serde(alias = "ids")]
id: Vec<RegionId>,
@@ -432,6 +464,19 @@ impl RegionDomain {
self.id.retain(&f);
}
/// Like [`Self::retain`] but also gives the function access to the segment range.
///
/// Note that this function requires an allocation that `retain` avoids.
pub fn retain_with_region(&mut self, f: impl Fn(&RegionId, &core::ops::RangeInclusive<SegmentId>) -> bool) {
let keep = self.id.iter().zip(self.segment_range.iter()).map(|(id, range)| f(id, range)).collect::<Vec<_>>();
let mut iter = keep.iter().copied();
self.segment_range.retain(|_| iter.next().unwrap());
let mut iter = keep.iter().copied();
self.fill.retain(|_| iter.next().unwrap());
let mut iter = keep.iter().copied();
self.id.retain(|_| iter.next().unwrap());
}
pub fn push(&mut self, id: RegionId, segment_range: core::ops::RangeInclusive<SegmentId>, fill: FillId) {
if self.id.contains(&id) {
warn!("Duplicate region");

View file

@@ -0,0 +1,90 @@
use super::{PointId, SegmentId, VectorData};
use glam::DVec2;
use petgraph::graph::{EdgeIndex, NodeIndex, UnGraph};
use rustc_hash::FxHashMap;
/// All the fixed fields of a point from the point domain.
pub struct Point {
pub id: PointId,
pub position: DVec2,
}
/// Useful indexes to speed up various operations on `VectorData`.
///
/// Important: It is the user's responsibility to ensure the indexes remain valid after mutations to the data.
pub struct VectorDataIndex {
/// Points and segments form a graph. Store it here in a form amenable to graph algorithms.
///
/// Currently, segment data is not stored as it is not used, but it could easily be added.
pub(crate) point_graph: UnGraph<Point, ()>,
pub(crate) segment_to_edge: FxHashMap<SegmentId, EdgeIndex>,
/// Get the offset from the point ID.
pub(crate) point_to_offset: FxHashMap<PointId, usize>,
// TODO: faces
}
impl VectorDataIndex {
/// Construct a [`VectorDataIndex`] by building indexes from the given [`VectorData`]. Takes `O(n)` time.
pub fn build_from(data: &VectorData) -> Self {
let point_to_offset = data.point_domain.ids().iter().copied().enumerate().map(|(a, b)| (b, a)).collect::<FxHashMap<_, _>>();
let mut point_to_node = FxHashMap::default();
let mut segment_to_edge = FxHashMap::default();
let mut graph = UnGraph::new_undirected();
for (point_id, position) in data.point_domain.iter() {
let idx = graph.add_node(Point { id: point_id, position });
point_to_node.insert(point_id, idx);
}
for (segment_id, start_offset, end_offset, ..) in data.segment_domain.iter() {
let start_id = data.point_domain.ids()[start_offset];
let end_id = data.point_domain.ids()[end_offset];
let edge = graph.add_edge(point_to_node[&start_id], point_to_node[&end_id], ());
segment_to_edge.insert(segment_id, edge);
}
Self {
point_graph: graph,
segment_to_edge,
point_to_offset,
}
}
/// Fetch the length of given segment's chord. Takes `O(1)` time.
///
/// # Panics
///
/// Will panic if no segment with the given ID is found.
pub fn segment_chord_length(&self, id: SegmentId) -> f64 {
let edge_idx = self.segment_to_edge[&id];
let (start, end) = self.point_graph.edge_endpoints(edge_idx).unwrap();
let start_position = self.point_graph.node_weight(start).unwrap().position;
let end_position = self.point_graph.node_weight(end).unwrap().position;
(start_position - end_position).length()
}
/// Get the ends of a segment. Takes `O(1)` time.
///
/// The IDs will be ordered [smallest, largest] so they can be used to find other segments with the same endpoints, regardless of direction.
///
/// # Panics
///
/// This function will panic if the ID is not present.
pub fn segment_ends(&self, id: SegmentId) -> [NodeIndex; 2] {
let (start, end) = self.point_graph.edge_endpoints(self.segment_to_edge[&id]).unwrap();
if start < end { [start, end] } else { [end, start] }
}
/// Get the physical location of a point. Takes `O(1)` time.
///
/// # Panics
///
/// Will panic if `id` isn't in the data.
pub fn point_position(&self, id: PointId, data: &VectorData) -> DVec2 {
let offset = self.point_to_offset[&id];
data.point_domain.positions()[offset]
}
}
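
To illustrate why `segment_ends` orders its result (a sketch, not part of the commit): once an endpoint pair is normalized to `[smallest, largest]`, two segments joining the same pair of points compare equal regardless of direction, so coincident segments can be spotted with an ordinary set. The `u32` IDs below are placeholders for `NodeIndex` values.

use std::collections::HashSet;

// Sketch: normalize an endpoint pair so segment direction no longer matters.
fn ordered(a: u32, b: u32) -> [u32; 2] {
    if a < b { [a, b] } else { [b, a] }
}

fn main() {
    // The second segment joins the same two points as the first, just reversed.
    let segments = [(3_u32, 7_u32), (7, 3), (1, 2)];
    let mut seen = HashSet::new();
    let duplicates: Vec<(u32, u32)> = segments.iter().copied().filter(|&(a, b)| !seen.insert(ordered(a, b))).collect();
    assert_eq!(duplicates, vec![(7, 3)]);
}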

View file

@@ -886,9 +886,9 @@ fn bevel_algorithm(mut vector_data: VectorData, vector_data_transform: DAffine2,
bezier.split(bezier_rs::TValue::Parametric(parametric))[1]
}
/// Produces a list that correspons with the point id. The value is how many segments are connected.
fn segments_connected_count(vector_data: &VectorData) -> Vec<u8> {
// Count the number of segments connectign to each point.
/// Produces a list that corresponds with the point ID. The value is how many segments are connected.
fn segments_connected_count(vector_data: &VectorData) -> Vec<usize> {
// Count the number of segments connecting to each point.
let mut segments_connected_count = vec![0; vector_data.point_domain.ids().len()];
for &point_index in vector_data.segment_domain.start_point().iter().chain(vector_data.segment_domain.end_point()) {
segments_connected_count[point_index] += 1;
@@ -904,7 +904,7 @@ fn bevel_algorithm(mut vector_data: VectorData, vector_data_transform: DAffine2,
}
/// Updates the index so that it points at a point with the position. If nobody else will look at the index, the original point is updated. Otherwise a new point is created.
fn create_or_modify_point(point_domain: &mut PointDomain, segments_connected_count: &mut [u8], pos: DVec2, index: &mut usize, next_id: &mut PointId, new_segments: &mut Vec<[usize; 2]>) {
fn create_or_modify_point(point_domain: &mut PointDomain, segments_connected_count: &mut [usize], pos: DVec2, index: &mut usize, next_id: &mut PointId, new_segments: &mut Vec<[usize; 2]>) {
segments_connected_count[*index] -= 1;
if segments_connected_count[*index] == 0 {
// If nobody else is going to look at this point, we're alright to modify it
@@ -922,7 +922,7 @@ fn bevel_algorithm(mut vector_data: VectorData, vector_data_transform: DAffine2,
}
}
fn update_existing_segments(vector_data: &mut VectorData, vector_data_transform: DAffine2, distance: f64, segments_connected: &mut [u8]) -> Vec<[usize; 2]> {
fn update_existing_segments(vector_data: &mut VectorData, vector_data_transform: DAffine2, distance: f64, segments_connected: &mut [usize]) -> Vec<[usize; 2]> {
let mut next_id = vector_data.point_domain.next_id();
let mut new_segments = Vec::new();
@@ -995,6 +995,19 @@ fn bevel(_: impl Ctx, source: VectorDataTable, #[default(10.)] distance: Length)
result
}
#[node_macro::node(name("Merge by Distance"), category("Vector"), path(graphene_core::vector))]
fn merge_by_distance(_: impl Ctx, source: VectorDataTable, #[default(10.)] distance: Length) -> VectorDataTable {
let source_transform = source.transform();
let mut source = source.one_instance().instance.clone();
source.merge_by_distance(distance);
let mut result = VectorDataTable::new(source);
*result.transform_mut() = source_transform;
result
}
#[node_macro::node(category("Vector"), path(graphene_core::vector))]
async fn area(ctx: impl Ctx + CloneVarArgs + ExtractAll, vector_data: impl Node<Context<'static>, Output = VectorDataTable>) -> f64 {
let new_ctx = OwnedContextImpl::from(ctx).with_footprint(Footprint::default()).into_context();