Optimize map rendering (#242)

Rendering is much faster now, and the blitting code is far less terrible.

---
SpaceManiac's benchmarks indicate about a 9% speedup when rendering MetaStation in a `--release` build.
This commit is contained in:
Pieter-Jan Briers 2021-02-17 03:00:18 +01:00 committed by GitHub
parent 0b95746cb8
commit e07801070e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 109 additions and 57 deletions

15
Cargo.lock generated
View file

@ -189,6 +189,20 @@ name = "bytemuck"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a4bad0c5981acc24bc09e532f35160f952e35422603f0563cd7a73c2c2e65a0"
dependencies = [
"bytemuck_derive",
]
[[package]]
name = "bytemuck_derive"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e215f8c2f9f79cb53c8335e687ffd07d5bfcb6fe5fc80723762d0be46e7cc54"
dependencies = [
"proc-macro2 1.0.24",
"quote 1.0.8",
"syn",
]
[[package]]
name = "byteorder"
@ -475,6 +489,7 @@ name = "dmm-tools"
version = "0.1.0"
dependencies = [
"bumpalo",
"bytemuck",
"dreammaker",
"gfx_core",
"inflate",

View file

@ -14,6 +14,10 @@ rand = "0.7.0"
dreammaker = { path = "../dreammaker" }
lodepng = "3.0.0"
[dependencies.bytemuck]
version = "1.5"
features = ["derive"]
[dependencies.bumpalo]
version = "3.0.0"
features = ["collections"]

View file

@ -4,11 +4,13 @@
use std::io;
use std::path::Path;
use bytemuck::Pod;
use ndarray::Array3;
use lodepng::{self, RGBA, Decoder, ColorType};
use ndarray::Array2;
pub use dm::dmi::*;
use std::ops::{Index, IndexMut};
type Rect = (u32, u32, u32, u32);
@ -49,6 +51,43 @@ impl IconFile {
}
}
/// A single 8-bit-per-channel RGBA pixel.
///
/// The struct is `#[repr(C)]` with four `u8` fields declared in `r, g, b, a`
/// order, so it occupies exactly four bytes and can be reinterpreted as
/// `[u8; 4]` through `bytemuck` (it derives `Pod` and `Zeroable`).
///
/// `Debug` and `Hash` are derived alongside the comparison traits so pixels
/// can be printed in assertions/logs and used as hash-map or hash-set keys.
#[derive(Debug, Default, Clone, Copy, Pod, Zeroable, Eq, PartialEq, Hash)]
#[repr(C)]
pub struct Rgba8 {
    /// Red channel.
    pub r: u8,
    /// Green channel.
    pub g: u8,
    /// Blue channel.
    pub b: u8,
    /// Alpha channel.
    pub a: u8,
}
impl Rgba8 {
    /// Builds a pixel from its red, green, blue and alpha channel values.
    pub fn new(r: u8, g: u8, b: u8, a: u8) -> Rgba8 {
        Self { r, g, b, a }
    }

    /// Borrows the pixel as its four raw bytes, in field order
    /// (`r`, `g`, `b`, `a`).
    pub fn as_bytes(&self) -> &[u8; 4] {
        let bytes: &[u8; 4] = bytemuck::cast_ref(self);
        bytes
    }

    /// Mutably borrows the pixel as its four raw bytes, in field order
    /// (`r`, `g`, `b`, `a`); writes through the array change the pixel.
    pub fn as_bytes_mut(&mut self) -> &mut [u8; 4] {
        let bytes: &mut [u8; 4] = bytemuck::cast_mut(self);
        bytes
    }
}
/// Channel access by position: `0` is `r`, `1` is `g`, `2` is `b`, `3` is `a`.
impl Index<u8> for Rgba8 {
    type Output = u8;

    /// Returns a reference to the channel at `index`.
    ///
    /// # Panics
    ///
    /// Panics if `index` is 4 or greater, since the pixel only has four bytes.
    fn index(&self, index: u8) -> &Self::Output {
        let channel = usize::from(index);
        let bytes = self.as_bytes();
        &bytes[channel]
    }
}
/// Mutable channel access by position: `0` is `r`, `1` is `g`, `2` is `b`,
/// `3` is `a`.
impl IndexMut<u8> for Rgba8 {
    /// Returns a mutable reference to the channel at `index`.
    ///
    /// # Panics
    ///
    /// Panics if `index` is 4 or greater, since the pixel only has four bytes.
    fn index_mut(&mut self, index: u8) -> &mut Self::Output {
        let channel = usize::from(index);
        let bytes = self.as_bytes_mut();
        &mut bytes[channel]
    }
}
// ----------------------------------------------------------------------------
// Image manipulation
@ -56,7 +95,7 @@ impl IconFile {
pub struct Image {
pub width: u32,
pub height: u32,
pub data: Array3<u8>,
pub data: Array2<Rgba8>,
}
impl Image {
@ -64,7 +103,9 @@ impl Image {
Image {
width,
height,
data: Array3::zeros((height as usize, width as usize, 4)),
data: {
Array2::default((width as usize, height as usize))
},
}
}
@ -72,16 +113,12 @@ impl Image {
Image {
width: bitmap.width as u32,
height: bitmap.height as u32,
data: Array3::from_shape_fn((bitmap.height, bitmap.width, 4), |(y, x, c)| {
let rgba = bitmap.buffer[y * bitmap.width + x];
match c {
0 => rgba.r,
1 => rgba.g,
2 => rgba.b,
3 => rgba.a,
_ => unreachable!(),
}
}),
data: {
let cast_input = bytemuck::cast_slice(bitmap.buffer.as_slice());
let mut arr = Array2::default((bitmap.width, bitmap.height));
arr.as_slice_mut().unwrap().copy_from_slice(cast_input);
arr
},
}
}
@ -105,7 +142,7 @@ impl Image {
Ok(Image::from_rgba(bitmap))
}
#[cfg(feature="png")]
#[cfg(feature = "png")]
pub fn to_file(&self, path: &Path) -> io::Result<()> {
use std::fs::File;
@ -115,51 +152,49 @@ impl Image {
let mut writer = encoder.write_header()?;
// TODO: metadata with write_chunk()
writer.write_image_data(self.data.as_slice().unwrap())?;
writer.write_image_data(bytemuck::cast_slice(self.data.as_slice().unwrap()))?;
Ok(())
}
/// Alpha-composites a rectangular region of `other` onto `self`, in place.
///
/// * `pos` - `(x, y)` at which the region starts in `self`.
/// * `crop` - `(x, y, width, height)` of the region read from `other`.
/// * `color` - per-channel `[r, g, b, a]` tint; every source channel is
///   combined with the matching tint channel through `mul255` (defined
///   elsewhere in this file) before blending.
///
/// Blending follows the formulas quoted in the body comments:
/// `out_A = src_A + dst_A (1 - src_A)` and
/// `out_RGB = (src_RGB src_A + dst_RGB dst_A (1 - src_A)) / out_A`.
/// When the resulting alpha is zero, the colour channels are set to 0
/// instead of dividing by zero.
///
/// # Panics
///
/// No clipping is performed here; an access outside the backing data panics,
/// as does image data that is not available as a contiguous slice
/// (`as_slice().unwrap()`).
pub fn composite(&mut self, other: &Image, pos: (u32, u32), crop: Rect, color: [u8; 4]) {
use ndarray::Axis;
let mut destination = self.data.slice_mut(s![
pos.1 as isize..(pos.1 + crop.3) as isize,
pos.0 as isize..(pos.0 + crop.2) as isize,
..
]);
let source = other.data.slice(s![
crop.1 as isize..(crop.1 + crop.3) as isize,
crop.0 as isize..(crop.0 + crop.2) as isize,
..
]);
// loop over each [r, g, b, a] available in the relevant area
for (mut dest, orig_src) in destination.lanes_mut(Axis(2)).into_iter().zip(source.lanes(Axis(2))) {
macro_rules! tint {
($i:expr) => {
mul255(
*orig_src.get($i).unwrap_or(&255),
*color.get($i).unwrap_or(&255),
)
};
}
let src = [tint!(0), tint!(1), tint!(2), tint!(3)];
// out_A = src_A + dst_A (1 - src_A)
// out_RGB = (src_RGB src_A + dst_RGB dst_A (1 - src_A)) / out_A
let out_a = src[3] + mul255(dest[3], 255 - src[3]);
if out_a != 0 {
for i in 0..3 {
dest[i] = ((src[i] as u32 * src[3] as u32
+ dest[i] as u32 * dest[3] as u32 * (255 - src[3] as u32) / 255)
/ out_a as u32) as u8;
let other_dat = other.data.as_slice().unwrap();
let self_dat = self.data.as_slice_mut().unwrap();
let mut sy = crop.1;
for y in pos.1..(pos.1 + crop.3) {
let mut sx = crop.0;
for x in pos.0..(pos.0 + crop.2) {
let src = other_dat[(sy * other.width + sx) as usize];
macro_rules! tint {
($i:expr) => {
mul255(
src[$i],
color[$i],
)
};
}
} else {
for i in 0..3 {
dest[i] = 0;
let mut dst = &mut self_dat[(y * self.width + x) as usize];
let src_tint = Rgba8::new(tint!(0), tint!(1), tint!(2), tint!(3));
// out_A = src_A + dst_A (1 - src_A)
// out_RGB = (src_RGB src_A + dst_RGB dst_A (1 - src_A)) / out_A
let out_a = src_tint.a + mul255(dst.a, 255 - src_tint.a);
if out_a != 0 {
for i in 0..3 {
dst[i] = ((src_tint[i] as u32 * src_tint.a as u32
+ dst[i] as u32 * dst.a as u32 * (255 - src_tint.a as u32) / 255)
/ out_a as u32) as u8;
}
} else {
for i in 0..3 {
dst[i] = 0;
}
}
dst.a = out_a as u8;
sx += 1;
}
dest[3] = out_a as u8;
sy += 1;
}
}
}

View file

@ -49,13 +49,11 @@ fn all_same(icon_file: &IconFile, states: &[&State]) -> bool {
let rect2 = icon_file.rect_of_index(start_index + i);
let slice1 = icon_file.image.data.slice(s![
rect1.1 as isize..(rect1.1 + rect1.3) as isize,
rect1.0 as isize..(rect1.0 + rect1.2) as isize,
..
rect1.0 as isize..(rect1.0 + rect1.2) as isize
]);
let slice2 = icon_file.image.data.slice(s![
rect2.1 as isize..(rect2.1 + rect2.3) as isize,
rect2.0 as isize..(rect2.0 + rect2.2) as isize,
..
rect2.0 as isize..(rect2.0 + rect2.2) as isize
]);
if slice1 != slice2 {
return false;

View file

@ -7,7 +7,7 @@ extern crate dreammaker as dm;
extern crate lodepng;
extern crate inflate;
#[macro_use] extern crate ndarray;
#[macro_use] extern crate bytemuck;
extern crate linked_hash_map;
extern crate rand;
extern crate bumpalo;