Skip to content

Commit

Permalink
iter: Help the compiler vectorize memory reads
Browse files Browse the repository at this point in the history
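Reading from a fixed-size array (`&[T; C2]`) is much faster than reading from
a slice (`[T]`), provided that the access is vectorized; the length known at
compile time is what lets the compiler vectorize the reads.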

The generics are getting a bit unwieldy, but the performance boost is
worth it (2-3x speedup on my 14-inch MacBook Pro with an M1 Pro).

Signed-off-by: Christopher N. Hesse <[email protected]>
  • Loading branch information
raymanfx committed Oct 29, 2023
1 parent 4e499e3 commit ed9b5d5
Showing 1 changed file with 13 additions and 17 deletions.
30 changes: 13 additions & 17 deletions ffimage/src/iter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use core::{marker::PhantomData, ops::Deref};
/// The trait is automatically implemented for all pixel types which implement the `From<[T; C]>`
/// trait where T: Copy and C means the number of channels (e.g. 3 for RGB).
pub trait PixelsExt<const C: usize>: Iterator {
fn pixels<P>(self) -> Pixels<Self::Item, Self, P, P, C>
fn pixels<P>(self) -> Pixels<Self::Item, Self, P, P, C, C>
where
Self: Sized,
{
Expand All @@ -15,12 +15,12 @@ pub trait PixelsExt<const C: usize>: Iterator {

impl<I, const C: usize> PixelsExt<C> for I where I: Iterator {}

pub struct Pixels<T, I, P, P2, const C: usize> {
pub struct Pixels<T, I, P, P2, const C: usize, const C2: usize> {
_marker: PhantomData<(T, P, P2)>,
iter: I,
}

impl<T, I, P, P2, const C: usize> Pixels<T, I, P, P2, C> {
impl<T, I, P, P2, const C: usize, const C2: usize> Pixels<T, I, P, P2, C, C2> {
pub fn new(iter: I) -> Self {
Pixels {
_marker: PhantomData,
Expand All @@ -29,7 +29,7 @@ impl<T, I, P, P2, const C: usize> Pixels<T, I, P, P2, C> {
}
}

impl<'a, T, I, P, P2, const C: usize> Pixels<T, I, P, P2, C>
impl<'a, T, I, P, P2, const C: usize, const C2: usize> Pixels<T, I, P, P2, C, C2>
where
T: Copy + 'a,
I: Iterator<Item = T>,
Expand All @@ -38,35 +38,31 @@ where
{
pub fn write(self, out: impl IntoIterator<Item = &'a mut T>)
where
P2: Deref,
<P2 as Deref>::Target: AsRef<[T]>,
P2: Deref<Target = [T; C2]>,
{
let mut out = out.into_iter();

self.for_each(|p2| {
p2.as_ref().iter().for_each(|t| {
let _out = out.next().unwrap();
*_out = *t;
});
p2.iter().for_each(|t| *(out.next().unwrap()) = *t);
});
}
}

impl<'a, T, I, P, P2, const C: usize> Pixels<T, I, P, P2, C>
impl<'a, T, I, P, P2, const C: usize, const C2: usize> Pixels<T, I, P, P2, C, C2>
where
T: Copy + 'a,
T: Copy + Default + 'a,
I: Iterator<Item = T>,
P: From<[T; C]>,
{
pub fn colorconvert<P3>(self) -> Pixels<T, I, P, P3, C>
pub fn colorconvert<P3>(self) -> Pixels<T, I, P, P3, C, C2>
where
P3: From<P2>,
P3: From<[T; C2]> + From<P2>,
{
Pixels::new(self.iter)
}
}

impl<T, I, P, P2, const C: usize> Iterator for Pixels<T, I, P, P2, C>
impl<T, I, P, P2, const C: usize, const C2: usize> Iterator for Pixels<T, I, P, P2, C, C2>
where
T: Copy,
I: Iterator<Item = T>,
Expand All @@ -77,8 +73,8 @@ where

fn next(&mut self) -> Option<Self::Item> {
let mut chunk = [self.iter.next()?; C];
for channel in chunk.iter_mut().take(C).skip(1) {
*channel = self.iter.next()?
for i in 1..C {
chunk[i] = self.iter.next()?;
}
Some(P2::from(P::from(chunk)))
}
Expand Down

0 comments on commit ed9b5d5

Please sign in to comment.