Skip to content

Commit 5d23f6e

Browse files
committed
Some improvements
1 parent de01f24 commit 5d23f6e

File tree

6 files changed

+31
-145
lines changed

6 files changed

+31
-145
lines changed

src/app/src/main.rs

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -66,13 +66,14 @@ fn main() {
6666
lab_store.resize(width as usize * components * height as usize, 0f32);
6767
let src_stride = width * components as u32;
6868
let start_time = Instant::now();
69-
rgb_to_lab(
69+
rgb_to_oklab(
7070
src_bytes,
7171
src_stride,
7272
&mut lab_store,
7373
store_stride as u32,
7474
width,
75-
height
75+
height,
76+
TransferFunction::Srgb,
7677
);
7778
let elapsed_time = start_time.elapsed();
7879
// Print the elapsed time in milliseconds
@@ -100,13 +101,14 @@ fn main() {
100101
// }
101102

102103
let start_time = Instant::now();
103-
lab_to_srgb(
104+
oklab_to_rgb(
104105
&lab_store,
105106
store_stride as u32,
106107
&mut dst_slice,
107108
src_stride,
108109
width,
109110
height,
111+
TransferFunction::Srgb,
110112
);
111113

112114
let elapsed_time = start_time.elapsed();

src/neon/cie.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,8 @@ pub(crate) unsafe fn neon_triple_to_luv(
6464
);
6565
let u_prime = vdivq_f32(vmulq_n_f32(x, 4f32), den);
6666
let v_prime = vdivq_f32(vmulq_n_f32(y, 9f32), den);
67-
let sub_u_prime = vsubq_f32(u_prime, vdupq_n_f32(crate::luv::LUV_WHITE_U_PRIME));
68-
let sub_v_prime = vsubq_f32(v_prime, vdupq_n_f32(crate::luv::LUV_WHITE_V_PRIME));
67+
let sub_u_prime = vsubq_f32(u_prime, vdupq_n_f32(LUV_WHITE_U_PRIME));
68+
let sub_v_prime = vsubq_f32(v_prime, vdupq_n_f32(LUV_WHITE_V_PRIME));
6969
let l13 = vmulq_n_f32(l, 13f32);
7070
let u = vbslq_f32(nan_mask, zeros, vmulq_f32(l13, sub_u_prime));
7171
let v = vbslq_f32(nan_mask, zeros, vmulq_f32(l13, sub_v_prime));

src/neon/image_to_hsv.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ use crate::neon::{neon_rgb_to_hsl, neon_rgb_to_hsv};
1111
use crate::{load_u8_and_deinterleave, load_u8_and_deinterleave_half};
1212
use std::arch::aarch64::*;
1313

14+
#[allow(dead_code)]
1415
#[inline(always)]
1516
pub unsafe fn neon_channels_to_hsv<
1617
const CHANNELS_CONFIGURATION: u8,

src/neon/oklab_to_image.rs

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,15 @@
44
* // Use of this source code is governed by a BSD-style
55
* // license that can be found in the LICENSE file.
66
*/
7+
use std::arch::aarch64::*;
8+
9+
use erydanos::{vcosq_f32, vsinq_f32};
10+
11+
use crate::{load_f32_and_deinterleave_direct, TransferFunction, XYZ_TO_SRGB_D65};
712
use crate::image::ImageConfiguration;
813
use crate::image_to_oklab::OklabTarget;
914
use crate::neon::get_neon_gamma_transfer;
1015
use crate::neon::math::vcolorq_matrix_f32;
11-
use crate::{load_f32_and_deinterleave, TransferFunction, XYZ_TO_SRGB_D65};
12-
use erydanos::{vcosq_f32, vsinq_f32};
13-
use std::arch::aarch64::*;
1416

1517
#[inline(always)]
1618
unsafe fn neon_oklab_gamma_vld<const CHANNELS_CONFIGURATION: u8, const TARGET: u8>(
@@ -48,7 +50,7 @@ unsafe fn neon_oklab_gamma_vld<const CHANNELS_CONFIGURATION: u8, const TARGET: u
4850
let transfer = get_neon_gamma_transfer(transfer_function);
4951
let v_scale_alpha = vdupq_n_f32(255f32);
5052
let image_configuration: ImageConfiguration = CHANNELS_CONFIGURATION.into();
51-
let (l, mut a, mut b, mut a_f32) = load_f32_and_deinterleave!(src, image_configuration);
53+
let (l, mut a, mut b, mut a_f32) = load_f32_and_deinterleave_direct!(src, image_configuration);
5254

5355
if target == OklabTarget::OKLCH {
5456
let a0 = vmulq_f32(a, vcosq_f32(b));

src/neon/xyz_lab_to_image.rs

Lines changed: 9 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ pub(crate) unsafe fn neon_xyz_lab_vld<
2020
const TARGET: u8,
2121
>(
2222
src: *const f32,
23-
transfer_function: TransferFunction,
23+
transfer: &unsafe fn(float32x4_t) -> float32x4_t,
2424
c1: float32x4_t,
2525
c2: float32x4_t,
2626
c3: float32x4_t,
@@ -32,7 +32,6 @@ pub(crate) unsafe fn neon_xyz_lab_vld<
3232
c9: float32x4_t,
3333
) -> (uint32x4_t, uint32x4_t, uint32x4_t) {
3434
let target: XyzTarget = TARGET.into();
35-
let transfer = get_neon_gamma_transfer(transfer_function);
3635
let v_scale_color = vdupq_n_f32(255f32);
3736
let lab_pixel = vld3q_f32(src);
3837
let (mut r_f32, mut g_f32, mut b_f32) = (lab_pixel.0, lab_pixel.1, lab_pixel.2);
@@ -121,6 +120,8 @@ pub unsafe fn neon_xyz_to_channels<
121120
let c8 = vdupq_n_f32(*matrix.get_unchecked(2).get_unchecked(1));
122121
let c9 = vdupq_n_f32(*matrix.get_unchecked(2).get_unchecked(2));
123122

123+
let transfer = get_neon_gamma_transfer(transfer_function);
124+
124125
let src_channels = 3usize;
125126

126127
while cx + 16 < width as usize {
@@ -131,68 +132,28 @@ pub unsafe fn neon_xyz_to_channels<
131132

132133
let (r_row0_, g_row0_, b_row0_) =
133134
neon_xyz_lab_vld::<CHANNELS_CONFIGURATION, USE_ALPHA, TARGET>(
134-
src_ptr_0,
135-
transfer_function,
136-
c1,
137-
c2,
138-
c3,
139-
c4,
140-
c5,
141-
c6,
142-
c7,
143-
c8,
144-
c9,
135+
src_ptr_0, &transfer, c1, c2, c3, c4, c5, c6, c7, c8, c9,
145136
);
146137

147138
let src_ptr_1 = offset_src_ptr.add(4 * src_channels);
148139

149140
let (r_row1_, g_row1_, b_row1_) =
150141
neon_xyz_lab_vld::<CHANNELS_CONFIGURATION, USE_ALPHA, TARGET>(
151-
src_ptr_1,
152-
transfer_function,
153-
c1,
154-
c2,
155-
c3,
156-
c4,
157-
c5,
158-
c6,
159-
c7,
160-
c8,
161-
c9,
142+
src_ptr_1, &transfer, c1, c2, c3, c4, c5, c6, c7, c8, c9,
162143
);
163144

164145
let src_ptr_2 = offset_src_ptr.add(4 * 2 * src_channels);
165146

166147
let (r_row2_, g_row2_, b_row2_) =
167148
neon_xyz_lab_vld::<CHANNELS_CONFIGURATION, USE_ALPHA, TARGET>(
168-
src_ptr_2,
169-
transfer_function,
170-
c1,
171-
c2,
172-
c3,
173-
c4,
174-
c5,
175-
c6,
176-
c7,
177-
c8,
178-
c9,
149+
src_ptr_2, &transfer, c1, c2, c3, c4, c5, c6, c7, c8, c9,
179150
);
180151

181152
let src_ptr_3 = offset_src_ptr.add(4 * 3 * src_channels);
182153

183154
let (r_row3_, g_row3_, b_row3_) =
184155
neon_xyz_lab_vld::<CHANNELS_CONFIGURATION, USE_ALPHA, TARGET>(
185-
src_ptr_3,
186-
transfer_function,
187-
c1,
188-
c2,
189-
c3,
190-
c4,
191-
c5,
192-
c6,
193-
c7,
194-
c8,
195-
c9,
156+
src_ptr_3, &transfer, c1, c2, c3, c4, c5, c6, c7, c8, c9,
196157
);
197158

198159
let r_row01 = vcombine_u16(vqmovn_u32(r_row0_), vqmovn_u32(r_row1_));
@@ -258,34 +219,14 @@ pub unsafe fn neon_xyz_to_channels<
258219

259220
let (r_row0_, g_row0_, b_row0_) =
260221
neon_xyz_lab_vld::<CHANNELS_CONFIGURATION, USE_ALPHA, TARGET>(
261-
src_ptr_0,
262-
transfer_function,
263-
c1,
264-
c2,
265-
c3,
266-
c4,
267-
c5,
268-
c6,
269-
c7,
270-
c8,
271-
c9,
222+
src_ptr_0, &transfer, c1, c2, c3, c4, c5, c6, c7, c8, c9,
272223
);
273224

274225
let src_ptr_1 = offset_src_ptr.add(4 * src_channels);
275226

276227
let (r_row1_, g_row1_, b_row1_) =
277228
neon_xyz_lab_vld::<CHANNELS_CONFIGURATION, USE_ALPHA, TARGET>(
278-
src_ptr_1,
279-
transfer_function,
280-
c1,
281-
c2,
282-
c3,
283-
c4,
284-
c5,
285-
c6,
286-
c7,
287-
c8,
288-
c9,
229+
src_ptr_1, &transfer, c1, c2, c3, c4, c5, c6, c7, c8, c9,
289230
);
290231

291232
let r_row01 = vcombine_u16(vqmovn_u32(r_row0_), vqmovn_u32(r_row1_));

src/neon/xyza_laba_to_image.rs

Lines changed: 8 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ use std::arch::aarch64::*;
1616
#[inline(always)]
1717
pub(crate) unsafe fn neon_xyza_lab_vld<const CHANNELS_CONFIGURATION: u8, const TARGET: u8>(
1818
src: *const f32,
19-
transfer_function: TransferFunction,
19+
transfer: &unsafe fn(float32x4_t) -> float32x4_t,
2020
c1: float32x4_t,
2121
c2: float32x4_t,
2222
c3: float32x4_t,
@@ -28,7 +28,6 @@ pub(crate) unsafe fn neon_xyza_lab_vld<const CHANNELS_CONFIGURATION: u8, const T
2828
c9: float32x4_t,
2929
) -> (uint32x4_t, uint32x4_t, uint32x4_t, uint32x4_t) {
3030
let target: XyzTarget = TARGET.into();
31-
let transfer = get_neon_gamma_transfer(transfer_function);
3231
let v_scale_color = vdupq_n_f32(255f32);
3332
let lab_pixel = vld4q_f32(src);
3433
let (mut r_f32, mut g_f32, mut b_f32) = (lab_pixel.0, lab_pixel.1, lab_pixel.2);
@@ -92,6 +91,7 @@ pub unsafe fn neon_xyza_to_image<const CHANNELS_CONFIGURATION: u8, const TARGET:
9291
matrix: &[[f32; 3]; 3],
9392
transfer_function: TransferFunction,
9493
) -> usize {
94+
let transfer = get_neon_gamma_transfer(transfer_function);
9595
let image_configuration: ImageConfiguration = CHANNELS_CONFIGURATION.into();
9696
if !image_configuration.has_alpha() {
9797
panic!("Alpha may be set only on images with alpha");
@@ -120,68 +120,28 @@ pub unsafe fn neon_xyza_to_image<const CHANNELS_CONFIGURATION: u8, const TARGET:
120120

121121
let (r_row0_, g_row0_, b_row0_, a_row0_) =
122122
neon_xyza_lab_vld::<CHANNELS_CONFIGURATION, TARGET>(
123-
src_ptr_0,
124-
transfer_function,
125-
c1,
126-
c2,
127-
c3,
128-
c4,
129-
c5,
130-
c6,
131-
c7,
132-
c8,
133-
c9,
123+
src_ptr_0, &transfer, c1, c2, c3, c4, c5, c6, c7, c8, c9,
134124
);
135125

136126
let src_ptr_1 = offset_src_ptr.add(4 * CHANNELS);
137127

138128
let (r_row1_, g_row1_, b_row1_, a_row1_) =
139129
neon_xyza_lab_vld::<CHANNELS_CONFIGURATION, TARGET>(
140-
src_ptr_1,
141-
transfer_function,
142-
c1,
143-
c2,
144-
c3,
145-
c4,
146-
c5,
147-
c6,
148-
c7,
149-
c8,
150-
c9,
130+
src_ptr_1, &transfer, c1, c2, c3, c4, c5, c6, c7, c8, c9,
151131
);
152132

153133
let src_ptr_2 = offset_src_ptr.add(4 * 2 * CHANNELS);
154134

155135
let (r_row2_, g_row2_, b_row2_, a_row2_) =
156136
neon_xyza_lab_vld::<CHANNELS_CONFIGURATION, TARGET>(
157-
src_ptr_2,
158-
transfer_function,
159-
c1,
160-
c2,
161-
c3,
162-
c4,
163-
c5,
164-
c6,
165-
c7,
166-
c8,
167-
c9,
137+
src_ptr_2, &transfer, c1, c2, c3, c4, c5, c6, c7, c8, c9,
168138
);
169139

170140
let src_ptr_3 = offset_src_ptr.add(4 * 3 * CHANNELS);
171141

172142
let (r_row3_, g_row3_, b_row3_, a_row3_) =
173143
neon_xyza_lab_vld::<CHANNELS_CONFIGURATION, TARGET>(
174-
src_ptr_3,
175-
transfer_function,
176-
c1,
177-
c2,
178-
c3,
179-
c4,
180-
c5,
181-
c6,
182-
c7,
183-
c8,
184-
c9,
144+
src_ptr_3, &transfer, c1, c2, c3, c4, c5, c6, c7, c8, c9,
185145
);
186146

187147
let r_row01 = vcombine_u16(vqmovn_u32(r_row0_), vqmovn_u32(r_row1_));
@@ -221,34 +181,14 @@ pub unsafe fn neon_xyza_to_image<const CHANNELS_CONFIGURATION: u8, const TARGET:
221181

222182
let (r_row0_, g_row0_, b_row0_, a_row0_) =
223183
neon_xyza_lab_vld::<CHANNELS_CONFIGURATION, TARGET>(
224-
src_ptr_0,
225-
transfer_function,
226-
c1,
227-
c2,
228-
c3,
229-
c4,
230-
c5,
231-
c6,
232-
c7,
233-
c8,
234-
c9,
184+
src_ptr_0, &transfer, c1, c2, c3, c4, c5, c6, c7, c8, c9,
235185
);
236186

237187
let src_ptr_1 = offset_src_ptr.add(4 * CHANNELS);
238188

239189
let (r_row1_, g_row1_, b_row1_, a_row1_) =
240190
neon_xyza_lab_vld::<CHANNELS_CONFIGURATION, TARGET>(
241-
src_ptr_1,
242-
transfer_function,
243-
c1,
244-
c2,
245-
c3,
246-
c4,
247-
c5,
248-
c6,
249-
c7,
250-
c8,
251-
c9,
191+
src_ptr_1, &transfer, c1, c2, c3, c4, c5, c6, c7, c8, c9,
252192
);
253193

254194
let r_row01 = vcombine_u16(vqmovn_u32(r_row0_), vqmovn_u32(r_row1_));

0 commit comments

Comments
 (0)