@@ -12,7 +12,7 @@ use std::arch::x86_64::*;
12
12
13
13
use crate :: avx:: gamma_curves:: get_avx_gamma_transfer;
14
14
use crate :: avx:: routines:: avx_vld_f32_and_deinterleave;
15
- use crate :: avx:: { avx2_interleave_rgb, avx2_interleave_rgba_epi8, avx2_pack_s32 , avx2_pack_u16 } ;
15
+ use crate :: avx:: { avx2_interleave_rgb, avx2_interleave_rgba_epi8, avx2_pack_u16 , avx2_pack_u32 } ;
16
16
use crate :: image:: ImageConfiguration ;
17
17
use crate :: {
18
18
avx_store_and_interleave_v3_half_u8, avx_store_and_interleave_v3_u8,
@@ -22,9 +22,8 @@ use crate::{
22
22
#[ inline( always) ]
23
23
unsafe fn gamma_vld < const CHANNELS_CONFIGURATION : u8 , const USE_ALPHA : bool > (
24
24
src : * const f32 ,
25
- transfer_function : TransferFunction ,
25
+ transfer : & unsafe fn ( __m256 ) -> __m256 ,
26
26
) -> ( __m256i , __m256i , __m256i , __m256i ) {
27
- let transfer = get_avx_gamma_transfer ( transfer_function) ;
28
27
let v_scale_alpha = _mm256_set1_ps ( 255f32 ) ;
29
28
let ( mut r_f32, mut g_f32, mut b_f32, mut a_f32) =
30
29
avx_vld_f32_and_deinterleave :: < CHANNELS_CONFIGURATION > ( src) ;
@@ -46,50 +45,57 @@ unsafe fn gamma_vld<const CHANNELS_CONFIGURATION: u8, const USE_ALPHA: bool>(
46
45
}
47
46
48
47
#[ inline( always) ]
49
- pub unsafe fn avx_linear_to_gamma < const CHANNELS_CONFIGURATION : u8 , const USE_ALPHA : bool > (
48
+ pub unsafe fn avx_linear_to_gamma <
49
+ const CHANNELS_CONFIGURATION : u8 ,
50
+ const USE_ALPHA : bool ,
51
+ const TRANSFER_FUNCTION : u8 ,
52
+ > (
50
53
start_cx : usize ,
51
54
src : * const f32 ,
52
55
src_offset : u32 ,
53
56
dst : * mut u8 ,
54
57
dst_offset : u32 ,
55
58
width : u32 ,
56
- transfer_function : TransferFunction ,
59
+ _ : TransferFunction ,
57
60
) -> usize {
58
61
let image_configuration: ImageConfiguration = CHANNELS_CONFIGURATION . into ( ) ;
59
62
let channels = image_configuration. get_channels_count ( ) ;
60
63
let mut cx = start_cx;
61
64
65
+ let transfer_function: TransferFunction = TRANSFER_FUNCTION . into ( ) ;
66
+ let transfer = get_avx_gamma_transfer ( transfer_function) ;
67
+
62
68
while cx + 32 < width as usize {
63
69
let offset_src_ptr =
64
70
( ( src as * const u8 ) . add ( src_offset as usize ) as * const f32 ) . add ( cx * channels) ;
65
71
66
72
let src_ptr_0 = offset_src_ptr;
67
73
68
74
let ( r_row0_, g_row0_, b_row0_, a_row0_) =
69
- gamma_vld :: < CHANNELS_CONFIGURATION , USE_ALPHA > ( src_ptr_0, transfer_function ) ;
75
+ gamma_vld :: < CHANNELS_CONFIGURATION , USE_ALPHA > ( src_ptr_0, & transfer ) ;
70
76
71
77
let src_ptr_1 = offset_src_ptr. add ( 8 * channels) ;
72
78
73
79
let ( r_row1_, g_row1_, b_row1_, a_row1_) =
74
- gamma_vld :: < CHANNELS_CONFIGURATION , USE_ALPHA > ( src_ptr_1, transfer_function ) ;
80
+ gamma_vld :: < CHANNELS_CONFIGURATION , USE_ALPHA > ( src_ptr_1, & transfer ) ;
75
81
76
82
let src_ptr_2 = offset_src_ptr. add ( 8 * 2 * channels) ;
77
83
78
84
let ( r_row2_, g_row2_, b_row2_, a_row2_) =
79
- gamma_vld :: < CHANNELS_CONFIGURATION , USE_ALPHA > ( src_ptr_2, transfer_function ) ;
85
+ gamma_vld :: < CHANNELS_CONFIGURATION , USE_ALPHA > ( src_ptr_2, & transfer ) ;
80
86
81
87
let src_ptr_3 = offset_src_ptr. add ( 8 * 3 * channels) ;
82
88
83
89
let ( r_row3_, g_row3_, b_row3_, a_row3_) =
84
- gamma_vld :: < CHANNELS_CONFIGURATION , USE_ALPHA > ( src_ptr_3, transfer_function ) ;
90
+ gamma_vld :: < CHANNELS_CONFIGURATION , USE_ALPHA > ( src_ptr_3, & transfer ) ;
85
91
86
- let r_row01 = avx2_pack_s32 ( r_row0_, r_row1_) ;
87
- let g_row01 = avx2_pack_s32 ( g_row0_, g_row1_) ;
88
- let b_row01 = avx2_pack_s32 ( b_row0_, b_row1_) ;
92
+ let r_row01 = avx2_pack_u32 ( r_row0_, r_row1_) ;
93
+ let g_row01 = avx2_pack_u32 ( g_row0_, g_row1_) ;
94
+ let b_row01 = avx2_pack_u32 ( b_row0_, b_row1_) ;
89
95
90
- let r_row23 = avx2_pack_s32 ( r_row2_, r_row3_) ;
91
- let g_row23 = avx2_pack_s32 ( g_row2_, g_row3_) ;
92
- let b_row23 = avx2_pack_s32 ( b_row2_, b_row3_) ;
96
+ let r_row23 = avx2_pack_u32 ( r_row2_, r_row3_) ;
97
+ let g_row23 = avx2_pack_u32 ( g_row2_, g_row3_) ;
98
+ let b_row23 = avx2_pack_u32 ( b_row2_, b_row3_) ;
93
99
94
100
let r_row = avx2_pack_u16 ( r_row01, r_row23) ;
95
101
let g_row = avx2_pack_u16 ( g_row01, g_row23) ;
@@ -98,8 +104,8 @@ pub unsafe fn avx_linear_to_gamma<const CHANNELS_CONFIGURATION: u8, const USE_AL
98
104
let dst_ptr = dst. add ( dst_offset as usize + cx * channels) ;
99
105
100
106
if USE_ALPHA {
101
- let a_row01 = avx2_pack_s32 ( a_row0_, a_row1_) ;
102
- let a_row23 = avx2_pack_s32 ( a_row2_, a_row3_) ;
107
+ let a_row01 = avx2_pack_u32 ( a_row0_, a_row1_) ;
108
+ let a_row23 = avx2_pack_u32 ( a_row2_, a_row3_) ;
103
109
let a_row = avx2_pack_u16 ( a_row01, a_row23) ;
104
110
avx_store_and_interleave_v4_u8 ! (
105
111
dst_ptr,
@@ -125,16 +131,16 @@ pub unsafe fn avx_linear_to_gamma<const CHANNELS_CONFIGURATION: u8, const USE_AL
125
131
let src_ptr_0 = offset_src_ptr;
126
132
127
133
let ( r_row0_, g_row0_, b_row0_, a_row0_) =
128
- gamma_vld :: < CHANNELS_CONFIGURATION , USE_ALPHA > ( src_ptr_0, transfer_function ) ;
134
+ gamma_vld :: < CHANNELS_CONFIGURATION , USE_ALPHA > ( src_ptr_0, & transfer ) ;
129
135
130
136
let src_ptr_1 = offset_src_ptr. add ( 8 * channels) ;
131
137
132
138
let ( r_row1_, g_row1_, b_row1_, a_row1_) =
133
- gamma_vld :: < CHANNELS_CONFIGURATION , USE_ALPHA > ( src_ptr_1, transfer_function ) ;
139
+ gamma_vld :: < CHANNELS_CONFIGURATION , USE_ALPHA > ( src_ptr_1, & transfer ) ;
134
140
135
- let r_row01 = avx2_pack_s32 ( r_row0_, r_row1_) ;
136
- let g_row01 = avx2_pack_s32 ( g_row0_, g_row1_) ;
137
- let b_row01 = avx2_pack_s32 ( b_row0_, b_row1_) ;
141
+ let r_row01 = avx2_pack_u32 ( r_row0_, r_row1_) ;
142
+ let g_row01 = avx2_pack_u32 ( g_row0_, g_row1_) ;
143
+ let b_row01 = avx2_pack_u32 ( b_row0_, b_row1_) ;
138
144
139
145
let r_row = avx2_pack_u16 ( r_row01, zeros) ;
140
146
let g_row = avx2_pack_u16 ( g_row01, zeros) ;
@@ -143,7 +149,7 @@ pub unsafe fn avx_linear_to_gamma<const CHANNELS_CONFIGURATION: u8, const USE_AL
143
149
let dst_ptr = dst. add ( dst_offset as usize + cx * channels) ;
144
150
145
151
if USE_ALPHA {
146
- let a_row01 = avx2_pack_s32 ( a_row0_, a_row1_) ;
152
+ let a_row01 = avx2_pack_u32 ( a_row0_, a_row1_) ;
147
153
let a_row = avx2_pack_u16 ( a_row01, zeros) ;
148
154
avx_store_and_interleave_v4_half_u8 ! (
149
155
dst_ptr,
0 commit comments