55using System . Numerics ;
66using System . Runtime . CompilerServices ;
77using System . Runtime . InteropServices ;
8+ #if SUPPORTS_RUNTIME_INTRINSICS
9+ using System . Runtime . Intrinsics ;
10+ using System . Runtime . Intrinsics . X86 ;
11+ #endif
812
913namespace SixLabors . ImageSharp . ColorSpaces . Companding
1014{
@@ -18,49 +22,119 @@ namespace SixLabors.ImageSharp.ColorSpaces.Companding
1822 /// </remarks>
1923 public static class SRgbCompanding
2024 {
// Largest table position a normalized channel can scale to (16 bits of precision).
private const int Scale = 0xFFFF;

// Number of table entries (about 256kb of floats): positions 0..Scale plus one extra
// entry, so that reading "index + 1" at the very top of the range stays inside the table.
private const int Length = Scale + 2;
27+
// Lookup table for compression (linear -> nonlinear sRGB), built on first access.
// Entry i holds the compressed value of (i / Scale), computed in double precision
// and then narrowed to float.
private static readonly Lazy<float[]> LazyCompressTable = new Lazy<float[]>(
    () =>
    {
        // Inputs at or below this value use the linear segment of the curve.
        const double LinearThreshold = 0.04045 / 12.92;
        const double Exponent = 1.0 / 2.4;

        float[] table = new float[Length];

        for (int index = 0; index < table.Length; index++)
        {
            double value = (double)index / Scale;

            double compressed = value <= LinearThreshold
                ? value * 12.92
                : (1.055 * Math.Pow(value, Exponent)) - 0.055;

            table[index] = (float)compressed;
        }

        return table;
    },
    isThreadSafe: true);
51+
// Lookup table for expansion (nonlinear sRGB -> linear), built on first access.
// Entry i holds the expanded value of (i / Scale), computed in double precision
// and then narrowed to float.
private static readonly Lazy<float[]> LazyExpandTable = new Lazy<float[]>(
    () =>
    {
        // Inputs at or below this value use the linear segment of the curve.
        const double LinearThreshold = 0.04045;
        const double Exponent = 2.4;

        float[] table = new float[Length];

        for (int index = 0; index < table.Length; index++)
        {
            double value = (double)index / Scale;

            double expanded = value <= LinearThreshold
                ? value / 12.92
                : Math.Pow((value + 0.055) / 1.055, Exponent);

            table[index] = (float)expanded;
        }

        return table;
    },
    isThreadSafe: true);
75+
/// <summary>
/// Gets the expansion lookup table. The table is created the first time it is read.
/// </summary>
private static float[] ExpandTable
{
    get { return LazyExpandTable.Value; }
}

/// <summary>
/// Gets the compression lookup table. The table is created the first time it is read.
/// </summary>
private static float[] CompressTable
{
    get { return LazyCompressTable.Value; }
}
79+
/// <summary>
/// Expands the companded vectors to their linear equivalents with respect to the energy.
/// The conversion happens in place.
/// </summary>
/// <param name="vectors">The span of vectors.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void Expand(Span<Vector4> vectors)
{
#if SUPPORTS_RUNTIME_INTRINSICS
    if (Avx2.IsSupported && vectors.Length >= 2)
    {
        CompandAvx2(vectors, ExpandTable);

        // The AVX2 routine consumes vectors in pairs, so at most one vector can be left over.
        // That trailing vector goes through the single-vector overload, which evaluates
        // the transfer function directly instead of using the lookup table.
        bool hasTrailingVector = Numerics.Modulo2(vectors.Length) != 0;
        if (hasTrailingVector)
        {
            Expand(ref vectors[vectors.Length - 1]);
        }

        return;
    }
#endif

    CompandScalar(vectors, ExpandTable);
}
38104
/// <summary>
/// Compresses the uncompanded vectors to their nonlinear equivalents with respect to the energy.
/// The conversion happens in place.
/// </summary>
/// <param name="vectors">The span of vectors.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void Compress(Span<Vector4> vectors)
{
    // No 'unsafe' modifier: this method performs no pointer operations itself,
    // which also keeps it consistent with Expand(Span<Vector4>).
#if SUPPORTS_RUNTIME_INTRINSICS
    if (Avx2.IsSupported && vectors.Length >= 2)
    {
        CompandAvx2(vectors, CompressTable);

        if (Numerics.Modulo2(vectors.Length) != 0)
        {
            // Vector4 values are consumed in pairs by the AVX2 routine, so at most one
            // vector remains. It goes through the single-vector overload, which evaluates
            // the transfer function directly instead of using the lookup table.
            Compress(ref MemoryMarshal.GetReference(vectors.Slice(vectors.Length - 1)));
        }
    }
    else
#endif
    {
        CompandScalar(vectors, CompressTable);
    }
}
56129
57130 /// <summary>
58131 /// Expands a companded vector to its linear equivalent with respect to the energy.
59132 /// </summary>
60133 /// <param name="vector">The vector.</param>
61- [ MethodImpl ( InliningOptions . ShortMethod ) ]
134+ [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
62135 public static void Expand ( ref Vector4 vector )
63136 {
137+ // Alpha is already a linear representation of opacity so we do not want to convert it.
64138 vector . X = Expand ( vector . X ) ;
65139 vector . Y = Expand ( vector . Y ) ;
66140 vector . Z = Expand ( vector . Z ) ;
@@ -70,9 +144,10 @@ public static void Expand(ref Vector4 vector)
70144 /// Compresses an uncompanded vector (linear) to its nonlinear equivalent.
71145 /// </summary>
72146 /// <param name="vector">The vector.</param>
73- [ MethodImpl ( InliningOptions . ShortMethod ) ]
147+ [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
74148 public static void Compress ( ref Vector4 vector )
75149 {
150+ // Alpha is already a linear representation of opacity so we do not want to convert it.
76151 vector . X = Compress ( vector . X ) ;
77152 vector . Y = Compress ( vector . Y ) ;
78153 vector . Z = Compress ( vector . Z ) ;
@@ -83,15 +158,84 @@ public static void Compress(ref Vector4 vector)
83158 /// </summary>
84159 /// <param name="channel">The channel value.</param>
85160 /// <returns>The <see cref="float"/> representing the linear channel value.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float Expand(float channel)
{
    // Values at or below the breakpoint lie on the linear segment of the curve.
    if (channel <= 0.04045F)
    {
        return channel / 12.92F;
    }

    // Everything else (including NaN, which fails the comparison) uses the power segment.
    return MathF.Pow((channel + 0.055F) / 1.055F, 2.4F);
}
88164
/// <summary>
/// Compresses an uncompanded channel (linear) to its nonlinear equivalent.
/// </summary>
/// <param name="channel">The channel value.</param>
/// <returns>The <see cref="float"/> representing the nonlinear channel value.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static float Compress(float channel)
{
    // Values at or below the breakpoint lie on the linear segment of the curve.
    if (channel <= 0.0031308F)
    {
        return 12.92F * channel;
    }

    // Everything else (including NaN, which fails the comparison) uses the power segment.
    return (1.055F * MathF.Pow(channel, 0.416666666666667F)) - 0.055F;
}
173+
#if SUPPORTS_RUNTIME_INTRINSICS

/// <summary>
/// Compands vectors in place, two at a time, by looking each channel up in the given table
/// with AVX2 gathers and interpolating between adjacent entries.
/// Only complete pairs are processed; a trailing odd vector is left untouched.
/// </summary>
/// <param name="vectors">The span of vectors.</param>
/// <param name="table">The lookup table, indexed by the channel value multiplied by the scale.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe void CompandAvx2(Span<Vector4> vectors, float[] table)
{
    fixed (float* lut = &table[0])
    {
        Vector256<float> lower = Vector256<float>.Zero;
        Vector256<float> upper = Vector256.Create((float)Scale);
        Vector256<int> one = Vector256.Create(1);

        // A Vector256<float> holds 8 floats, i.e. two Vector4 values, so there are half as many steps.
        ref Vector256<float> current = ref Unsafe.As<Vector4, Vector256<float>>(ref MemoryMarshal.GetReference(vectors));
        ref Vector256<float> end = ref Unsafe.Add(ref current, (IntPtr)((uint)vectors.Length / 2u));

        while (Unsafe.IsAddressLessThan(ref current, ref end))
        {
            // Scale into table space, then clamp to [0, Scale].
            Vector256<float> position = Avx.Multiply(upper, current);
            position = Avx.Min(Avx.Max(lower, position), upper);

            // Split the position into the integer table index and the fractional remainder.
            Vector256<int> index = Avx.ConvertToVector256Int32WithTruncation(position);
            Vector256<float> fraction = Avx.Subtract(position, Avx.ConvertToVector256Single(index));

            // Fetch the two neighbouring table entries for every lane.
            Vector256<float> floor = Avx2.GatherVector256(lut, index, sizeof(float));
            Vector256<float> ceiling = Avx2.GatherVector256(lut, Avx2.Add(index, one), sizeof(float));

            // Alpha is already a linear representation of opacity so we do not want to convert it.
            Vector256<float> companded = Numerics.Lerp(floor, ceiling, fraction);
            current = Avx.Blend(companded, current, Numerics.BlendAlphaControl);

            current = ref Unsafe.Add(ref current, 1);
        }
    }
}
#endif
208+
/// <summary>
/// Compands the X, Y and Z components of every vector in place by looking them up in the
/// given table and interpolating between adjacent entries. W is not modified.
/// </summary>
/// <param name="vectors">The span of vectors.</param>
/// <param name="table">The lookup table, indexed by the channel value multiplied by the scale.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe void CompandScalar(Span<Vector4> vectors, float[] table)
{
    fixed (float* lut = &table[0])
    {
        Vector4 lower = Vector4.Zero;
        var upper = new Vector4(Scale);
        ref Vector4 first = ref MemoryMarshal.GetReference(vectors);

        for (int i = 0; i < vectors.Length; i++)
        {
            ref Vector4 current = ref Unsafe.Add(ref first, i);

            // Scale into table space; the clamp is what keeps the indices below inside the table.
            Vector4 position = Numerics.Clamp(current * Scale, lower, upper);

            float x = position.X;
            float y = position.Y;
            float z = position.Z;

            // Truncate to obtain the lower of the two neighbouring table entries.
            uint xIndex = (uint)x;
            uint yIndex = (uint)y;
            uint zIndex = (uint)z;

            // Alpha is already a linear representation of opacity so we do not want to convert it.
            current.X = Numerics.Lerp(lut[xIndex], lut[xIndex + 1], x - (int)xIndex);
            current.Y = Numerics.Lerp(lut[yIndex], lut[yIndex + 1], y - (int)yIndex);
            current.Z = Numerics.Lerp(lut[zIndex], lut[zIndex + 1], z - (int)zIndex);
        }
    }
}
96240 }
97241}
0 commit comments