@@ -124,12 +124,13 @@ private void OnFrameApplyCore(
124124 // This is needed because the bokeh blur operates as TPixel -> complex -> TPixel, so we cannot
125125 // convert back to standard pixels after each separate 1D convolution pass. Like in the gaussian
126126 // blur though, we preallocate and compute the kernel sampling maps before processing each complex
127- // component, to avoid recomputing the same sampling map once per convolution pass.
128- using var mapX = new KernelSamplingMap ( configuration . MemoryAllocator ) ;
129- using var mapY = new KernelSamplingMap ( configuration . MemoryAllocator ) ;
127+ // component, to avoid recomputing the same sampling map once per convolution pass. Since we are
128+ // doing two 1D convolutions with the same kernel, we can use a single kernel sampling map as if
129+ // we were using a 2D kernel with each dimension being the same as the length of our kernel, and
130+ // use the two sampling offset spans resulting from this same map. This saves some extra work.
131+ using var mapXY = new KernelSamplingMap ( configuration . MemoryAllocator ) ;
130132
131- mapX . BuildSamplingOffsetMap ( 1 , this . kernelSize , sourceRectangle ) ;
132- mapY . BuildSamplingOffsetMap ( this . kernelSize , 1 , sourceRectangle ) ;
133+ mapXY . BuildSamplingOffsetMap ( this . kernelSize , this . kernelSize , sourceRectangle ) ;
133134
134135 ref Complex64 [ ] baseRef = ref MemoryMarshal . GetReference ( this . kernels . AsSpan ( ) ) ;
135136 ref Vector4 paramsRef = ref MemoryMarshal . GetReference ( this . kernelParameters . AsSpan ( ) ) ;
@@ -146,7 +147,7 @@ private void OnFrameApplyCore(
146147 sourceRectangle ,
147148 firstPassBuffer ,
148149 source . PixelBuffer ,
149- mapX ,
150+ mapXY ,
150151 kernel ,
151152 configuration ) ;
152153
@@ -160,7 +161,7 @@ private void OnFrameApplyCore(
160161 sourceRectangle ,
161162 processingBuffer ,
162163 firstPassBuffer ,
163- mapY ,
164+ mapXY ,
164165 kernel ,
165166 parameters . Z ,
166167 parameters . W ) ;
@@ -209,22 +210,18 @@ public void Invoke(int y, Span<Vector4> span)
209210 int boundsWidth = this . bounds . Width ;
210211 int kernelSize = this . kernel . Length ;
211212
212- Span < int > rowOffsets = this . map . GetRowOffsetSpan ( ) ;
213- Span < int > columnOffsets = this . map . GetColumnOffsetSpan ( ) ;
214- int sampleY = Unsafe . Add ( ref MemoryMarshal . GetReference ( rowOffsets ) , y - this . bounds . Y ) ;
215- ref int sampleColumnBase = ref MemoryMarshal . GetReference ( columnOffsets ) ;
216-
217213 // Clear the target buffer for each row run
218214 Span < ComplexVector4 > targetBuffer = this . targetValues . GetRowSpan ( y ) ;
219215 targetBuffer . Clear ( ) ;
220216 ref ComplexVector4 targetBase = ref MemoryMarshal . GetReference ( targetBuffer ) ;
221217
222218 // Execute the bulk pixel format conversion for the current row
223- Span < TPixel > sourceRow = this . sourcePixels . GetRowSpan ( sampleY ) . Slice ( boundsX , boundsWidth ) ;
219+ Span < TPixel > sourceRow = this . sourcePixels . GetRowSpan ( y ) . Slice ( boundsX , boundsWidth ) ;
224220 PixelOperations < TPixel > . Instance . ToVector4 ( this . configuration , sourceRow , span ) ;
225221
226222 ref Vector4 sourceBase = ref MemoryMarshal . GetReference ( span ) ;
227223 ref Complex64 kernelBase = ref this . kernel [ 0 ] ;
224+ ref int sampleColumnBase = ref MemoryMarshal . GetReference ( this . map . GetColumnOffsetSpan ( ) ) ;
228225
229226 for ( int x = 0 ; x < span . Length ; x ++ )
230227 {
0 commit comments