Skip to content

Commit 979baf7

Browse files
committed
More codegen improvements to bokeh blur
1 parent cb5c868 commit 979baf7

File tree

2 files changed

+29
-18
lines changed

2 files changed

+29
-18
lines changed

src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor.cs

Lines changed: 16 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -134,23 +134,29 @@ public void Invoke(int y)
134134
// The target buffer is zeroed initially and then it accumulates the results
135135
// of each partial convolution, so we don't have to clear it here as well
136136
ref Vector4 targetBase = ref this.targetValues.GetElementUnsafe(boundsX, y);
137-
ref Complex64 kernelBase = ref this.kernel[0];
137+
ref Complex64 kernelStart = ref this.kernel[0];
138+
ref Complex64 kernelEnd = ref Unsafe.Add(ref kernelStart, kernelSize);
138139

139-
for (int kY = 0; kY < kernelSize; kY++)
140+
while (Unsafe.IsAddressLessThan(ref kernelStart, ref kernelEnd))
140141
{
141142
// Get the precalculated source sample row for this kernel row and copy to our buffer
142-
int sampleY = Unsafe.Add(ref sampleRowBase, kY);
143-
ref ComplexVector4 sourceBase = ref this.sourceValues.GetElementUnsafe(0, sampleY);
144-
Complex64 factor = Unsafe.Add(ref kernelBase, kY);
143+
ref ComplexVector4 sourceBase = ref this.sourceValues.GetElementUnsafe(0, sampleRowBase);
144+
ref ComplexVector4 sourceEnd = ref Unsafe.Add(ref sourceBase, boundsWidth);
145+
ref Vector4 targetStart = ref targetBase;
146+
Complex64 factor = kernelStart;
145147

146-
for (int x = 0; x < boundsWidth; x++)
148+
while (Unsafe.IsAddressLessThan(ref sourceBase, ref sourceEnd))
147149
{
148-
ref Vector4 target = ref Unsafe.Add(ref targetBase, x);
149-
ComplexVector4 sample = Unsafe.Add(ref sourceBase, x);
150-
ComplexVector4 partial = factor * sample;
150+
ComplexVector4 partial = factor * sourceBase;
151151

152-
target += partial.WeightedSum(this.z, this.w);
152+
targetStart += partial.WeightedSum(this.z, this.w);
153+
154+
sourceBase = ref Unsafe.Add(ref sourceBase, 1);
155+
targetStart = ref Unsafe.Add(ref targetStart, 1);
153156
}
157+
158+
kernelStart = ref Unsafe.Add(ref kernelStart, 1);
159+
sampleRowBase = ref Unsafe.Add(ref sampleRowBase, 1);
154160
}
155161
}
156162
}

src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor{TPixel}.cs

Lines changed: 13 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -233,32 +233,37 @@ public void Invoke(int y, Span<Vector4> span)
233233
// Clear the target buffer for each row run
234234
Span<ComplexVector4> targetBuffer = this.targetValues.GetRowSpan(y);
235235
targetBuffer.Clear();
236-
ref ComplexVector4 targetBase = ref MemoryMarshal.GetReference(targetBuffer);
237236

238237
// Execute the bulk pixel format conversion for the current row
239238
Span<TPixel> sourceRow = this.sourcePixels.GetRowSpan(y).Slice(boundsX, boundsWidth);
240239
PixelOperations<TPixel>.Instance.ToVector4(this.configuration, sourceRow, span);
241240

242241
ref Vector4 sourceBase = ref MemoryMarshal.GetReference(span);
242+
ref ComplexVector4 targetStart = ref MemoryMarshal.GetReference(targetBuffer);
243+
ref ComplexVector4 targetEnd = ref Unsafe.Add(ref targetStart, span.Length);
243244
ref Complex64 kernelBase = ref this.kernel[0];
245+
ref Complex64 kernelEnd = ref Unsafe.Add(ref kernelBase, kernelSize);
244246
ref int sampleColumnBase = ref MemoryMarshal.GetReference(this.map.GetColumnOffsetSpan());
245247

246-
for (int x = 0; x < span.Length; x++)
248+
while (Unsafe.IsAddressLessThan(ref targetStart, ref targetEnd))
247249
{
248-
ref ComplexVector4 target = ref Unsafe.Add(ref targetBase, x);
250+
ref Complex64 kernelStart = ref kernelBase;
251+
ref int sampleColumnStart = ref sampleColumnBase;
249252

250-
for (int kX = 0; kX < kernelSize; kX++)
253+
while (Unsafe.IsAddressLessThan(ref kernelStart, ref kernelEnd))
251254
{
252-
int sampleX = Unsafe.Add(ref sampleColumnBase, kX) - boundsX;
253-
Vector4 sample = Unsafe.Add(ref sourceBase, sampleX);
254-
Complex64 factor = Unsafe.Add(ref kernelBase, kX);
255+
Vector4 sample = Unsafe.Add(ref sourceBase, sampleColumnStart - boundsX);
255256

256-
target.Sum(factor * sample);
257+
targetStart.Sum(kernelStart * sample);
258+
259+
kernelStart = ref Unsafe.Add(ref kernelStart, 1);
260+
sampleColumnStart = ref Unsafe.Add(ref sampleColumnStart, 1);
257261
}
258262

259263
// Shift the base column sampling reference by one row at the end of each outer
260264
// iteration so that the inner tight loop indexing can skip the multiplication
261265
sampleColumnBase = ref Unsafe.Add(ref sampleColumnBase, kernelSize);
266+
targetStart = ref Unsafe.Add(ref targetStart, 1);
262267
}
263268
}
264269
}

0 commit comments

Comments
 (0)