Skip to content

Commit 1c50643

Browse files
johnstiles-google authored and Skia Commit-Bot committed
Optimize Gaussian convolution fragment processor.
This FP now uses an explicit return statement instead of sk_OutColor. Additionally, simplified the generated code by removing a temp variable that did nothing (coordSampled), and by removing a needless addition at the end of the loop.

It looks like coordSampled was useful when it was introduced at http://review.skia.org/20465, but it was later rendered useless.

Change-Id: I8c549b16b4d422d7faeab48e2087f168ad5788b5
Bug: skia:10549
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/344156
Commit-Queue: John Stiles <johnstiles@google.com>
Reviewed-by: Brian Osman <brianosman@google.com>
Auto-Submit: John Stiles <johnstiles@google.com>
1 parent 1d2b075 commit 1c50643

File tree

2 files changed

+9
-12
lines changed

2 files changed

+9
-12
lines changed

src/gpu/effects/GrGaussianConvolutionFragmentProcessor.cpp

Lines changed: 8 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -57,24 +57,20 @@ void GrGaussianConvolutionFragmentProcessor::Impl::emitCode(EmitArgs& args) {
5757

5858
GrGLSLFPFragmentBuilder* fragBuilder = args.fFragBuilder;
5959

60-
fragBuilder->codeAppendf("%s = half4(0);", args.fOutputColor);
60+
fragBuilder->codeAppendf("half4 color = half4(0);");
6161

6262
fragBuilder->codeAppendf("float2 coord = %s - %d.0 * %s;", args.fSampleCoord, ce.fRadius, inc);
63-
fragBuilder->codeAppend("float2 coordSampled = half2(0);");
6463

6564
// Manually unroll loop because some drivers don't; yields 20-30% speedup.
66-
static constexpr const char* kVecSuffix[4] = {".x", ".y", ".z", ".w"};
6765
for (int i = 0; i < width; i++) {
68-
SkString kernelIndex;
69-
kernelIndex.printf("%s[%d]", kernel, i/4);
70-
kernelIndex.append(kVecSuffix[i & 0x3]);
71-
72-
fragBuilder->codeAppend("coordSampled = coord;");
73-
auto sample = this->invokeChild(0, args, "coordSampled");
74-
fragBuilder->codeAppendf("%s += %s", args.fOutputColor, sample.c_str());
75-
fragBuilder->codeAppendf(" * %s;", kernelIndex.c_str());
76-
fragBuilder->codeAppendf("coord += %s;", inc);
66+
auto sample = this->invokeChild(/*childIndex=*/0, args, "coord");
67+
if (i != 0) {
68+
fragBuilder->codeAppendf("coord += %s;", inc);
69+
}
70+
fragBuilder->codeAppendf("color += %s * %s[%d][%d];",
71+
sample.c_str(), kernel, i / 4, i & 0x3);
7772
}
73+
fragBuilder->codeAppendf("return color;");
7874
}
7975

8076
void GrGaussianConvolutionFragmentProcessor::Impl::onSetData(const GrGLSLProgramDataManager& pdman,

src/gpu/effects/GrGaussianConvolutionFragmentProcessor.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -39,6 +39,7 @@ class GrGaussianConvolutionFragmentProcessor : public GrFragmentProcessor {
3939
const GrCaps&);
4040

4141
const char* name() const override { return "GaussianConvolution"; }
42+
bool usesExplicitReturn() const override { return true; }
4243

4344
std::unique_ptr<GrFragmentProcessor> clone() const override {
4445
return std::unique_ptr<GrFragmentProcessor>(

0 commit comments

Comments
 (0)