Skip to content
This repository was archived by the owner on Feb 25, 2025. It is now read-only.

[Impeller] applied the lerp hack to blur (roughly 2x speedup?) #50790

Merged
merged 7 commits into from
Feb 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,8 @@ fml::StatusOr<RenderTarget> MakeBlurSubpass(
GaussianBlurVertexShader::BindFrameInfo(
pass, host_buffer.EmplaceUniform(frame_info));
GaussianBlurFragmentShader::BindKernelSamples(
pass, host_buffer.EmplaceUniform(GenerateBlurInfo(blur_info)));
pass, host_buffer.EmplaceUniform(
LerpHackKernelSamples(GenerateBlurInfo(blur_info))));
return pass.Draw().ok();
};
if (destination_target.has_value()) {
Expand Down Expand Up @@ -507,4 +508,32 @@ KernelPipeline::FragmentShader::KernelSamples GenerateBlurInfo(
return result;
}

// This works by shrinking the kernel size by 2 and relying on lerp to read
// between the samples.
KernelPipeline::FragmentShader::KernelSamples LerpHackKernelSamples(
KernelPipeline::FragmentShader::KernelSamples parameters) {
KernelPipeline::FragmentShader::KernelSamples result;
result.sample_count = ((parameters.sample_count - 1) / 2) + 1;
int32_t middle = result.sample_count / 2;
int32_t j = 0;
for (int i = 0; i < result.sample_count; i++) {
if (i == middle) {
result.samples[i] = parameters.samples[j++];
} else {
KernelPipeline::FragmentShader::KernelSample left = parameters.samples[j];
KernelPipeline::FragmentShader::KernelSample right =
parameters.samples[j + 1];
result.samples[i] = KernelPipeline::FragmentShader::KernelSample{
.uv_offset = (left.uv_offset * left.coefficient +
right.uv_offset * right.coefficient) /
(left.coefficient + right.coefficient),
.coefficient = left.coefficient + right.coefficient,
};
j += 2;
}
}

return result;
}

} // namespace impeller
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@ struct BlurParameters {
KernelPipeline::FragmentShader::KernelSamples GenerateBlurInfo(
BlurParameters parameters);

/// This will shrink the size of a kernel by roughly half by sampling between
/// samples and relying on linear interpolation between the samples.
KernelPipeline::FragmentShader::KernelSamples LerpHackKernelSamples(
KernelPipeline::FragmentShader::KernelSamples samples);

/// Performs a bidirectional Gaussian blur.
///
/// This is accomplished by rendering multiple passes in multiple directions.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@
#include "impeller/geometry/geometry_asserts.h"
#include "impeller/renderer/testing/mocks.h"

#if FML_OS_MACOSX
#define IMPELLER_RAND arc4random
#else
#define IMPELLER_RAND rand
#endif

namespace impeller {
namespace testing {

Expand Down Expand Up @@ -487,5 +493,138 @@ TEST(GaussianBlurFilterContentsTest, Coefficients) {
}
}

TEST(GaussianBlurFilterContentsTest, LerpHackKernelSamplesSimple) {
KernelPipeline::FragmentShader::KernelSamples kernel_samples = {
.sample_count = 5,
.samples =
{
{
.uv_offset = Vector2(-2, 0),
.coefficient = 0.1f,
},
{
.uv_offset = Vector2(-1, 0),
.coefficient = 0.2f,
},
{
.uv_offset = Vector2(0, 0),
.coefficient = 0.4f,
},
{
.uv_offset = Vector2(1, 0),
.coefficient = 0.2f,
},
{
.uv_offset = Vector2(2, 0),
.coefficient = 0.1f,
},
},
};

KernelPipeline::FragmentShader::KernelSamples fast_kernel_samples =
LerpHackKernelSamples(kernel_samples);
EXPECT_EQ(fast_kernel_samples.sample_count, 3);

KernelPipeline::FragmentShader::KernelSample* samples =
kernel_samples.samples;
KernelPipeline::FragmentShader::KernelSample* fast_samples =
fast_kernel_samples.samples;

//////////////////////////////////////////////////////////////////////////////
// Check output kernel.

EXPECT_FLOAT_EQ(fast_samples[0].uv_offset.x, -1.3333333);
EXPECT_FLOAT_EQ(fast_samples[0].uv_offset.y, 0);
EXPECT_FLOAT_EQ(fast_samples[0].coefficient, 0.3);
EXPECT_FLOAT_EQ(fast_samples[1].uv_offset.x, 0);
EXPECT_FLOAT_EQ(fast_samples[1].uv_offset.y, 0);
EXPECT_FLOAT_EQ(fast_samples[1].coefficient, 0.4);
EXPECT_FLOAT_EQ(fast_samples[2].uv_offset.x, 1.3333333);
EXPECT_FLOAT_EQ(fast_samples[2].uv_offset.y, 0);
EXPECT_FLOAT_EQ(fast_samples[2].coefficient, 0.3);

//////////////////////////////////////////////////////////////////////////////
// Check output of fast kernel versus original kernel.

Scalar data[5] = {0.25, 0.5, 0.5, 1.0, 0.2};
Scalar original_output =
samples[0].coefficient * data[0] + samples[1].coefficient * data[1] +
samples[2].coefficient * data[2] + samples[3].coefficient * data[3] +
samples[4].coefficient * data[4];

auto lerp = [](const Point& point, Scalar left, Scalar right) {
Scalar int_part;
Scalar fract = fabsf(modf(point.x, &int_part));
if (point.x < 0) {
return left * fract + right * (1.0 - fract);
} else {
return left * (1.0 - fract) + right * fract;
}
};
Scalar fast_output =
/*1st*/ lerp(fast_samples[0].uv_offset, data[0], data[1]) *
fast_samples[0].coefficient +
/*2nd*/ data[2] * fast_samples[1].coefficient +
/*3rd*/ lerp(fast_samples[2].uv_offset, data[3], data[4]) *
fast_samples[2].coefficient;

EXPECT_NEAR(original_output, fast_output, 0.01);
}

TEST(GaussianBlurFilterContentsTest, LerpHackKernelSamplesComplex) {
Scalar sigma = 10.0f;
int32_t blur_radius = static_cast<int32_t>(
std::ceil(GaussianBlurFilterContents::CalculateBlurRadius(sigma)));
BlurParameters parameters = {.blur_uv_offset = Point(1, 0),
.blur_sigma = sigma,
.blur_radius = blur_radius,
.step_size = 1};
KernelPipeline::FragmentShader::KernelSamples kernel_samples =
GenerateBlurInfo(parameters);
EXPECT_EQ(kernel_samples.sample_count, 33);
KernelPipeline::FragmentShader::KernelSamples fast_kernel_samples =
LerpHackKernelSamples(kernel_samples);
EXPECT_EQ(fast_kernel_samples.sample_count, 17);
float data[33];
srand(0);
for (int i = 0; i < 33; i++) {
data[i] = 255.0 * static_cast<double>(IMPELLER_RAND()) / RAND_MAX;
}

auto sampler = [data](Point point) -> Scalar {
FML_CHECK(point.y == 0.0f);
FML_CHECK(point.x >= -16);
FML_CHECK(point.x <= 16);
Scalar fint_part;
Scalar fract = fabsf(modf(point.x, &fint_part));
if (fract == 0) {
int32_t int_part = static_cast<int32_t>(fint_part) + 16;
return data[int_part];
} else {
int32_t left = static_cast<int32_t>(floor(point.x)) + 16;
int32_t right = static_cast<int32_t>(ceil(point.x)) + 16;
if (point.x < 0) {
return fract * data[left] + (1.0 - fract) * data[right];
} else {
return (1.0 - fract) * data[left] + fract * data[right];
}
}
};

Scalar output = 0.0;
for (int i = 0; i < kernel_samples.sample_count; ++i) {
auto sample = kernel_samples.samples[i];
output += sample.coefficient * sampler(sample.uv_offset);
}

Scalar fast_output = 0.0;
for (int i = 0; i < fast_kernel_samples.sample_count; ++i) {
auto sample = fast_kernel_samples.samples[i];
fast_output += sample.coefficient * sampler(sample.uv_offset);
}

EXPECT_NEAR(output, fast_output, 0.1);
}

} // namespace testing
} // namespace impeller