Skip to content

Commit

Permalink
General performance improvements (tippesi#24)
Browse files Browse the repository at this point in the history
* Improved cloud performance

* Fixed some issues

* Improvements for the cloud temporal resolve

* Better blue noise sampling

* Small adjustments

* Fixed volumetric clouds temporal rejection

* Some smaller changes
  • Loading branch information
tippesi authored Jan 30, 2023
1 parent c4d8eb7 commit 1cec285
Show file tree
Hide file tree
Showing 26 changed files with 198 additions and 121 deletions.
Binary file added data/scrambling_ranking.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
26 changes: 11 additions & 15 deletions data/shader/ao/rtao.csh
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <../common/random.hsh>
#include <../common/flatten.hsh>
#include <../common/convert.hsh>
#include <../common/bluenoise.hsh>
#include <../brdf/brdfSample.hsh>

layout (local_size_x = 8, local_size_y = 4) in;
Expand All @@ -15,8 +16,9 @@ layout (set = 3, binding = 0, r16f) writeonly uniform image2D rtaoImage;

layout(set = 3, binding = 1) uniform sampler2D normalTexture;
layout(set = 3, binding = 2) uniform sampler2D shadowMap;
layout(set = 3, binding = 3) uniform sampler2D randomTexture;
layout(set = 3, binding = 4) uniform isampler2D offsetTexture;
layout(set = 3, binding = 3) uniform isampler2D offsetTexture;
layout(set = 3, binding = 4) uniform sampler2D scramblingRankingTexture;
layout(set = 3, binding = 5) uniform sampler2D sobolSequenceTexture;

const ivec2 offsets[4] = ivec2[4](
ivec2(0, 0),
Expand All @@ -25,9 +27,9 @@ const ivec2 offsets[4] = ivec2[4](
ivec2(1, 1)
);

layout(set = 3, binding = 5) uniform UniformBuffer {
layout(set = 3, binding = 6) uniform UniformBuffer {
float radius;
uint frameSeed;
int frameSeed;
} uniforms;

void main() {
Expand Down Expand Up @@ -62,26 +64,20 @@ void main() {
vec2 recontructTexCoord = (2.0 * vec2(pixel) + offset + vec2(0.5)) / (2.0 * vec2(resolution));
vec3 worldPos = vec3(globalData.ivMatrix * vec4(ConvertDepthToViewSpace(depth, recontructTexCoord), 1.0));
vec3 worldNorm = normalize(vec3(globalData.ivMatrix * vec4(2.0 * textureLod(normalTexture, texCoord, 0).rgb - 1.0, 0.0)));
float seed = texelFetch(randomTexture, pixel % ivec2(4), 0).r;

float ao = 0.0;

float raySeed = float(seed);
float curSeed = float(0);

ivec2 noiseOffset = Unflatten2D(int(uniforms.frameSeed), ivec2(16)) * ivec2(8);
vec2 blueNoiseVec = texelFetch(randomTexture, (pixel + noiseOffset) % ivec2(128), 0).xy * 256.0;
blueNoiseVec = clamp(blueNoiseVec, 0.0, 255.0);
blueNoiseVec = (blueNoiseVec + 0.5) / 256.0;
int sampleIdx = int(uniforms.frameSeed);
vec2 blueNoiseVec = vec2(
SampleBlueNoise(pixel, sampleIdx, 0, scramblingRankingTexture, sobolSequenceTexture),
SampleBlueNoise(pixel, sampleIdx, 1, scramblingRankingTexture, sobolSequenceTexture)
);

const int sampleCount = 1;
for (uint i = 0; i < sampleCount; i++) {
Ray ray;
Surface surface;

float u0 = random(raySeed, curSeed);
float u1 = random(raySeed, curSeed);

surface.N = worldNorm;
surface.P = worldPos;
BRDFSample brdfSample = SampleDiffuseBRDF(surface, blueNoiseVec);
Expand Down
4 changes: 2 additions & 2 deletions data/shader/ao/temporal.csh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#include <../common/flatten.hsh>
#include <../common/random.hsh>

layout (local_size_x = 8, local_size_y = 8) in;
layout (local_size_x = 16, local_size_y = 16) in;

layout(set = 3, binding = 0, r16f) writeonly uniform image2D resolveImage;
layout(set = 3, binding = 1, r16f) writeonly uniform image2D momentsImage;
Expand Down Expand Up @@ -207,7 +207,7 @@ void ComputeVarianceMinMax(out float aabbMin, out float aabbMax) {
float sampleAo = FetchCurrentAo(sharedMemoryIdx);
float sampleLinearDepth = FetchDepth(sharedMemoryIdx);

float depthPhi = max(1.0, abs(0.125 * linearDepth));
float depthPhi = max(1.0, abs(0.025 * linearDepth));
float weight = min(1.0 , exp(-abs(linearDepth - sampleLinearDepth) / depthPhi));

sampleAo *= weight;
Expand Down
8 changes: 6 additions & 2 deletions data/shader/clouds/detailNoise.csh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

layout (local_size_x = 4, local_size_y = 4, local_size_z = 4) in;

layout(set = 3, binding = 0, rgba16f) uniform image3D noiseImage;
layout(set = 3, binding = 0, r16f) uniform image3D noiseImage;

layout(push_constant) uniform constants {
float seed;
Expand All @@ -24,6 +24,10 @@ void main() {
noise.b = Worley4Octaves(pos, 4.0 * baseScale, pushConstants.seed, weights);
noise.a = Worley4Octaves(pos, 8.0 * baseScale, pushConstants.seed, weights);

imageStore(noiseImage, pixel, noise);
float highFrequenyFBM = noise.r * 0.625
+ noise.g * 0.250
+ noise.b * 0.125;

imageStore(noiseImage, pixel, vec4(highFrequenyFBM, 0.0, 0.0, 0.0));

}
21 changes: 8 additions & 13 deletions data/shader/clouds/integrate.csh
Original file line number Diff line number Diff line change
Expand Up @@ -119,14 +119,13 @@ float SampleDensity(vec3 pos, vec3 shapeTexCoords, vec3 detailTexCoords,

float lod = 0.0;

vec4 lowFrequencyNoise = textureLod(shapeTexture, shapeTexCoords, lod);
vec2 lowFrequencyNoise = textureLod(shapeTexture, shapeTexCoords, lod).rg;

float lowFrequenyFBM = lowFrequencyNoise.g * 0.625
+ lowFrequencyNoise.b * 0.250
+ lowFrequencyNoise.a * 0.125;
float lowFrequencyBaseNoise = lowFrequencyNoise.r;
float lowFrequencyFBM = lowFrequencyNoise.g;

float baseCloudDensity = Remap(lowFrequencyNoise.r,
-(1.0 - lowFrequenyFBM), 1.0, 0.0, 1.0);
float baseCloudDensity = Remap(lowFrequencyBaseNoise,
-(1.0 - lowFrequencyFBM), 1.0, 0.0, 1.0);

float heightFraction = shapeTexCoords.y;
float densityHeightGradient = exp(-uniforms.upperHeightFalloff * heightFraction) *
Expand All @@ -140,14 +139,10 @@ float SampleDensity(vec3 pos, vec3 shapeTexCoords, vec3 detailTexCoords,
float finalCloudDensity = baseCloudDensity;

if (baseCloudDensity > 0.0) {
vec4 highFrequencyNoise = textureLod(detailTexture, detailTexCoords, lod);

float highFrequenyFBM = highFrequencyNoise.r * 0.625
+ highFrequencyNoise.g * 0.250
+ highFrequencyNoise.b * 0.125;
float highFrequencyFBM = textureLod(detailTexture, detailTexCoords, lod).r;

float highFrequencyNoiseModifier = mix(highFrequenyFBM,
1.0 - highFrequenyFBM, saturate(heightFraction * 10.0));
float highFrequencyNoiseModifier = mix(highFrequencyFBM,
1.0 - highFrequencyFBM, saturate(heightFraction * 10.0));

finalCloudDensity = Remap(baseCloudDensity,
highFrequencyNoiseModifier * uniforms.detailStrength, 1.0, 0.0, 1.0);
Expand Down
8 changes: 6 additions & 2 deletions data/shader/clouds/shapeNoise.csh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

layout (local_size_x = 4, local_size_y = 4, local_size_z = 4) in;

layout(set = 3, binding = 0, rgba16f) uniform image3D noiseImage;
layout(set = 3, binding = 0, rg16f) uniform image3D noiseImage;

layout(push_constant) uniform constants {
float seed;
Expand All @@ -24,6 +24,10 @@ void main() {
noise.b = Worley4Octaves(pos, 4.0 * baseScale, pushConstants.seed, weights);
noise.a = Worley4Octaves(pos, 8.0 * baseScale, pushConstants.seed, weights);

imageStore(noiseImage, pixel, noise);
float lowFrequenyFBM = noise.g * 0.625
+ noise.b * 0.250
+ noise.a * 0.125;

imageStore(noiseImage, pixel, vec4(noise.r, lowFrequenyFBM, 0.0, 0.0));

}
18 changes: 17 additions & 1 deletion data/shader/clouds/temporal.csh
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ layout(set = 3, binding = 2) uniform sampler2D velocityTexture;
layout(set = 3, binding = 3) uniform sampler2D depthTexture;

layout(set = 3, binding = 4) uniform sampler2D historyTexture;
// layout(set = 3, binding = 5) uniform sampler2D historyDepthTexture;
layout(set = 3, binding = 5) uniform sampler2D historyDepthTexture;

vec2 invResolution = 1.0 / vec2(imageSize(resolveImage));
vec2 resolution = vec2(imageSize(resolveImage));
Expand Down Expand Up @@ -222,6 +222,22 @@ void main() {
factor = (uv.x < 0.0 || uv.y < 0.0 || uv.x > 1.0
|| uv.y > 1.0) ? 0.0 : factor;
ivec2 historyPixel = ivec2(vec2(pixel) + velocity * resolution);
float minConfidence = 1.0;
// Calculate confidence over 2x2 bilinear neighborhood
// Note that a 3x3 neighborhood could help on edges
for (int i = 0; i < 9; i++) {
ivec2 offsetPixel = historyPixel + offsets[i];
float confidence = 1.0;
float historyDepth = texelFetch(historyDepthTexture, offsetPixel, 0).r;
confidence *= historyDepth < 1.0 ? 0.0 : 1.0;
minConfidence = min(minConfidence, confidence);
}
factor *= minConfidence;
vec4 resolve = mix(currentValue, historyValue, factor);
imageStore(resolveImage, pixel, resolve);
Expand Down
29 changes: 29 additions & 0 deletions data/shader/common/bluenoise.hsh
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/*
Based on:
Eric Heitz, Laurent Belcour, V. Ostromoukhov, David Coeurjolly, and Jean-Claude Iehl. 2019.
A low-discrepancy sampler that distributes monte carlo errors as a blue noise in screen space.
In ACM SIGGRAPH 2019 Talks (SIGGRAPH '19). Association for Computing Machinery, New York, NY, USA,
Article 68, 1–2. https://doi.org/10.1145/3306307.3328191
*/

// Returns one sample of a blue-noise distributed low-discrepancy sequence.
//
// pixel             Pixel coordinate; wrapped into a 128x128 tile.
// sampleIndex       Index of the sample in the sequence; wrapped into [0, 255].
// sampleDimension   Dimension of the sample; wrapped into [0, 3].
// scramblingRanking Texture holding the per-pixel scrambling values (fetched
//                   from components 0/1) and ranking value (fetched from .b).
//                   NOTE(review): presumably 128x128 texels - confirm against
//                   the texture that is actually bound.
// sobolSequence     Texture holding the sequence, fetched at (index, 0) with
//                   one dimension per component.
//                   NOTE(review): presumably 256 texels wide - confirm.
//
// The result is (value + 0.5) / 256 with value in [0, 255], so it always lies
// strictly between 0 and 1.
float SampleBlueNoise(ivec2 pixel, int sampleIndex, int sampleDimension,
    sampler2D scramblingRanking, sampler2D sobolSequence) {

    // Wrap the arguments with bit masks instead of the modulus operator:
    // GLSL leaves the result of % undefined when an operand is negative, while
    // the mask always yields a value inside the valid range. For non-negative
    // inputs both forms are identical since all ranges are powers of two.
    pixel = pixel & 127;
    sampleIndex = sampleIndex & 255;
    sampleDimension = sampleDimension & 3;

    // Ranking and scrambling live in the same texel, so fetch it only once
    vec4 scramblingRankingTexel = texelFetch(scramblingRanking, pixel, 0);

    // xor index based on optimized ranking
    int ranking = int(clamp(scramblingRankingTexel.b * 256.0, 0.0, 255.0));
    int rankedSampleIndex = sampleIndex ^ ranking;

    // fetch value in sequence
    vec4 sequenceTexel = texelFetch(sobolSequence, ivec2(rankedSampleIndex, 0), 0);
    int value = int(clamp(sequenceTexel[sampleDimension] * 256.0, 0.0, 255.0));

    // xor sequence value based on optimized scrambling; only two scrambling
    // components are read, so the dimension is folded onto them
    int scrambling = int(clamp(scramblingRankingTexel[sampleDimension & 1] * 256.0, 0.0, 255.0));
    value = value ^ scrambling;

    // convert to float and return the center of the selected 1/256 bucket
    return (0.5 + float(value)) / 256.0;
}
20 changes: 12 additions & 8 deletions data/shader/reflection/rtreflection.csh
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <../common/flatten.hsh>
#include <../common/convert.hsh>
#include <../common/PI.hsh>
#include <../common/bluenoise.hsh>

#include <../brdf/brdfEval.hsh>
#include <../brdf/importanceSample.hsh>
Expand All @@ -27,8 +28,10 @@ layout(set = 3, binding = 2) uniform sampler2D depthTexture;
layout(set = 3, binding = 3) uniform sampler2D roughnessMetallicAoTexture;
layout(set = 3, binding = 4) uniform isampler2D offsetTexture;
layout(set = 3, binding = 5) uniform usampler2D materialIdxTexture;
layout(set = 3, binding = 6) uniform sampler2D randomTexture;
layout(set = 3, binding = 7) uniform sampler2DArrayShadow cascadeMaps;
layout(set = 3, binding = 6) uniform sampler2DArrayShadow cascadeMaps;

layout(set = 3, binding = 7) uniform sampler2D scramblingRankingTexture;
layout(set = 3, binding = 8) uniform sampler2D sobolSequenceTexture;

const ivec2 offsets[4] = ivec2[4](
ivec2(0, 0),
Expand All @@ -37,7 +40,7 @@ const ivec2 offsets[4] = ivec2[4](
ivec2(1, 1)
);

layout(std140, set = 3, binding = 8) uniform UniformBuffer {
layout(std140, set = 3, binding = 9) uniform UniformBuffer {
float radianceLimit;
uint frameSeed;
float bias;
Expand Down Expand Up @@ -70,11 +73,12 @@ void main() {
vec3 worldPos = vec3(globalData.ivMatrix * vec4(viewPos, 1.0));
vec3 viewVec = vec3(globalData.ivMatrix * vec4(viewPos, 0.0));
vec3 worldNorm = normalize(vec3(globalData.ivMatrix * vec4(2.0 * textureLod(normalTexture, texCoord, 0).rgb - 1.0, 0.0)));

ivec2 noiseOffset = Unflatten2D(int(uniforms.frameSeed), ivec2(16)) * ivec2(8);
vec2 blueNoiseVec = texelFetch(randomTexture, (pixel + noiseOffset) % ivec2(128), 0).xy * 256.0;
blueNoiseVec = clamp(blueNoiseVec, 0.0, 255.0);
blueNoiseVec = (blueNoiseVec + 0.5) / 256.0;

int sampleIdx = int(uniforms.frameSeed);
vec2 blueNoiseVec = vec2(
SampleBlueNoise(pixel, sampleIdx, 0, scramblingRankingTexture, sobolSequenceTexture),
SampleBlueNoise(pixel, sampleIdx, 1, scramblingRankingTexture, sobolSequenceTexture)
);

uint materialIdx = texelFetch(materialIdxTexture, pixel, 0).r;
Material material = UnpackMaterial(materialIdx);
Expand Down
2 changes: 1 addition & 1 deletion data/shader/reflection/temporal.csh
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ void ComputeVarianceMinMax(out vec3 aabbMin, out vec3 aabbMax) {
vec3 sampleRadiance = FetchCurrentRadiance(sharedMemoryIdx);
float sampleLinearDepth = FetchDepth(sharedMemoryIdx);
float depthPhi = max(1.0, abs(0.25 * linearDepth));
float depthPhi = max(1.0, abs(0.025 * linearDepth));
float weight = min(1.0 , exp(-abs(linearDepth - sampleLinearDepth) / depthPhi));
sampleRadiance *= weight;
Expand Down
Binary file added data/sobol.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions src/demo/App.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -832,6 +832,7 @@ bool App::LoadScene() {
// Setup camera
camera.location = glm::vec3(30.0f, 25.0f, 0.0f);
camera.rotation = glm::vec2(-3.14f / 2.0f, 0.0f);
camera.exposure = 1.0f;

scene.fog->enable = true;
}
Expand Down
6 changes: 6 additions & 0 deletions src/engine/graphics/CommandList.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,12 @@ namespace Atlas {

}

/**
 * Registers another command list as a dependency of this one.
 *
 * The pointer is appended to the dependencies list as-is; it is stored
 * as a raw, non-owning pointer and is not checked for null or duplicates.
 *
 * @param commandList The command list this command list depends on.
 */
void CommandList::DependsOn(CommandList *commandList) {

dependencies.emplace_back(commandList);

}

void CommandList::BeginCommands() {

// Here we assume the command buffer is free to write and not used
Expand Down
10 changes: 10 additions & 0 deletions src/engine/graphics/CommandList.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@ namespace Atlas {

namespace Graphics {

/**
 * Describes how a command list is ordered on submission relative to the
 * command lists that were already submitted for the same frame.
 */
enum class ExecutionOrder : int {
/// The submission waits on the semaphore of the previously submitted command list.
Sequential = 0,
/// The submission does not wait on the previously submitted command list.
Parallel = 1
};

enum QueueType {
GraphicsQueue = 0,
PresentationQueue,
Expand All @@ -45,6 +50,8 @@ namespace Atlas {

CommandList& operator=(const CommandList& that) = delete;

void DependsOn(CommandList* commandList);

void BeginCommands();

void EndCommands();
Expand Down Expand Up @@ -160,6 +167,9 @@ namespace Atlas {
Ref<Pipeline> pipelineInUse = nullptr;
Ref<FrameBuffer> frameBufferInUse = nullptr;

std::vector<CommandList*> dependencies;
ExecutionOrder executionOrder = ExecutionOrder::Sequential;

private:
struct DescriptorBindingData {
Buffer* buffers[DESCRIPTOR_SET_COUNT][BINDINGS_PER_DESCRIPTOR_SET];
Expand Down
9 changes: 8 additions & 1 deletion src/engine/graphics/GraphicsDevice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,7 @@ namespace Atlas {
commandLists, true);

commandList->isSubmitted = false;
commandList->dependencies.clear();

return commandList;
}
Expand All @@ -323,6 +324,7 @@ namespace Atlas {
currentFrameData->commandLists, false);

commandList->isSubmitted = false;
commandList->dependencies.clear();

return commandList;
}
Expand All @@ -339,11 +341,16 @@ namespace Atlas {
// when we get back to this frames data and start a new frame with it.
auto frame = GetFrameData();

cmd->executionOrder = order;

std::vector<VkSemaphore> waitSemaphores = { frame->semaphore };
std::vector<VkPipelineStageFlags> waitStages = { waitStage };
if (frame->submittedCommandLists.size() > 0) {
if (frame->submittedCommandLists.size() > 0 && order == ExecutionOrder::Sequential) {
waitSemaphores[0] = frame->submittedCommandLists.back()->semaphore;
}
else if (order == ExecutionOrder::Parallel) {

}

VkSubmitInfo submit = {};
submit.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
Expand Down
5 changes: 0 additions & 5 deletions src/engine/graphics/GraphicsDevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,6 @@ namespace Atlas {
class Instance;
class ImguiWrapper;

enum class ExecutionOrder {
Sequential = 0,
Parallel = 1
};

class FrameData {
public:
VkSemaphore semaphore;
Expand Down
4 changes: 2 additions & 2 deletions src/engine/lighting/VolumetricClouds.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ namespace Atlas {

VolumetricClouds::VolumetricClouds(int32_t shapeResolution, int32_t detailResolution) :
shapeTexture(shapeResolution, shapeResolution, shapeResolution,
VK_FORMAT_R16G16B16A16_SFLOAT, Texture::Wrapping::Repeat, Texture::Filtering::MipMapLinear),
VK_FORMAT_R16G16_SFLOAT, Texture::Wrapping::Repeat, Texture::Filtering::MipMapLinear),
detailTexture(detailResolution, detailResolution, detailResolution,
VK_FORMAT_R16G16B16A16_SFLOAT, Texture::Wrapping::Repeat, Texture::Filtering::MipMapLinear) {
VK_FORMAT_R16_SFLOAT, Texture::Wrapping::Repeat, Texture::Filtering::MipMapLinear) {



Expand Down
Loading

0 comments on commit 1cec285

Please sign in to comment.