Skip to content

Commit

Permalink
Experimenting with wave intrinsics
Browse files Browse the repository at this point in the history
 + updated dxcompiler with latest bug fixes
 + copied over ShadowHelper updates from newer sample frameworks
 + worked around DXIL validator issues
  • Loading branch information
TheRealMJP committed Nov 1, 2018
1 parent 879068f commit 5ed3ce9
Show file tree
Hide file tree
Showing 20 changed files with 967 additions and 453 deletions.
2 changes: 1 addition & 1 deletion BindlessDeferred/AppConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,4 @@

#define EnableSkyModel_ (1)
#define EnableEmbree_ (0)
#define EnableShaderModel6_ (0)
#define EnableShaderModel6_ (1)
1 change: 1 addition & 0 deletions BindlessDeferred/AppSettings.cs
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ public class Scene
const uint MaxSpotLights = 32;
const uint SpotLightElementsPerCluster = MaxSpotLights / 32;
const float SpotLightRange = 7.5f;
const float SpotShadowNearClip = 0.1f;

const uint DeferredTileSize = 8;
const uint DeferredTileMaskSize = (DeferredTileSize * DeferredTileSize) / 32;
Expand Down
1 change: 1 addition & 0 deletions BindlessDeferred/AppSettings.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ namespace AppSettings
static const uint64 MaxSpotLights = 32;
static const uint64 SpotLightElementsPerCluster = 1;
static const float SpotLightRange = 7.5000f;
static const float SpotShadowNearClip = 0.1000f;
static const uint64 DeferredTileSize = 8;
static const uint64 DeferredTileMaskSize = 2;
static const float DeferredUVScale = 2.0000f;
Expand Down
1 change: 1 addition & 0 deletions BindlessDeferred/AppSettings.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ static const uint DecalElementsPerCluster = 2;
static const uint MaxSpotLights = 32;
static const uint SpotLightElementsPerCluster = 1;
static const float SpotLightRange = 7.5000f;
static const float SpotShadowNearClip = 0.1000f;
static const uint DeferredTileSize = 8;
static const uint DeferredTileMaskSize = 2;
static const float DeferredUVScale = 2.0000f;
2 changes: 1 addition & 1 deletion BindlessDeferred/BindlessDeferred.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2167,7 +2167,7 @@ void BindlessDeferred::RenderDeferred()

DX12::BindTempConstantBuffer(cmdList, shadingConstants, DeferredParams_PSCBuffer, CmdListMode::Compute);

const SunShadowConstants& sunShadowConstants = meshRenderer.SunShadowConstantData();
const SunShadowConstantsDepthMap& sunShadowConstants = meshRenderer.SunShadowConstantData();
DX12::BindTempConstantBuffer(cmdList, sunShadowConstants, DeferredParams_ShadowCBuffer, CmdListMode::Compute);

spotLightBuffer.SetAsComputeRootParameter(cmdList, DeferredParams_LightCBuffer);
Expand Down
2 changes: 1 addition & 1 deletion BindlessDeferred/BindlessDeferred.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
<ProjectGuid>{FA705507-9C58-4413-8878-8795F3B9897D}</ProjectGuid>
<Keyword>Win32Proj</Keyword>
<RootNamespace>BindlessDeferred</RootNamespace>
<WindowsTargetPlatformVersion>10.0.16299.0</WindowsTargetPlatformVersion>
<WindowsTargetPlatformVersion>10.0.17763.0</WindowsTargetPlatformVersion>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
Expand Down
5 changes: 4 additions & 1 deletion BindlessDeferred/Mesh.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -181,13 +181,16 @@ PSOutputForward PSForward(in PSInput input)
shadingInput.ShadowCBuffer = ShadowCBuffer;
shadingInput.LightCBuffer = LightCBuffer;

// The DXIL validator is complaining if we do this after the wave operations inside of ShadePixel
float3 gradients = abs(float3(ddx(input.UV), ddy(input.UV).x)) * 64.0f;

Texture2DArray sunShadowMap = Tex2DArrayTable[SRVIndices.SunShadowMapIdx];
Texture2DArray spotLightShadowMap = Tex2DArrayTable[SRVIndices.SpotLightShadowMapIdx];

float3 shadingResult = ShadePixel(shadingInput, sunShadowMap, spotLightShadowMap, ShadowMapSampler);

if(AppSettings.ShowUVGradients)
shadingResult = abs(float3(ddx(input.UV), ddy(input.UV).x)) * 64.0f;
shadingResult = gradients;

// The tangent frame can have arbitrary handedness, so we force it to be left-handed.
// We don't pack the handedness bit for forward rendering, since the decal picking only
Expand Down
4 changes: 2 additions & 2 deletions BindlessDeferred/MeshRenderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -669,7 +669,7 @@ void MeshRenderer::RenderSunShadowMap(ID3D12GraphicsCommandList* cmdList, const
ProfileBlock profileBlock(cmdList, "Sun Shadow Map Rendering");

OrthographicCamera cascadeCameras[NumCascades];
ShadowHelper::PrepareCascades(AppSettings::SunDirection, SunShadowMapSize, true, camera, sunShadowConstants, cascadeCameras);
ShadowHelper::PrepareCascades(AppSettings::SunDirection, SunShadowMapSize, true, camera, sunShadowConstants.Base, cascadeCameras);

// Render the meshes to each cascade
for(uint64 cascadeIdx = 0; cascadeIdx < NumCascades; ++cascadeIdx)
Expand Down Expand Up @@ -714,7 +714,7 @@ void MeshRenderer::RenderSpotLightShadowMap(ID3D12GraphicsCommandList* cmdList,

// Draw the mesh with depth only, using the new shadow camera
PerspectiveCamera shadowCamera;
shadowCamera.Initialize(1.0f, light.AngularAttenuation.y, 0.1f, AppSettings::SpotLightRange);
shadowCamera.Initialize(1.0f, light.AngularAttenuation.y, AppSettings::SpotShadowNearClip, AppSettings::SpotLightRange);
shadowCamera.SetPosition(light.Position);
shadowCamera.SetOrientation(light.Orientation);
RenderSpotLightShadowDepth(cmdList, shadowCamera);
Expand Down
4 changes: 2 additions & 2 deletions BindlessDeferred/MeshRenderer.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ class MeshRenderer
const DepthBuffer& SpotLightShadowMap() const { return spotLightShadowMap; }
const Float4x4* SpotLightShadowMatrices() const { return spotLightShadowMatrices; }
const StructuredBuffer& MaterialTextureIndicesBuffer() const { return materialTextureIndices; }
const SunShadowConstants& SunShadowConstantData() { return sunShadowConstants; }
const SunShadowConstantsDepthMap& SunShadowConstantData() { return sunShadowConstants; }

protected:

Expand Down Expand Up @@ -124,5 +124,5 @@ class MeshRenderer
Array<uint32> meshDrawIndices;
Array<float> meshZDepths;

SunShadowConstants sunShadowConstants;
SunShadowConstantsDepthMap sunShadowConstants;
};
50 changes: 38 additions & 12 deletions BindlessDeferred/Shading.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,12 @@
//=================================================================================================

// Options
#define ShadowMapMode_ ShadowMapMode_DepthMap_

#ifndef UseImplicitShadowDerivatives_
#define UseImplicitShadowDerivatives_ 0
#endif

#define UseReceiverPlaneBias_ 1

// Set this to zero to make compile times quicker
#define UseGatherPCF_ 1
#define ShadowMapMode_ 0
#define UseGatherPCF_ 0

#include <DescriptorTables.hlsl>
#include <SH.hlsl>
Expand Down Expand Up @@ -110,8 +106,8 @@ float3 CalcLighting(in float3 normal, in float3 lightDir, in float3 peakIrradian
// are passed directly to this function instead of through the ShadingInput struct in order to
// work around incorrect behavior from the shader compiler
//-------------------------------------------------------------------------------------------------
float3 ShadePixel(in ShadingInput input, in Texture2DArray SunShadowMap,
in Texture2DArray SpotLightShadowMap, in SamplerComparisonState ShadowSampler)
float3 ShadePixel(in ShadingInput input, in Texture2DArray sunShadowMap,
in Texture2DArray spotLightShadowMap, in SamplerComparisonState shadowSampler)
{
float3 vtxNormalWS = input.TangentFrame._m20_m21_m22;
float3 normalWS = vtxNormalWS;
Expand Down Expand Up @@ -168,6 +164,14 @@ float3 ShadePixel(in ShadingInput input, in Texture2DArray SunShadowMap,
{
// Loop until we've processed every raised bit
uint clusterElemMask = input.DecalClusterBuffer.Load((clusterOffset + elemIdx) * 4);

#if DXC_
// OR the cluster bitmask across the entire wave to force it to be wave-uniform.
// This can allow AMD hardware to use scalar loads and registers for data from the decal buffer.
clusterElemMask = WaveActiveBitOr(clusterElemMask);
clusterElemMask = WaveReadLaneFirst(clusterElemMask);
#endif

while(clusterElemMask)
{
uint bitIdx = firstbitlow(clusterElemMask);
Expand Down Expand Up @@ -251,16 +255,23 @@ float3 ShadePixel(in ShadingInput input, in Texture2DArray SunShadowMap,

if(AppSettings.EnableSun)
{
float3 sunDirection = CBuffer.SunDirectionWS;

float2 shadowMapSize;
float numSlices;
sunShadowMap.GetDimensions(shadowMapSize.x, shadowMapSize.y, numSlices);

const float3 shadowPosOffset = GetShadowPosOffset(saturate(dot(vtxNormalWS, sunDirection)), vtxNormalWS, shadowMapSize.x);

#if UseImplicitShadowDerivatives_
// Forward path
float sunShadowVisibility = SunShadowVisibility(positionWS, depthVS, SunShadowMap, ShadowSampler, ShadowCBuffer, 0);
float sunShadowVisibility = SunShadowVisibility(positionWS, depthVS, shadowPosOffset, 0.0f, sunShadowMap, shadowSampler, ShadowCBuffer);
#else
// Deferred path
float sunShadowVisibility = SunShadowVisibility(positionWS, positionNeighborX, positionNeighborY,
depthVS, SunShadowMap, ShadowSampler, ShadowCBuffer, 0);
depthVS, shadowPosOffset, 0.0f, sunShadowMap, shadowSampler, ShadowCBuffer);
#endif

float3 sunDirection = CBuffer.SunDirectionWS;
if(AppSettings.SunAreaLightApproximation)
{
float3 D = CBuffer.SunDirectionWS;
Expand All @@ -279,6 +290,10 @@ float3 ShadePixel(in ShadingInput input, in Texture2DArray SunShadowMap,
uint numLights = 0;
if(AppSettings.RenderLights)
{
float2 shadowMapSize;
float numSlices;
spotLightShadowMap.GetDimensions(shadowMapSize.x, shadowMapSize.y, numSlices);

uint clusterOffset = clusterIdx * SpotLightElementsPerCluster;

// Loop over the number of 4-byte elements needed for each cluster
Expand All @@ -287,6 +302,14 @@ float3 ShadePixel(in ShadingInput input, in Texture2DArray SunShadowMap,
{
// Loop until we've processed every raised bit
uint clusterElemMask = input.SpotLightClusterBuffer.Load((clusterOffset + elemIdx) * 4);

#if DXC_
// OR the cluster bitmask across the entire wave to force it to be wave-uniform.
// This can allow AMD hardware to use scalar loads and registers for data from the light buffer.
clusterElemMask = WaveActiveBitOr(clusterElemMask);
clusterElemMask = WaveReadLaneFirst(clusterElemMask);
#endif

while(clusterElemMask)
{
uint bitIdx = firstbitlow(clusterElemMask);
Expand All @@ -307,10 +330,13 @@ float3 ShadePixel(in ShadingInput input, in Texture2DArray SunShadowMap,
falloff = (falloff * falloff) / (distanceToLight * distanceToLight + 1.0f);
float3 intensity = spotLight.Intensity * angularAttenuation * falloff;

const float3 shadowPosOffset = GetShadowPosOffset(saturate(dot(vtxNormalWS, surfaceToLight)), vtxNormalWS, shadowMapSize.x);

// We have to use explicit gradients for spotlight shadows, since the looping/branching is non-uniform
float spotLightVisibility = SpotLightShadowVisibility(positionWS, positionNeighborX, positionNeighborY,
input.LightCBuffer.ShadowMatrices[spotLightIdx],
spotLightIdx, SpotLightShadowMap, ShadowSampler, 0.0f, 0);
spotLightIdx, shadowPosOffset, spotLightShadowMap, shadowSampler,
float2(SpotShadowNearClip, spotLight.Range), ShadowCBuffer.Extra);

output += CalcLighting(normalWS, surfaceToLight, intensity, diffuseAlbedo, specularAlbedo,
roughness, positionWS, CBuffer.CameraPosWS) * spotLightVisibility;
Expand Down
Binary file modified Externals/DXCompiler/Bin/dxcompiler.dll
Binary file not shown.
Binary file modified Externals/DXCompiler/Lib/dxcompiler.lib
Binary file not shown.
5 changes: 5 additions & 0 deletions SampleFramework12/v1.01/Graphics/Camera.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ class Camera
// Returns the stored near clip value (nearZ).
float NearClip() const { return nearZ; };
// Returns the stored far clip value (farZ).
float FarClip() const { return farZ; };


Float3 Forward() const;
Float3 Back() const;
Float3 Up() const;
Expand All @@ -62,6 +63,8 @@ class Camera
void SetNearClip(float newNearClip);
void SetFarClip(float newFarClip);
void SetProjection(const Float4x4& newProjection);

// Reports whether this camera is orthographic. The base implementation returns false;
// it is virtual so that derived camera types can override the answer.
virtual bool IsOrthographic() const { return false; }
};

// Camera with an orthographic projection
Expand Down Expand Up @@ -90,6 +93,8 @@ class OrthographicCamera : public Camera
void SetMinY(float minY);
void SetMaxX(float maxX);
void SetMaxY(float maxY);

// Overrides the base-class IsOrthographic() to always return true for this camera type.
bool IsOrthographic() const override { return true; }
};

// Camera with a perspective projection
Expand Down
11 changes: 11 additions & 0 deletions SampleFramework12/v1.01/Graphics/GraphicsTypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1194,6 +1194,17 @@ void RenderTexture::MakeWritable(ID3D12GraphicsCommandList* cmdList, uint64 mipL
DX12::TransitionResource(cmdList, Texture.Resource, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET, subResourceIdx);
}

// Records a single UAV-type resource barrier for this render texture's underlying
// resource on the supplied command list. Asserts that the resource has been created.
void RenderTexture::UAVBarrier(ID3D12GraphicsCommandList* cmdList) const
{
    Assert_(Texture.Resource != nullptr);

    // Value-initialize first so that every field not explicitly set below is zeroed
    D3D12_RESOURCE_BARRIER uavBarrier = { };
    uavBarrier.UAV.pResource = Texture.Resource;
    uavBarrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
    uavBarrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;

    cmdList->ResourceBarrier(1, &uavBarrier);
}

// == VolumeTexture ===============================================================================

VolumeTexture::VolumeTexture()
Expand Down
1 change: 1 addition & 0 deletions SampleFramework12/v1.01/Graphics/GraphicsTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,7 @@ struct RenderTexture
void Transition(ID3D12GraphicsCommandList* cmdList, D3D12_RESOURCE_STATES before, D3D12_RESOURCE_STATES after, uint64 mipLevel = uint64(-1), uint64 arraySlice = uint64(-1)) const;
void MakeReadable(ID3D12GraphicsCommandList* cmdList, uint64 mipLevel = uint64(-1), uint64 arraySlice = uint64(-1)) const;
void MakeWritable(ID3D12GraphicsCommandList* cmdList, uint64 mipLevel = uint64(-1), uint64 arraySlice = uint64(-1)) const;
void UAVBarrier(ID3D12GraphicsCommandList* cmdList) const;

uint32 SRV() const { return Texture.SRV; }
uint64 Width() const { return Texture.Width; }
Expand Down
2 changes: 1 addition & 1 deletion SampleFramework12/v1.01/Graphics/ShaderCompilation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,7 @@ static ID3DBlob* CompileShader(const wchar* path, const char* functionName, Shad
{
if(errorMessages)
{
wchar message[1024] = { 0 };
wchar message[1024 * 4] = { 0 };
char* blobdata = reinterpret_cast<char*>(errorMessages->GetBufferPointer());

MultiByteToWideChar(CP_ACP, 0, blobdata, static_cast<int>(errorMessages->GetBufferSize()), message, 1024);
Expand Down
Loading

0 comments on commit 5ed3ce9

Please sign in to comment.