Skip to content

Commit

Permalink
完成LOD shader
Browse files Browse the repository at this point in the history
  • Loading branch information
fakersaber committed May 27, 2021
1 parent d8602fd commit fec7239
Show file tree
Hide file tree
Showing 11 changed files with 539 additions and 67 deletions.
13 changes: 11 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,15 @@
- [x] Resources System
- [x] Render Resources
- [x] Handle Heightmap
- [ ] Generator BoundingBox
- [x] Generator BoundingBox
- [x] ComputeLOD - Same as Unreal
- [ ] GPU Culling
- [ ] Test InstanceOffset
- [ ] SerializeData



Optimization

- [ ] Test InstanceOffset
- [ ] Test ARM Integer register
- [ ] Remove old data
178 changes: 161 additions & 17 deletions Shader/LandscapeGpuRender.usf
Original file line number Diff line number Diff line change
@@ -1,35 +1,179 @@
#include "Common.ush"

float ComputeBoundsScreenRadiusSquared(const FVector4& BoundsOrigin, const float SphereRadius, const FVector4& ViewOrigin, const FMatrix& ProjMatrix)
// Thread-group width of ClusterComputeLODCS (1D dispatch).
#define GROUP_TILE_SIZE 64
// Thread-group width and height of LandscapeGpuCullingCS (2D dispatch).
#define GROUP_TILE_SIZE_1 8
// Stride, in uints, between the per-LOD entries of DrawCommandBufferUAV.
#define DRAWCOMMAND_SIZE 5

//[Input]
/* Layout of LodCSParameters, one float4 per entry:
[0].xyz  ViewOriginPosition
[1]      ProjMatrixParameters (ProjMatrix.M[0][0], ProjMatrix.M[1][1], ProjMatrix.M[2][3], ClusterSqureSizePerComponent)
[2]      LODSettingsComponent (LastLODScreenSizeSquared, LOD1ScreenSizeSquared, LODOnePlusDistributionScalarSquared, LastLODIndex)
*/
float4 LodCSParameters[3];
// Per-component bounding sphere: xyz = origin, w = radius.
Buffer<float4> ComponentsOriginAndRadiusSRV;

//[Output]
// One LOD index per cluster; ClusterComputeLODCS writes the component's LOD to
// each of that component's ClusterSqureSizePerComponent consecutive entries.
RWBuffer<uint> ClusterLodBufferUAV;

// Returns the squared screen-space radius of a bounding sphere.
//   OriginAndRadius  xyz = sphere origin, w = sphere radius.
// The view origin is read from LodCSParameters[0].xyz and the projection terms
// (M[0][0], M[1][1], M[2][3]) from LodCSParameters[1].xyz.
float ComputeBoundsScreenRadiusSquared(float4 OriginAndRadius)
{
    const float3 ViewOriginPosition = LodCSParameters[0].xyz;
    const float3 ProjMatrixParameters = LodCSParameters[1].xyz;

    // Ignore perspective foreshortening for orthographic projections:
    // the squared distance is scaled by ProjMatrix.M[2][3].
    const float3 ViewToBounds = ViewOriginPosition - OriginAndRadius.xyz;
    const float DistSqr = dot(ViewToBounds, ViewToBounds) * ProjMatrixParameters.z;

    // Get projection multiple accounting for view scaling.
    const float ScreenMultiple = max(0.5f * ProjMatrixParameters.x, 0.5f * ProjMatrixParameters.y);

    // Calculate screen-space projected radius. The max() keeps the divisor >= 1.
    return Square(ScreenMultiple * OriginAndRadius.w) / max(1.0f, DistSqr);
}

// Maps a squared screen size to a LOD index using LodCSParameters[2]:
//   x = LastLODScreenSizeSquared, y = LOD1ScreenSizeSquared,
//   z = LODOnePlusDistributionScalarSquared, w = LastLODIndex.
// Returns LastLODIndex at or below the last-LOD threshold, 0 above the LOD1
// threshold, and otherwise 1 + log_z(y / ScreenSizeSquared) truncated to uint.
uint GetLODFromScreenSize(float InScreenSizeSquared)
{
    // LODDistanceFactor: the view LOD scale is not considered for now.
    // float ScreenSizeSquared = InScreenSizeSquared / InViewLODScale;
    const float ScreenSizeSquared = InScreenSizeSquared;
    const float4 LODSettings = LodCSParameters[2];

    if (ScreenSizeSquared <= LODSettings.x)
    {
        return (uint)LODSettings.w;
    }

    if (ScreenSizeSquared > LODSettings.y)
    {
        return 0;
    }

    // log2(a) / log2(b) is the base-b logarithm of a.
    const float FractionalLOD = 1 + log2(LODSettings.y / ScreenSizeSquared) / log2(LODSettings.z);
    return (uint)FractionalLOD;
}

// One thread per component: derives the component's LOD from its bounding
// sphere and writes that LOD to every cluster slot belonging to the component.
[numthreads(GROUP_TILE_SIZE, 1, 1)]
void ClusterComputeLODCS(uint DispatchThreadId : SV_DispatchThreadID)
{
    const float4 ComponentSphere = ComponentsOriginAndRadiusSRV[DispatchThreadId];
    const float ScreenRadiusSquared = ComputeBoundsScreenRadiusSquared(ComponentSphere);
    const uint ComponentLod = GetLODFromScreenSize(ScreenRadiusSquared);

    // LodCSParameters[1].w carries the number of clusters per component.
    const uint ClustersPerComponent = (uint)LodCSParameters[1].w;
    const uint FirstClusterSlot = DispatchThreadId * ClustersPerComponent;

    LOOP
    for (uint SlotOffset = 0; SlotOffset < ClustersPerComponent; ++SlotOffset)
    {
        ClusterLodBufferUAV[FirstClusterSlot + SlotOffset] = ComponentLod;
    }
}

//[Input]
// Per-cluster bounding box consumed by LandscapeGpuCullingCS.
struct ClusterInputData
{
// Box center, passed as Center to IntersectBox8Plane.
float3 BoundCenter;

// Box extent, passed as Extent to IntersectBox8Plane.
float3 BoundExtent;
};

// Two groups of four planes, component-packed: within a group, entries +0/+1/+2
// are multiplied by the box center's x/y/z and entry +3 is subtracted
// (see IntersectBox8Plane).
float4 ViewFrustumPermutedPlanes[8];
// Stride between per-LOD regions of ClusterOutBufferUAV.
uint TotalCluster;
// Clusters along one side of a component. It is used as a bit mask
// (ClusterSizePerComponent - 1), which equals a modulo only for powers of two.
uint ClusterSizePerComponent;
// Landscape size in components (x, y).
uint2 LandscapeComponentSize;
// Cluster bounds, indexed by the linear index from GetLinearIndexByClusterIndex.
StructuredBuffer<ClusterInputData> ClusterInputData_SRV;
// Per-cluster LOD, indexed like ClusterInputData_SRV.
Buffer<uint> ClusterLodBufferSRV;

//[Output]
// Packed per-cluster records, appended into one region per LOD.
RWBuffer<uint> ClusterOutBufferUAV;
// Draw arguments, DRAWCOMMAND_SIZE uints per LOD; one entry is atomically incremented per visible cluster.
RWBuffer<uint> DrawCommandBufferUAV;

// Tests an axis-aligned box against the eight planes in ViewFrustumPermutedPlanes.
//   Center, Extent    box center and extent.
//   InsideNearPlane   true when the box lies entirely on the inner side of the
//                     first packed plane (Distance.x < -PushOut.x); false when
//                     the box is rejected by the first plane group.
//                     NOTE(review): presumably plane 0 is the near plane - confirm against the CPU side.
// Returns false as soon as the box is completely outside any plane, true otherwise.
bool IntersectBox8Plane(in float3 Center, in float3 Extent, out bool InsideNearPlane)
{
    // An out parameter must be written on every return path, including the
    // early-out below.
    InsideNearPlane = false;

    // Planes 0-3: signed distance of the box center to each plane.
    float4 DistX_0 = Center.xxxx * ViewFrustumPermutedPlanes[0];
    float4 DistY_0 = Center.yyyy * ViewFrustumPermutedPlanes[1] + DistX_0;
    float4 DistZ_0 = Center.zzzz * ViewFrustumPermutedPlanes[2] + DistY_0;
    float4 Distance_0 = DistZ_0 - ViewFrustumPermutedPlanes[3];

    // Projection of the box extent onto each plane normal.
    float4 PushX_0 = Extent.xxxx * abs(ViewFrustumPermutedPlanes[0]);
    float4 PushY_0 = Extent.yyyy * abs(ViewFrustumPermutedPlanes[1]) + PushX_0;
    float4 PushOut_0 = Extent.zzzz * abs(ViewFrustumPermutedPlanes[2]) + PushY_0;

    // Completely outside at least one of the first four planes.
    if (any(Distance_0 > PushOut_0))
    {
        return false;
    }

    InsideNearPlane = Distance_0.x < -PushOut_0.x;

    // Planes 4-7: same test on the second group.
    float4 DistX_1 = Center.xxxx * ViewFrustumPermutedPlanes[4];
    float4 DistY_1 = Center.yyyy * ViewFrustumPermutedPlanes[5] + DistX_1;
    float4 DistZ_1 = Center.zzzz * ViewFrustumPermutedPlanes[6] + DistY_1;
    float4 Distance_1 = DistZ_1 - ViewFrustumPermutedPlanes[7];

    float4 PushX_1 = Extent.xxxx * abs(ViewFrustumPermutedPlanes[4]);
    float4 PushY_1 = Extent.yyyy * abs(ViewFrustumPermutedPlanes[5]) + PushX_1;
    float4 PushOut_1 = Extent.zzzz * abs(ViewFrustumPermutedPlanes[6]) + PushY_1;

    if (any(Distance_1 > PushOut_1))
    {
        return false;
    }

    return true;
}

// Converts a 2D cluster coordinate into the linear index used by the
// per-cluster buffers. The coordinate is clamped to the landscape's cluster
// grid first. Clusters are stored component by component (row-major over
// components), and row-major inside each component.
// The in-component coordinate is taken with a bit mask, which matches a modulo
// only when ClusterSizePerComponent is a power of two.
uint GetLinearIndexByClusterIndex(in int2 ClusterIndex)
{
    const int2 LastClusterCoord = int2(LandscapeComponentSize * ClusterSizePerComponent) - int2(1, 1);
    const uint2 ClampedCoord = clamp(ClusterIndex, int2(0, 0), LastClusterCoord);

    const uint2 ComponentCoord = ClampedCoord / ClusterSizePerComponent;
    const uint2 LocalCoord = ClampedCoord & (ClusterSizePerComponent - 1);
    const uint ClustersPerComponent = ClusterSizePerComponent * ClusterSizePerComponent;

    // Index of the first cluster of the owning component.
    const uint ComponentBase = ComponentCoord.y * ClustersPerComponent * LandscapeComponentSize.x + ComponentCoord.x * ClustersPerComponent;
    // Index of the cluster inside that component.
    const uint LocalIndex = LocalCoord.x + LocalCoord.y * ClusterSizePerComponent;

    return ComponentBase + LocalIndex;
}

// Two-at-a-time variant of GetLinearIndexByClusterIndex: ClusterIndex.xy and
// ClusterIndex.zw are two cluster coordinates; the result holds their linear
// indices in x and y respectively.
uint2 GetLinearIndexByClusterIndexBatch(in uint4 ClusterIndex)
{
    const int4 LastClusterCoord = int4(LandscapeComponentSize.xyxy * ClusterSizePerComponent) - int4(1, 1, 1, 1);
    const uint4 ClampedCoord = clamp((int4) ClusterIndex, int4(0, 0, 0, 0), LastClusterCoord);

    const uint4 ComponentCoord = ClampedCoord / ClusterSizePerComponent;
    const uint4 LocalCoord = ClampedCoord & (ClusterSizePerComponent - 1);
    const uint ClustersPerComponent = ClusterSizePerComponent * ClusterSizePerComponent;

    // First cluster of each owning component (rows are .yw, columns are .xz).
    const uint2 ComponentBase = ComponentCoord.yw * ClustersPerComponent * LandscapeComponentSize.x + ComponentCoord.xz * ClustersPerComponent;
    // Position of each cluster inside its component.
    const uint2 LocalIndex = LocalCoord.xz + LocalCoord.yw * ClusterSizePerComponent;

    return ComponentBase + LocalIndex;
}

// One thread per cluster: frustum-culls the cluster and, when visible, appends
// a packed record to the output region of the cluster's LOD and bumps that
// LOD's draw-command counter.
//
// Packed record layout (low bit first):
//   bits  0- 7  cluster X (DispatchThreadId.x)
//   bits  8-15  cluster Y (DispatchThreadId.y)
//   bits 16-18  LOD of the ( 0, +1) neighbour ("Down")
//   bits 19-21  LOD of the (-1,  0) neighbour ("Left")
//   bits 22-24  LOD of the ( 0, -1) neighbour ("Top")
//   bits 25-27  LOD of the (+1,  0) neighbour ("Right")
//   bits 28-30  LOD of this cluster
[numthreads(GROUP_TILE_SIZE_1, GROUP_TILE_SIZE_1, 1)]
void LandscapeGpuCullingCS(uint2 DispatchThreadId : SV_DispatchThreadID)
{
    // Keep the memory touched by one warp contiguous (cache friendly).
    const int2 ClusterCoord = (int2)DispatchThreadId;
    uint CenterLinearIndex = GetLinearIndexByClusterIndex(ClusterCoord);
    ClusterInputData RenderData = ClusterInputData_SRV[CenterLinearIndex];
    uint ClusterLod = ClusterLodBufferSRV[CenterLinearIndex];
    bool InsideNearPlane;
    bool bIsFrustumVisible = IntersectBox8Plane(RenderData.BoundCenter, RenderData.BoundExtent, InsideNearPlane);

    BRANCH
    if (!bIsFrustumVisible)
    {
        return;
    }

    // Neighbour LODs come from the LOD buffer (not the bounds buffer).
    // Coordinates are clamped inside GetLinearIndexByClusterIndex, so a border
    // cluster reads its own LOD for a missing neighbour.
    uint DownLod = ClusterLodBufferSRV[GetLinearIndexByClusterIndex(ClusterCoord + int2(0, 1))];
    uint LeftLod = ClusterLodBufferSRV[GetLinearIndexByClusterIndex(ClusterCoord + int2(-1, 0))];
    uint TopLod = ClusterLodBufferSRV[GetLinearIndexByClusterIndex(ClusterCoord + int2(0, -1))];
    uint RightLod = ClusterLodBufferSRV[GetLinearIndexByClusterIndex(ClusterCoord + int2(1, 0))];

    // Pack the record; each mask selects exactly the bits the shift targets.
    uint PackOutputData = 0;
    PackOutputData = PackOutputData | (DispatchThreadId.x & 0xFF);
    PackOutputData = PackOutputData | ((DispatchThreadId.y << 8) & 0xFF00); // There may be an error in the ARM register?
    PackOutputData = PackOutputData | ((DownLod << 16) & 0x70000);
    PackOutputData = PackOutputData | ((LeftLod << 19) & 0x380000);
    PackOutputData = PackOutputData | ((TopLod << 22) & 0x1C00000);
    PackOutputData = PackOutputData | ((RightLod << 25) & 0xE000000);
    PackOutputData = PackOutputData | ((ClusterLod << 28) & 0x70000000);

    // Compute the output offsets for this cluster's LOD.
    // NOTE(review): "* 1" addresses the first uint of the LOD's draw command. If the
    // buffer uses the standard indexed-indirect argument layout, InstanceCount is
    // the second uint and this should be "+ 1" - confirm against the CPU-side layout.
    uint IndirectDrawInstanceCountIndex = ClusterLod * DRAWCOMMAND_SIZE * 1; //InstanceCount
    uint OutPutOffset = ClusterLod * TotalCluster;
    uint CurrentLodCount = 0;
    // The value before the add is this cluster's slot within the LOD's region.
    InterlockedAdd(DrawCommandBufferUAV[IndirectDrawInstanceCountIndex], 1, CurrentLodCount);
    ClusterOutBufferUAV[CurrentLodCount + OutPutOffset] = PackOutputData;
}
3 changes: 1 addition & 2 deletions Shader/LandscapeGpuRenderVertexFactory.ush
Original file line number Diff line number Diff line change
Expand Up @@ -443,8 +443,7 @@ FVertexFactoryIntermediates GetVertexFactoryIntermediates(FVertexFactoryInput In
ClampPosition = EdgeCluster ? ClampPosition / NonUniformLodSize * (LandscapeGpuRenderUniformBuffer.QuadSizeParameter.x - 1.f) : ClampPosition * AdjustLodScale;

uint2 SectionBlock = ClusterIndex.xy / LandscapeGpuRenderUniformBuffer.NumClusterPerSection;
float2 ClusterPositionSection = ClusterOffset * LandscapeGpuRenderUniformBuffer.QuadSizeParameter.xx;
float2 PositionInSection = ClampPosition + ClusterPositionSection;
float2 PositionInSection = ClampPosition + ClusterOffset * LandscapeGpuRenderUniformBuffer.QuadSizeParameter.xx;
float2 ClusterPositionGlobal = SectionBlock * LandscapeGpuRenderUniformBuffer.QuadSizeParameter.yy;

//Sample Hiehgtmap
Expand Down
130 changes: 129 additions & 1 deletion Sources/LandscapeGpuRenderProxyComponent.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,14 @@
#include "LandscapeComponent.h"
#include "LandscapeProxy.h"
#include "LandscapeMobileGPURender.h"
#include "LandscapeDataAccess.h"

ULandscapeGpuRenderProxyComponent::ULandscapeGpuRenderProxyComponent(const FObjectInitializer& ObjectInitializer)
: Super(ObjectInitializer)
, bIsClusterBoundingCreated(false)
, NumComponents(0)
, ComponentSectionSize(0)
, SectionSizeQuads(0)
, HeightmapTexture(nullptr)
, ProxyLocalBox(ForceInit)
//#if WITH_EDITORONLY_DATA
Expand Down Expand Up @@ -80,6 +84,7 @@ void ULandscapeGpuRenderProxyComponent::Init(ULandscapeComponent* LandscapeCompo

//Set SectionSizeQuads
SectionSizeQuads = LandscapeComponent->SubsectionSizeQuads;
ComponentSectionSize = LandscapeComponent->NumSubsections;

//Set LandscapeKey
LandscapeKey = LandscapeComponent->GetLandscapeProxy()->GetLandscapeGuid();
Expand Down Expand Up @@ -165,7 +170,130 @@ void ULandscapeGpuRenderProxyComponent::GetUsedMaterials(TArray<UMaterialInterfa
//#endif
}

void ULandscapeGpuRenderProxyComponent::UpdateBoundingInformation(const FBox& ComponentCachedLocalBox, const FIntPoint& ComponentQuadBase) {
//Component are local coordinate conversion is required
FVector ComponentMaxBox = FVector(ComponentCachedLocalBox.Max.X + ComponentQuadBase.X, ComponentCachedLocalBox.Max.Y + ComponentQuadBase.Y, ComponentCachedLocalBox.Max.Z);
ProxyLocalBox += FBox(ComponentCachedLocalBox.Min, ComponentMaxBox); //Calculate the boundingbox
NumComponents += 1;
}

// Asserts that this proxy already has a heightmap texture and that the given
// landscape component uses the same one.
void ULandscapeGpuRenderProxyComponent::CheckResources(ULandscapeComponent* LandscapeComponent) {
    check(HeightmapTexture != nullptr);
    check(HeightmapTexture == LandscapeComponent->HeightmapTexture);
}

// Builds one bounding box per cluster from the heightmap and hands the array,
// together with the component's render matrix, to the render thread.
// Boxes are emitted component by component (row-major over components) and
// row-major inside each component.
// SectionSizeQuads and ComponentSectionSize are used as divisors below; the
// constructor initializes them to 0 and Init() assigns them.
void ULandscapeGpuRenderProxyComponent::CreateClusterBounding(const FLandscapeSubmitData& LandscapeSubmitData) {
FVector BoundingSize = ProxyLocalBox.GetSize();

//The total section size of the landscape
const uint32 SectionSizeX = static_cast<uint32>(BoundingSize.X) / SectionSizeQuads;
const uint32 SectionSizeY = static_cast<uint32>(BoundingSize.Y) / SectionSizeQuads;
const uint32 LandscapeComponentSizeX = SectionSizeX / ComponentSectionSize;
const uint32 LandscapeComponentSizeY = SectionSizeY / ComponentSectionSize;
const uint32 SectionVerts = SectionSizeQuads + 1;

//Cluster parameters
const uint32 ClusterSizePerSection = (SectionSizeQuads + 1) / LandscapeGpuRenderParameter::ClusterQuadSize;
const uint32 ClusterSizeX = ClusterSizePerSection * SectionSizeX;
const uint32 ClusterSizeY = ClusterSizePerSection * SectionSizeY;
const uint32 ClusterSizePerComponent = ClusterSizePerSection * ComponentSectionSize;
const uint32 ClusterSqureSizePerComponent = ClusterSizePerComponent * ClusterSizePerComponent;
// The section grid derived from the bounds must match the number of merged components.
check(SectionSizeX * SectionSizeY == NumComponents * ComponentSectionSize * ComponentSectionSize);

//Check Heightmap
// Row pitch and height, in texels, assumed for the heightmap when indexing HeightMapData.
uint32 HeightMapSizeX = SectionVerts * SectionSizeX;
uint32 HeightMapSizeY = SectionVerts * SectionSizeY;

//Reading texture data on the CPU from PlatformData requires all three of the following:
// CompressionSettings must be VectorDisplacementmap.
// MipGenSettings must be NoMipmaps.
// SRGB must be unchecked.
//#TODO: Get rid of the mips
//#TODO: Serialize the data; the height data is not kept on the CPU, so it cannot be read at runtime
// NOTE(review): HeightMapData is declared only under WITH_EDITOR but is read
// unconditionally in the sampling loop below, so a non-editor build will not compile.
// NOTE(review): the pointer returned by LockMip is not checked before use.
#if WITH_EDITOR
FColor* HeightMapData = reinterpret_cast<FColor*>(HeightmapTexture->Source.LockMip(0));
//check(HeightMapSizeX >= static_cast<uint32>(HeightmapTexture->GetSizeX()) && HeightMapSizeY >= static_cast<uint32>(HeightmapTexture->GetSizeY()));
//FTexture2DMipMap& MipData = HeightmapTexture->PlatformData->Mips[0];
//FColor* HeightMapData = reinterpret_cast<FColor*>(MipData.BulkData.Lock(LOCK_READ_ONLY));
#endif

//Calculate the BoundingBox
//The vertices are exactly aligned to the power of 2, so there is no need to calculate whether they are on the edge or clamp
//The memory layout is unified for each Component linear arrangement
TArray<FBox> SubmitToRenderThreadBoundingBox;
SubmitToRenderThreadBoundingBox.Reserve(ClusterSizeX * ClusterSizeY);

for (uint32 CompoenntY = 0; CompoenntY < LandscapeComponentSizeY; ++CompoenntY) {
for (uint32 ComponentX = 0; ComponentX < LandscapeComponentSizeX; ++ComponentX) {
for (uint32 LocalClusterIndexY = 0; LocalClusterIndexY < ClusterSizePerComponent; ++LocalClusterIndexY) {
for (uint32 LocalClusterIndexX = 0; LocalClusterIndexX < ClusterSizePerComponent; ++LocalClusterIndexX) {
// Cluster coordinate in the whole landscape's cluster grid.
FIntPoint GlobalClusterIndex = FIntPoint(LocalClusterIndexX + ComponentX * ClusterSizePerComponent, LocalClusterIndexY + CompoenntY * ClusterSizePerComponent);
//FLandscapeGpuRenderProxyComponent_RenderThread::GetLinearIndexByClusterIndex(GlobalClusterIndex);
//Create Box
// XY corners in quad units: offset inside the section (mask by ClusterSizePerSection - 1)
// plus the owning section's base (section index * SectionSizeQuads). Z starts at 0.
FVector VertexStartPos = FVector(
(GlobalClusterIndex.X & (ClusterSizePerSection - 1)) * LandscapeGpuRenderParameter::ClusterQuadSize + GlobalClusterIndex.X / ClusterSizePerSection * SectionSizeQuads,
(GlobalClusterIndex.Y & (ClusterSizePerSection - 1)) * LandscapeGpuRenderParameter::ClusterQuadSize + GlobalClusterIndex.Y / ClusterSizePerSection * SectionSizeQuads,
0.f
);

FVector VertexEndPos = FVector(
((GlobalClusterIndex.X + 1) & (ClusterSizePerSection - 1)) * LandscapeGpuRenderParameter::ClusterQuadSize + (GlobalClusterIndex.X + 1) / ClusterSizePerSection * SectionSizeQuads,
((GlobalClusterIndex.Y + 1) & (ClusterSizePerSection - 1)) * LandscapeGpuRenderParameter::ClusterQuadSize + (GlobalClusterIndex.Y + 1) / ClusterSizePerSection * SectionSizeQuads,
0.f
);

FBox& BoxRef = SubmitToRenderThreadBoundingBox.Emplace_GetRef(VertexStartPos, VertexEndPos);
//Calculate the vertex heights
// Because both corners start at Z = 0, the resulting Z range always contains 0.
for (uint32 VertexY = 0; VertexY < LandscapeGpuRenderParameter::ClusterQuadSize; ++VertexY) {
for (uint32 VertexX = 0; VertexX < LandscapeGpuRenderParameter::ClusterQuadSize; ++VertexX) {
//SampleIndex use VertSize instead of SectionQuadsize
uint32 SampleX = VertexX + (GlobalClusterIndex.X & (ClusterSizePerSection - 1)) * LandscapeGpuRenderParameter::ClusterQuadSize
+ GlobalClusterIndex.X / ClusterSizePerSection * SectionVerts;
uint32 SampleY = (VertexY + GlobalClusterIndex.Y * LandscapeGpuRenderParameter::ClusterQuadSize) * HeightMapSizeX;
uint32 HeightMapSampleIndex = SampleX + SampleY;
const auto& HeightValue = HeightMapData[HeightMapSampleIndex];
// The 16-bit height is stored with its high byte in R and its low byte in G.
float VertexHeight = LandscapeDataAccess::GetLocalHeight(static_cast<uint16>(HeightValue.R << 8u | HeightValue.G));

//Update the Box
BoxRef.Min.Z = FMath::Min(BoxRef.Min.Z, VertexHeight);
BoxRef.Max.Z = FMath::Max(BoxRef.Max.Z, VertexHeight);
}
}
}
}
}
}
//for (uint32 ClusterIndexY = 0; ClusterIndexY < ClusterSizeY; ++ClusterIndexY) {
// for (uint32 ClusterIndexX = 0; ClusterIndexX < ClusterSizeX; ++ClusterIndexX) {
// //Create Box
// FVector VertexStartPos = FVector(
// (ClusterIndexX & (ClusterSizePerSection - 1)) * LandscapeGpuRenderParameter::ClusterQuadSize + ClusterIndexX / ClusterSizePerSection * SectionSizeQuads,
// (ClusterIndexY & (ClusterSizePerSection - 1)) * LandscapeGpuRenderParameter::ClusterQuadSize + ClusterIndexY / ClusterSizePerSection * SectionSizeQuads,
// 0.f
// );

// FVector VertexEndPos = FVector(
// ((ClusterIndexX + 1) & (ClusterSizePerSection - 1)) * LandscapeGpuRenderParameter::ClusterQuadSize + (ClusterIndexX + 1) / ClusterSizePerSection * SectionSizeQuads,
// ((ClusterIndexY + 1) & (ClusterSizePerSection - 1)) * LandscapeGpuRenderParameter::ClusterQuadSize + (ClusterIndexY + 1) / ClusterSizePerSection * SectionSizeQuads,
// 0.f
// );

// FBox& BoxRef = SubmitToRenderThreadBoundingBox.Emplace_GetRef(VertexStartPos, VertexEndPos);

// }
//}

#if WITH_EDITOR
HeightmapTexture->Source.UnlockMip(0);
#endif

// The box array is moved into the lambda; the submit data and matrix are captured by value.
FMatrix LocalToWorldMatrix = GetRenderMatrix();
ENQUEUE_RENDER_COMMAND(RegisterGPURenderLandscapeEntity)(
[ClusterBoundingArray{ MoveTemp(SubmitToRenderThreadBoundingBox)}, LandscapeSubmitData, LocalToWorldMatrix](FRHICommandList& RHICmdList) {
auto& RenderComponent = FMobileLandscapeGPURenderSystem_RenderThread::GetLandscapeGPURenderComponent_RenderThread(LandscapeSubmitData.UniqueWorldId, LandscapeSubmitData.LandscapeKey);
RenderComponent.InitClusterData(ClusterBoundingArray, LocalToWorldMatrix);
}
);

bIsClusterBoundingCreated = true;
}
Loading

0 comments on commit fec7239

Please sign in to comment.