Add bounding sphere support to the SS AABB generation pass (#2281)

EvgeniiG · sebastienlagarde · sebastienlagarde · commit db84c09719f0 · 2020-10-25T13:47:17.000+01:00
* Implement clipping and culling (does not consider view frustum corners)

* Support orthographic projection

* Turn 'scaleXY' into a scalar

* Test corners of the view volume

* Improve the placeholder for the linear depth

* Fix aspect

* Bugfix

* Optimize

* Also store view space Z

* Optimize orthographic

* Optimize LUT

* Add wave intrinsic support

* Fix group count

* Reduce the kernel count to 1

* Remove old code

* Bounds check

* Add a profiling marker

* Fix lane masks

* Fix compiler warning

* Remove GPU Pro reference

* Be politically correct

* No intrinsics on Xbox

* Sphere culling (draft)

* Tighter bounding spheres

* Works

* Share code

* Optimize

* Comment

* Changelog

* Fix BSphere radius

* Optimize

* Better comment

* Remove the cull check

* Fix Y-flip in light culling code

Co-authored-by: sebastienlagarde &lt;sebastien@unity3d.com&gt;
diff --git a/com.unity.render-pipelines.high-definition/CHANGELOG.md b/com.unity.render-pipelines.high-definition/CHANGELOG.md
@@ -21,11 +21,13 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 - Fixed precision issue with the atmospheric fog.
 - Fixed issue with TAA and no motion vectors.
 - Fixed the stripping not working the terrain alphatest feature required for terrain holes (case 1205902).
+- Fixed bounding box generation that resulted in incorrect light culling (case 3875925).
 
 ### Changed
 - Combined occlusion meshes into one to reduce draw calls and state changes with XR single-pass.
 - Claryfied doc for the LayeredLit material.
 - Various improvements for the Volumetric Fog.
+- Use draggable fields for float scalable settings
 
 ## [10.1.0] - 2020-10-12
 
@@ -63,6 +65,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 - Adding missing marker for ray tracing profiling (RaytracingDeferredLighting)
 - Added the support of eye shader for ray tracing.
 - Exposed Refraction Model to the material UI when using a Lit ShaderGraph.
+- Added bounding sphere support to screen-space axis-aligned bounding box generation pass.
 
 ### Fixed
 - Fixed several issues with physically-based DoF (TAA ghosting of the CoC buffer, smooth layer transitions, etc)
@@ -190,7 +193,6 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 - Fixed cullmode for SceneSelectionPass.
 - Fixed issue that caused non-static object to not render at times in OnEnable reflection probes.
 - Baked reflection probes now correctly use static sky for ambient lighting.
-- Use draggable fields for float scalable settings
 
 ### Changed
 - Preparation pass for RTSSShadows to be supported by render graph.
diff --git a/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/LightLoop.cs b/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/LightLoop.cs
@@ -1806,7 +1806,7 @@ void GetLightVolumeDataAndBound(LightCategory lightCategory, GPULightType gpuLig
                 bound.boxAxisX = extents.x * xAxisVS;
                 bound.boxAxisY = extents.y * yAxisVS;
                 bound.boxAxisZ = extents.z * zAxisVS;
-                bound.radius   = extents.magnitude;
+                bound.radius   = extents.x;
                 bound.scaleXY  = 1.0f;
 
                 lightVolumeData.lightPos   = centerVS;
@@ -1822,11 +1822,13 @@ void GetLightVolumeDataAndBound(LightCategory lightCategory, GPULightType gpuLig
                 Vector3 extents    = 0.5f * dimensions;
                 Vector3 centerVS   = positionVS + extents.z * zAxisVS;
 
+                float d = range + 0.5f * Mathf.Sqrt(lightDimensions.x * lightDimensions.x + lightDimensions.y * lightDimensions.y);
+
                 bound.center   = centerVS;
                 bound.boxAxisX = extents.x * xAxisVS;
                 bound.boxAxisY = extents.y * yAxisVS;
                 bound.boxAxisZ = extents.z * zAxisVS;
-                bound.radius   = extents.magnitude;
+                bound.radius   = Mathf.Sqrt(d * d + (0.5f * range) * (0.5f * range));
                 bound.scaleXY  = 1.0f;
 
                 lightVolumeData.lightPos   = centerVS;
diff --git a/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/scrbound.compute b/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/scrbound.compute
@@ -81,17 +81,28 @@ float4x4 Homogenize3x3(float3x3 R)
 
 float4x4 PerspectiveProjection4x4(float a, float g, float n, float f)
 {
-    float b = (f + n) * rcp(f - n);    // Z in [-1, 1]
-    float c = -2 * f * n * rcp(f - n); // No Z-reversal
+    float b = (f + n) * rcp(f - n);
+    float c = -2 * f * n * rcp(f - n);
 
     return float4x4(g/a, 0, 0, 0,
-                      0, g, 0, 0,
-                      0, 0, b, c,
-                      0, 0, 1, 0);
+                      0, g, 0, 0,  // No Y-flip
+                      0, 0, b, c,  // Z in [-1, 1], no Z-reversal
+                      0, 0, 1, 0); // No W-flip
 }
 
 /* ------------------------------ Implementation ---------------------------- */
 
+// !!! IMPORTANT !!!
+// The legacy code from Morten provides us special projection matrices (and their inverses).
+// These matrices are different from the matrices the HDRP uses.
+// There is no reversed-Z buffering (effectively, forced UNITY_REVERSED_Z = 0).
+// Additionally, there is no clip-space flip (effectively, forced UNITY_UV_STARTS_AT_TOP = 0).
+// Therefore, all coordinate systems are left-handed, Y-up, without W-flip.
+// Near and far planes are swapped in the case of Z-reversal, but it does not change the algorithm.
+// y  z
+// | /
+// 0 -- x
+
 // Improve the quality of generated code at the expense of readability.
 // Remove when the shader compiler is clever enough to perform this optimization for us.
 #define OBTUSE_COMPILER
@@ -124,37 +135,37 @@ float4x4 PerspectiveProjection4x4(float a, float g, float n, float f)
 
 // All planes and faces are always in the standard order (see below).
 // Near and far planes are swapped in the case of Z-reversal, but it does not change the algorithm.
-#define FACE_LEFT   (1 << 0) // -X     z
-#define FACE_RIGHT  (1 << 1) // +X    /
-#define FACE_TOP    (1 << 2) // -Y   0 -- x
-#define FACE_BOTTOM (1 << 3) // +Y   |
-#define FACE_FRONT  (1 << 4) // -Z   y
+#define FACE_LEFT   (1 << 0) // -X
+#define FACE_RIGHT  (1 << 1) // +X
+#define FACE_BOTTOM (1 << 2) // -Y
+#define FACE_TOP    (1 << 3) // +Y
+#define FACE_FRONT  (1 << 4) // -Z
 #define FACE_BACK   (1 << 5) // +Z
 #define FACE_MASK   ((1 << NUM_FACES) - 1)
 
 // A list of vertices for each face (CCW order w.r.t. its normal, starting from the LSB).
-#define VERT_LIST_LEFT   ((2) << 9 | (6) << 6 | (4) << 3 | (0) << 0)
-#define VERT_LIST_RIGHT  ((5) << 9 | (7) << 6 | (3) << 3 | (1) << 0)
-#define VERT_LIST_TOP    ((1) << 9 | (3) << 6 | (2) << 3 | (0) << 0)
-#define VERT_LIST_BOTTOM ((6) << 9 | (7) << 6 | (5) << 3 | (4) << 0)
-#define VERT_LIST_FRONT  ((4) << 9 | (5) << 6 | (1) << 3 | (0) << 0)
-#define VERT_LIST_BACK   ((3) << 9 | (7) << 6 | (6) << 3 | (2) << 0)
+#define VERT_LIST_LEFT   ((4) << 9 | (6) << 6 | (2) << 3 | (0) << 0)
+#define VERT_LIST_RIGHT  ((3) << 9 | (7) << 6 | (5) << 3 | (1) << 0)
+#define VERT_LIST_BOTTOM ((1) << 9 | (5) << 6 | (4) << 3 | (0) << 0)
+#define VERT_LIST_TOP    ((6) << 9 | (7) << 6 | (3) << 3 | (2) << 0)
+#define VERT_LIST_FRONT  ((2) << 9 | (3) << 6 | (1) << 3 | (0) << 0)
+#define VERT_LIST_BACK   ((5) << 9 | (7) << 6 | (6) << 3 | (4) << 0)
 
 // All vertices are always in the standard order (see below).
 uint GetFaceMaskOfVertex(uint v)
 {
-    // 0: (-1, -1, -1) -> { FACE_LEFT  | FACE_TOP    | FACE_FRONT }
-    // 1: (+1, -1, -1) -> { FACE_RIGHT | FACE_TOP    | FACE_FRONT }
-    // 2: (-1, +1, -1) -> { FACE_LEFT  | FACE_BOTTOM | FACE_FRONT }
-    // 3: (+1, +1, -1) -> { FACE_RIGHT | FACE_BOTTOM | FACE_FRONT }
-    // 4: (-1, -1, +1) -> { FACE_LEFT  | FACE_TOP    | FACE_BACK  }
-    // 5: (+1, -1, +1) -> { FACE_RIGHT | FACE_TOP    | FACE_BACK  }
-    // 6: (-1, +1, +1) -> { FACE_LEFT  | FACE_BOTTOM | FACE_BACK  }
-    // 7: (+1, +1, +1) -> { FACE_RIGHT | FACE_BOTTOM | FACE_BACK  }
+    // 0: (-1, -1, -1) -> { FACE_LEFT  | FACE_BOTTOM | FACE_FRONT }
+    // 1: (+1, -1, -1) -> { FACE_RIGHT | FACE_BOTTOM | FACE_FRONT }
+    // 2: (-1, +1, -1) -> { FACE_LEFT  | FACE_TOP    | FACE_FRONT }
+    // 3: (+1, +1, -1) -> { FACE_RIGHT | FACE_TOP    | FACE_FRONT }
+    // 4: (-1, -1, +1) -> { FACE_LEFT  | FACE_BOTTOM | FACE_BACK  }
+    // 5: (+1, -1, +1) -> { FACE_RIGHT | FACE_BOTTOM | FACE_BACK  }
+    // 6: (-1, +1, +1) -> { FACE_LEFT  | FACE_TOP    | FACE_BACK  }
+    // 7: (+1, +1, +1) -> { FACE_RIGHT | FACE_TOP    | FACE_BACK  }
     // ((v & 1) == 0) ? 1 : 2) | ((v & 2) == 0) ? 4 : 8) | ((v & 4) == 0) ? 16 : 32)
-    uint f = (FACE_LEFT  << BitFieldExtract(v, 0, 1))
-           | (FACE_TOP   << BitFieldExtract(v, 1, 1))
-           | (FACE_FRONT << BitFieldExtract(v, 2, 1));
+    uint f = (FACE_LEFT   << BitFieldExtract(v, 0, 1))
+           | (FACE_BOTTOM << BitFieldExtract(v, 1, 1))
+           | (FACE_FRONT  << BitFieldExtract(v, 2, 1));
 
     return f;
 };
@@ -163,19 +174,19 @@ float3 GenerateVertexOfStandardCube(uint v)
 {
     float3 p;
 
-    p.x = ((v & 1) == 0) ? -1 : 1;
-    p.y = ((v & 2) == 0) ? -1 : 1;
-    p.z = ((v & 4) == 0) ? -1 : 1;
+    p.x = ((v & 1) == 0) ? -1 : 1; // FACE_LEFT   : FACE_RIGHT
+    p.y = ((v & 2) == 0) ? -1 : 1; // FACE_BOTTOM : FACE_TOP
+    p.z = ((v & 4) == 0) ? -1 : 1; // FACE_FRONT  : FACE_BACK
 
     return p;
 }
 
 uint GetVertexListOfFace(uint f)
 {
     // Warning: don't add 'static' here unless you want really bad code gen.
-    const uint3 allVertLists = uint3((VERT_LIST_RIGHT  << 12) | VERT_LIST_LEFT,
-                                     (VERT_LIST_BOTTOM << 12) | VERT_LIST_TOP,
-                                     (VERT_LIST_BACK   << 12) | VERT_LIST_FRONT);
+    const uint3 allVertLists = uint3((VERT_LIST_RIGHT << 12) | VERT_LIST_LEFT,
+                                     (VERT_LIST_TOP   << 12) | VERT_LIST_BOTTOM,
+                                     (VERT_LIST_BACK  << 12) | VERT_LIST_FRONT);
 
     return BitFieldExtract(allVertLists[f >> 1], 12 * (f & 1), 12);
 }
@@ -384,6 +395,89 @@ void UpdateAaBb(uint srcBegin, uint srcSize, float4 vertRingBuffer[MAX_CLIP_VERT
     }
 }
 
+// Given: 'C' is the center of the sphere in the view space, 'r' is its radius;
+// 'projScale' and 'projOffset' are used to perform projection of the X (or Y) component of a vector.
+float2 ComputeBoundsOfSphereOnProjectivePlane(float3 C, float r, float projScale, float projOffset)
+{
+    float xMin, xMax;
+
+    // See sec. 8.2.1 of https://foundationsofgameenginedev.com/#fged2 for an alternative derivation.
+    // Goal: find the planes that pass through the origin O, bound the sphere, and form
+    // an axis-aligned rectangle at the intersection with the projection plane.
+    // Solution (for the X-coordinate):
+    // The intersection of the bounding planes and the projection plane must be vertical lines,
+    // which means that the bounding planes must contain the Y-axis.
+    // The bounding planes must be also tangent to the sphere.
+    // Call the intersection points of the two vertical bounding planes and the bounding
+    // sphere B and D. Assume that B is on the left of C; D is on the right of C.
+    // Note that C may be behind the origin, so the same generally goes for B and D.
+    // BC is normal w.r.t. the bounding plane, so it is normal w.r.t. the Y-axis; |BC| = r.
+    // As a consequence, it lies in a plane parallel to the O-X-Z plane.
+    // Consider B'C', which is an orthogonal projection of BC onto the actual O-X-Z plane.
+    // (Imagine sliding the sphere up or down between the bounding planes).
+    // We then consider a triangle OB'C' that lies entirely in the O-X-Z plane.
+    // The coordinates are: OB' = (b.x, 0, b.z), OC' = (c.x, 0, c.z).
+    float3 B, D;
+    // OBC is a right triangle. So is OB'C'.
+    // |BC| = |B'C'| = r.
+    // |OB'|^2 = |OC'|^2 - |B'C'|^2.
+    float lenSqOC = dot(C.xz, C.xz);
+    float lenSqOB = lenSqOC - r * r;
+    // If |OB'| = 0 or |OC'| = 0, the bounding planes tangent to the sphere do not exist.
+    if (lenSqOB > 0)
+    {
+        float lenOB = sqrt(lenSqOB);
+        // |OB' x OC'| = |OB'| * |OC'| * Sin[a'].
+        //  OB' . OC'  = |OB'| * |OC'| * Cos[a'].
+        // We can determine Sin[a'] = |B'C'| / |OC'| = r / |OC'|.
+        // Cos[a'] = Sqrt[1 - Sin[a']^2].
+        // (OB' x OC') points along Y.
+        // (OB' x OC').y = b.z * c.x - b.x * c.z.
+        // Therefore,  b.z * c.x - b.x * c.z = |OB'| * |OC'| * Sin[a'].
+        // OB' . OC' = b.x * c.x + b.z * c.z = |OB'| * |OC'| * Cos[a'].
+        // Since we don't care about the scale, and |OB'| != 0 and |OC'| != 0,
+        // we can equivalently solve
+        // z * c.x - x * c.z = |OC'|^3 * Sin[a'].
+        // x * c.x + z * c.z = |OC'|^3 * Cos[a'].
+        // With 2 equations and 2 unknowns, we can easily solve this linear system.
+        // The solution is
+        // x = -c.z * r + c.x * |OB'|.
+        // z =  c.x * r + c.z * |OB'|.
+        B.x = C.x * lenOB - (C.z * r);
+        B.z = C.z * lenOB + (C.x * r);
+        // (OD' x OC') points along Y.
+        // (OD' x OC').y = d.z * c.x - d.x * c.z.
+        // We must solve
+        // z * c.x - x * c.z = -|OC'|^3 * Sin[a'].
+        // x * c.x + z * c.z =  |OC'|^3 * Cos[a'].
+        // The solution is
+        // x =  c.z * r + c.x * |OB'|.
+        // z = -c.x * r + c.z * |OB'|.
+        D.x = C.x * lenOB + (C.z * r);
+        D.z = C.z * lenOB - (C.x * r);
+        // We can transform OB and OD as direction vectors.
+        // For the simplification below, see OptimizeProjectionMatrix.
+        float rapBx = (B.x * rcp(B.z)) * projScale + projOffset;
+        float rapDx = (D.x * rcp(D.z)) * projScale + projOffset;
+        // One problem with the above is that this direction may, for certain spheres,
+        // point behind the origin (B.z <= 0 or D.z <= 0).
+        // At this point we know that the sphere is at least *partially* in front of the origin,
+        // and that we are not inside the sphere, so there is at least one valid
+        // plane (and one valid direction). We just need the second direction to go "in front"
+        // of the first one to extend the bounding box.
+        xMin = (B.z > 0) ? rapBx : -FLT_INF;
+        xMax = (D.z > 0) ? rapDx :  FLT_INF;
+    }
+    else
+    {
+        // Conservative estimate (we do not cull the bounding sphere using the view frustum).
+        xMin = -1;
+        xMax =  1;
+    }
+
+    return float2(xMin, xMax);
+}
+
 //**********************************************************************************************
 // The goal of this program is to compute the AABB of the light in the NDC space ([0, 1] range).
 // The light is represented by a convex volume (a cuboid) with 6 faces (planar quads) and 8 vertices.
@@ -429,11 +523,14 @@ void main(uint threadID : SV_GroupIndex, uint3 groupID : SV_GroupID)
     const float4x4 projMat    = g_mProjectionArr[eyeIndex];
     const float4x4 invProjMat = g_mInvProjectionArr[eyeIndex];
 
-    const float  scale = cullData.scaleXY;      // scale.x = scale.y
+    // Bounding frustum.
     const float3 rbpC  = cullData.center.xyz;   // View-space
     const float3 rbpX  = cullData.boxAxisX.xyz; // Pre-scaled
     const float3 rbpY  = cullData.boxAxisY.xyz; // Pre-scaled
     const float3 rbpZ  = cullData.boxAxisZ.xyz; // Pre-scaled
+    const float scale  = cullData.scaleXY;      // scale.x = scale.y
+    // Bounding sphere.
+    const float radius = cullData.radius;
 
 #ifndef PLATFORM_SUPPORTS_WAVE_INTRINSICS
     // (0) Initialize the TGSM.
@@ -523,6 +620,7 @@ void main(uint threadID : SV_GroupIndex, uint3 groupID : SV_GroupID)
         }
         else // Outside
         {
+            // Mark all the faces of the bounding frustum associated with this vertex.
             cullClipFaceMask |= GetFaceMaskOfVertex(v);
         }
 
@@ -558,6 +656,8 @@ void main(uint threadID : SV_GroupIndex, uint3 groupID : SV_GroupID)
         // We perform aggressive culling, so we must make sure they are accounted for.
         // The light volume is a special type of cuboid - a right frustum.
         // We can exploit this fact by building a light-space projection matrix.
+        // P_v = T * (R * S) * P_l
+        // P_l = (R * S)^{-1} * T^{-1} * P_v
         float4x4 invTranslateToLightSpace      = Translation4x4(-rbpC);
         float4x4 invRotateAndScaleInLightSpace = Homogenize3x3(Invert3x3(ScaledRotation3x3(rbpX, rbpY, rbpZ)));
         // TODO: avoid full inversion by using unit vectors and passing magnitudes explicitly.
@@ -722,6 +822,54 @@ void main(uint threadID : SV_GroupIndex, uint3 groupID : SV_GroupID)
     ndcAaBbMaxPt.w = asfloat(gs_NdcAaBbMaxPtW[intraGroupLightIndex]);
 #endif // PLATFORM_SUPPORTS_WAVE_INTRINSICS
 
+    // (5) Compute the AABB of the bounding sphere.
+    if (radius > 0)
+    {
+        // Occasionally, an intersection of AABBs of a bounding sphere and a bounding frustum
+        // results in a tighter AABB when compared to using the AABB of the frustum alone.
+        // That is the case (mostly) for sphere-capped spot lights with very wide angles.
+        // Note that, unfortunately, it is not quite as tight as an AABB of a CSG intersection
+        // of a sphere and frustum. Also note that the algorithm below doesn't clip the bounding
+        // sphere against the view frustum before computing the bounding box, simply because it is
+        // too hard/expensive. I will leave it as a TODO in case someone wants to tackle this problem.
+        if ((rbpC.z + radius) > 0) // Is the sphere at least *partially* in front of the origin?
+        {
+            ndcAaBbMinPt.w = max(ndcAaBbMinPt.w, rbpC.z - radius);
+            ndcAaBbMaxPt.w = min(ndcAaBbMaxPt.w, rbpC.z + radius);
+            // Computing the 'z' component for an arbitrary projection matrix is hard, so we don't do it.
+            // See sec. 8.2.2 of https://foundationsofgameenginedev.com/#fged2 for a solution.
+
+            float2 rectMin, rectMax;
+
+            // For the 'x' and 'y' components, the solution is given below.
+            if (g_isOrthographic)
+            {
+                // Compute the center and the extents (half-diagonal) of the bounding box.
+                float2 center  = mul(projMat, float4(rbpC.xyz,     1)).xy;
+                float2 extents = mul(projMat, float4(radius.xx, 0, 0)).xy;
+
+                rectMin = center - extents;
+                rectMax = center + extents;
+            }
+            else // Perspective
+            {
+                float2 xBounds = ComputeBoundsOfSphereOnProjectivePlane(rbpC.xxz, radius, projMat._m00, projMat._m02); // X-Z plane
+                float2 yBounds = ComputeBoundsOfSphereOnProjectivePlane(rbpC.yyz, radius, projMat._m11, projMat._m12); // Y-Z plane
+
+                rectMin = float2(xBounds.r, yBounds.r);
+                rectMax = float2(xBounds.g, yBounds.g);
+            }
+
+            // Transform to the NDC coordinates.
+            rectMin = rectMin * 0.5 + 0.5;
+            rectMax = rectMax * 0.5 + 0.5;
+
+            // Note: separating the X- and Y-computations across 2 threads is not worth it.
+            ndcAaBbMinPt.xy = max(ndcAaBbMinPt.xy, rectMin);
+            ndcAaBbMaxPt.xy = min(ndcAaBbMaxPt.xy, rectMax);
+        }
+    }
+
     if ((globalLightIndex < (uint)g_iNrVisibLights) && (t % THREADS_PER_LIGHT == 0)) // Avoid bank conflicts
     {
         // For stereo, we have two sets of lights. Therefore, each eye has a set of mins