@@ -81,17 +81,28 @@ float4x4 Homogenize3x3(float3x3 R)
81
81
82
82
float4x4 PerspectiveProjection4x4(float a, float g, float n, float f)
83
83
{
84
- float b = (f + n) * rcp(f - n); // Z in [-1, 1]
85
- float c = -2 * f * n * rcp(f - n); // No Z-reversal
84
+ float b = (f + n) * rcp(f - n);
85
+ float c = -2 * f * n * rcp(f - n);
86
86
87
87
return float4x4(g/a, 0, 0, 0,
88
- 0, g, 0, 0,
89
- 0, 0, b, c,
90
- 0, 0, 1, 0);
88
+ 0, g, 0, 0, // No Y-flip
89
+ 0, 0, b, c, // Z in [-1, 1], no Z-reversal
90
+ 0, 0, 1, 0); // No W-flip
91
91
}
92
92
93
93
/* ------------------------------ Implementation ---------------------------- */
94
94
95
+ // !!! IMPORTANT !!!
96
+ // The legacy code from Morten provides us special projection matrices (and their inverses).
97
+ // These matrices are different from the matrices the HDRP uses.
98
+ // There is no reversed-Z buffering (effectively, forced UNITY_REVERSED_Z = 0).
99
+ // Additionally, there is no clip-space flip (effectively, forced UNITY_UV_STARTS_AT_TOP = 0).
100
+ // Therefore, all coordinate systems are left-handed, Y-up, without W-flip.
101
+ // Near and far planes are swapped in the case of Z-reversal, but it does not change the algorithm.
102
+ // y z
103
+ // | /
104
+ // 0 -- x
105
+
95
106
// Improve the quality of generated code at the expense of readability.
96
107
// Remove when the shader compiler is clever enough to perform this optimization for us.
97
108
#define OBTUSE_COMPILER
@@ -124,37 +135,37 @@ float4x4 PerspectiveProjection4x4(float a, float g, float n, float f)
124
135
125
136
// All planes and faces are always in the standard order (see below).
126
137
// Near and far planes are swapped in the case of Z-reversal, but it does not change the algorithm.
127
- #define FACE_LEFT (1 << 0) // -X z
128
- #define FACE_RIGHT (1 << 1) // +X /
129
- #define FACE_TOP (1 << 2) // -Y 0 -- x
130
- #define FACE_BOTTOM (1 << 3) // +Y |
131
- #define FACE_FRONT (1 << 4) // -Z y
138
+ #define FACE_LEFT (1 << 0) // -X
139
+ #define FACE_RIGHT (1 << 1) // +X
140
+ #define FACE_BOTTOM (1 << 2) // -Y
141
+ #define FACE_TOP (1 << 3) // +Y
142
+ #define FACE_FRONT (1 << 4) // -Z
132
143
#define FACE_BACK (1 << 5) // +Z
133
144
#define FACE_MASK ((1 << NUM_FACES) - 1)
134
145
135
146
// A list of vertices for each face (CCW order w.r.t. its normal, starting from the LSB).
136
- #define VERT_LIST_LEFT ((2 ) << 9 | (6) << 6 | (4 ) << 3 | (0) << 0)
137
- #define VERT_LIST_RIGHT ((5 ) << 9 | (7) << 6 | (3 ) << 3 | (1) << 0)
138
- #define VERT_LIST_TOP ((1) << 9 | (3 ) << 6 | (2 ) << 3 | (0) << 0)
139
- #define VERT_LIST_BOTTOM ((6) << 9 | (7) << 6 | (5 ) << 3 | (4 ) << 0)
140
- #define VERT_LIST_FRONT ((4 ) << 9 | (5 ) << 6 | (1) << 3 | (0) << 0)
141
- #define VERT_LIST_BACK ((3 ) << 9 | (7) << 6 | (6) << 3 | (2 ) << 0)
147
+ #define VERT_LIST_LEFT ((4 ) << 9 | (6) << 6 | (2 ) << 3 | (0) << 0)
148
+ #define VERT_LIST_RIGHT ((3 ) << 9 | (7) << 6 | (5 ) << 3 | (1) << 0)
149
+ #define VERT_LIST_BOTTOM ((1) << 9 | (5 ) << 6 | (4 ) << 3 | (0) << 0)
150
+ #define VERT_LIST_TOP ((6) << 9 | (7) << 6 | (3 ) << 3 | (2 ) << 0)
151
+ #define VERT_LIST_FRONT ((2 ) << 9 | (3 ) << 6 | (1) << 3 | (0) << 0)
152
+ #define VERT_LIST_BACK ((5 ) << 9 | (7) << 6 | (6) << 3 | (4 ) << 0)
142
153
143
154
// All vertices are always in the standard order (see below).
144
155
uint GetFaceMaskOfVertex(uint v)
145
156
{
146
- // 0: (-1, -1, -1) -> { FACE_LEFT | FACE_TOP | FACE_FRONT }
147
- // 1: (+1, -1, -1) -> { FACE_RIGHT | FACE_TOP | FACE_FRONT }
148
- // 2: (-1, +1, -1) -> { FACE_LEFT | FACE_BOTTOM | FACE_FRONT }
149
- // 3: (+1, +1, -1) -> { FACE_RIGHT | FACE_BOTTOM | FACE_FRONT }
150
- // 4: (-1, -1, +1) -> { FACE_LEFT | FACE_TOP | FACE_BACK }
151
- // 5: (+1, -1, +1) -> { FACE_RIGHT | FACE_TOP | FACE_BACK }
152
- // 6: (-1, +1, +1) -> { FACE_LEFT | FACE_BOTTOM | FACE_BACK }
153
- // 7: (+1, +1, +1) -> { FACE_RIGHT | FACE_BOTTOM | FACE_BACK }
157
+ // 0: (-1, -1, -1) -> { FACE_LEFT | FACE_BOTTOM | FACE_FRONT }
158
+ // 1: (+1, -1, -1) -> { FACE_RIGHT | FACE_BOTTOM | FACE_FRONT }
159
+ // 2: (-1, +1, -1) -> { FACE_LEFT | FACE_TOP | FACE_FRONT }
160
+ // 3: (+1, +1, -1) -> { FACE_RIGHT | FACE_TOP | FACE_FRONT }
161
+ // 4: (-1, -1, +1) -> { FACE_LEFT | FACE_BOTTOM | FACE_BACK }
162
+ // 5: (+1, -1, +1) -> { FACE_RIGHT | FACE_BOTTOM | FACE_BACK }
163
+ // 6: (-1, +1, +1) -> { FACE_LEFT | FACE_TOP | FACE_BACK }
164
+ // 7: (+1, +1, +1) -> { FACE_RIGHT | FACE_TOP | FACE_BACK }
154
165
// (((v & 1) == 0) ? 1 : 2) | (((v & 2) == 0) ? 4 : 8) | (((v & 4) == 0) ? 16 : 32)
155
- uint f = (FACE_LEFT << BitFieldExtract(v, 0, 1))
156
- | (FACE_TOP << BitFieldExtract(v, 1, 1))
157
- | (FACE_FRONT << BitFieldExtract(v, 2, 1));
166
+ uint f = (FACE_LEFT << BitFieldExtract(v, 0, 1))
167
+ | (FACE_BOTTOM << BitFieldExtract(v, 1, 1))
168
+ | (FACE_FRONT << BitFieldExtract(v, 2, 1));
158
169
159
170
return f;
160
171
};
@@ -163,19 +174,19 @@ float3 GenerateVertexOfStandardCube(uint v)
163
174
{
164
175
float3 p;
165
176
166
- p.x = ((v & 1) == 0) ? -1 : 1;
167
- p.y = ((v & 2) == 0) ? -1 : 1;
168
- p.z = ((v & 4) == 0) ? -1 : 1;
177
+ p.x = ((v & 1) == 0) ? -1 : 1; // FACE_LEFT : FACE_RIGHT
178
+ p.y = ((v & 2) == 0) ? -1 : 1; // FACE_BOTTOM : FACE_TOP
179
+ p.z = ((v & 4) == 0) ? -1 : 1; // FACE_FRONT : FACE_BACK
169
180
170
181
return p;
171
182
}
172
183
173
184
uint GetVertexListOfFace(uint f)
174
185
{
175
186
// Warning: don't add 'static' here unless you want really bad code gen.
176
- const uint3 allVertLists = uint3((VERT_LIST_RIGHT << 12) | VERT_LIST_LEFT,
177
- (VERT_LIST_BOTTOM << 12) | VERT_LIST_TOP ,
178
- (VERT_LIST_BACK << 12) | VERT_LIST_FRONT);
187
+ const uint3 allVertLists = uint3((VERT_LIST_RIGHT << 12) | VERT_LIST_LEFT,
188
+ (VERT_LIST_TOP << 12) | VERT_LIST_BOTTOM ,
189
+ (VERT_LIST_BACK << 12) | VERT_LIST_FRONT);
179
190
180
191
return BitFieldExtract(allVertLists[f >> 1], 12 * (f & 1), 12);
181
192
}
@@ -384,6 +395,89 @@ void UpdateAaBb(uint srcBegin, uint srcSize, float4 vertRingBuffer[MAX_CLIP_VERT
384
395
}
385
396
}
386
397
398
+ // Given: 'C' is the center of the sphere in the view space, 'r' is its radius;
399
+ // 'projScale' and 'projOffset' are used to perform projection of the X (or Y) component of a vector.
400
+ float2 ComputeBoundsOfSphereOnProjectivePlane(float3 C, float r, float projScale, float projOffset)
401
+ {
402
+ float xMin, xMax;
403
+
404
+ // See sec. 8.2.1 of https://foundationsofgameenginedev.com/#fged2 for an alternative derivation.
405
+ // Goal: find the planes that pass through the origin O, bound the sphere, and form
406
+ // an axis-aligned rectangle at the intersection with the projection plane.
407
+ // Solution (for the X-coordinate):
408
+ // The intersection of the bounding planes and the projection plane must be vertical lines,
409
+ // which means that the bounding planes (which pass through O) must contain the Y-axis.
410
+ // The bounding planes must be also tangent to the sphere.
411
+ // Call the intersection points of the two vertical bounding planes and the bounding
412
+ // sphere B and D. Assume that B is on the left of C; D is on the right of C.
413
+ // Note that C may be behind the origin, so the same generally goes for B and D.
414
+ // BC is normal w.r.t. the bounding plane, so it is normal w.r.t. the Y-axis; |BC| = r.
415
+ // As a consequence, it lies in a plane parallel to the O-X-Z plane.
416
+ // Consider B'C', which is an orthogonal projection of BC onto the actual O-X-Z plane.
417
+ // (Imagine sliding the sphere up or down between the bounding planes).
418
+ // We then consider a triangle OB'C' that lies entirely in the O-X-Z plane.
419
+ // The coordinates are: OB' = (b.x, 0, b.z), OC' = (c.x, 0, c.z).
420
+ float3 B, D;
421
+ // OBC is a right triangle. So is OB'C'.
422
+ // |BC| = |B'C'| = r.
423
+ // |OB'|^2 = |OC'|^2 - |B'C'|^2.
424
+ float lenSqOC = dot(C.xz, C.xz);
425
+ float lenSqOB = lenSqOC - r * r;
426
+ // If |OC'| <= r (so that |OB'|^2 <= 0), the bounding planes tangent to the sphere do not exist.
427
+ if (lenSqOB > 0)
428
+ {
429
+ float lenOB = sqrt(lenSqOB);
430
+ // |OB' x OC'| = |OB'| * |OC'| * Sin[a'].
431
+ // OB' . OC' = |OB'| * |OC'| * Cos[a'].
432
+ // We can determine Sin[a'] = |B'C'| / |OC'| = r / |OC'|.
433
+ // Cos[a'] = Sqrt[1 - Sin[a']^2].
434
+ // (OB' x OC') points along Y.
435
+ // (OB' x OC').y = b.z * c.x - b.x * c.z.
436
+ // Therefore, b.z * c.x - b.x * c.z = |OB'| * |OC'| * Sin[a'].
437
+ // OB' . OC' = b.x * c.x + b.z * c.z = |OB'| * |OC'| * Cos[a'].
438
+ // Since we don't care about the scale, and |OB'| != 0 and |OC'| != 0,
439
+ // we can equivalently solve
440
+ // z * c.x - x * c.z = |OC'|^3 * Sin[a'].
441
+ // x * c.x + z * c.z = |OC'|^3 * Cos[a'].
442
+ // With 2 equations and 2 unknowns, we can easily solve this linear system.
443
+ // The solution is
444
+ // x = -c.z * r + c.x * |OB'|.
445
+ // z = c.x * r + c.z * |OB'|.
446
+ B.x = C.x * lenOB - (C.z * r);
447
+ B.z = C.z * lenOB + (C.x * r);
448
+ // (OD' x OC') points along Y.
449
+ // (OD' x OC').y = d.z * c.x - d.x * c.z.
450
+ // We must solve
451
+ // z * c.x - x * c.z = -|OC'|^3 * Sin[a'].
452
+ // x * c.x + z * c.z = |OC'|^3 * Cos[a'].
453
+ // The solution is
454
+ // x = c.z * r + c.x * |OB'|.
455
+ // z = -c.x * r + c.z * |OB'|.
456
+ D.x = C.x * lenOB + (C.z * r);
457
+ D.z = C.z * lenOB - (C.x * r);
458
+ // We can transform OB and OD as direction vectors.
459
+ // For the simplification below, see OptimizeProjectionMatrix.
460
+ float rapBx = (B.x * rcp(B.z)) * projScale + projOffset;
461
+ float rapDx = (D.x * rcp(D.z)) * projScale + projOffset;
462
+ // One problem with the above is that this direction may, for certain spheres,
463
+ // point behind the origin (B.z <= 0 or D.z <= 0).
464
+ // At this point we know that the sphere is at least *partially* in front of the origin,
465
+ // and that we are not inside the sphere, so there is at least one valid
466
+ // plane (and one valid direction). We just need the second direction to go "in front"
467
+ // of the first one to extend the bounding box.
468
+ xMin = (B.z > 0) ? rapBx : -FLT_INF;
469
+ xMax = (D.z > 0) ? rapDx : FLT_INF;
470
+ }
471
+ else
472
+ {
473
+ // Conservative estimate (we do not cull the bounding sphere using the view frustum).
474
+ xMin = -1;
475
+ xMax = 1;
476
+ }
477
+
478
+ return float2(xMin, xMax);
479
+ }
480
+
387
481
//**********************************************************************************************
388
482
// The goal of this program is to compute the AABB of the light in the NDC space ([0, 1] range).
389
483
// The light is represented by a convex volume (a cuboid) with 6 faces (planar quads) and 8 vertices.
@@ -429,11 +523,14 @@ void main(uint threadID : SV_GroupIndex, uint3 groupID : SV_GroupID)
429
523
const float4x4 projMat = g_mProjectionArr[eyeIndex];
430
524
const float4x4 invProjMat = g_mInvProjectionArr[eyeIndex];
431
525
432
- const float scale = cullData.scaleXY; // scale.x = scale.y
526
+ // Bounding frustum.
433
527
const float3 rbpC = cullData.center.xyz; // View-space
434
528
const float3 rbpX = cullData.boxAxisX.xyz; // Pre-scaled
435
529
const float3 rbpY = cullData.boxAxisY.xyz; // Pre-scaled
436
530
const float3 rbpZ = cullData.boxAxisZ.xyz; // Pre-scaled
531
+ const float scale = cullData.scaleXY; // scale.x = scale.y
532
+ // Bounding sphere.
533
+ const float radius = cullData.radius;
437
534
438
535
#ifndef PLATFORM_SUPPORTS_WAVE_INTRINSICS
439
536
// (0) Initialize the TGSM.
@@ -523,6 +620,7 @@ void main(uint threadID : SV_GroupIndex, uint3 groupID : SV_GroupID)
523
620
}
524
621
else // Outside
525
622
{
623
+ // Mark all the faces of the bounding frustum associated with this vertex.
526
624
cullClipFaceMask |= GetFaceMaskOfVertex(v);
527
625
}
528
626
@@ -558,6 +656,8 @@ void main(uint threadID : SV_GroupIndex, uint3 groupID : SV_GroupID)
558
656
// We perform aggressive culling, so we must make sure they are accounted for.
559
657
// The light volume is a special type of cuboid - a right frustum.
560
658
// We can exploit this fact by building a light-space projection matrix.
659
+ // P_v = T * (R * S) * P_l
660
+ // P_l = (R * S)^{-1} * T^{-1} * P_v
561
661
float4x4 invTranslateToLightSpace = Translation4x4(-rbpC);
562
662
float4x4 invRotateAndScaleInLightSpace = Homogenize3x3(Invert3x3(ScaledRotation3x3(rbpX, rbpY, rbpZ)));
563
663
// TODO: avoid full inversion by using unit vectors and passing magnitudes explicitly.
@@ -722,6 +822,54 @@ void main(uint threadID : SV_GroupIndex, uint3 groupID : SV_GroupID)
722
822
ndcAaBbMaxPt.w = asfloat(gs_NdcAaBbMaxPtW[intraGroupLightIndex]);
723
823
#endif // PLATFORM_SUPPORTS_WAVE_INTRINSICS
724
824
825
+ // (5) Compute the AABB of the bounding sphere.
826
+ if (radius > 0)
827
+ {
828
+ // Occasionally, an intersection of AABBs of a bounding sphere and a bounding frustum
829
+ // results in a tighter AABB when compared to using the AABB of the frustum alone.
830
+ // That is the case (mostly) for sphere-capped spot lights with very wide angles.
831
+ // Note that, unfortunately, it is not quite as tight as an AABB of a CSG intersection
832
+ // of a sphere and frustum. Also note that the algorithm below doesn't clip the bounding
833
+ // sphere against the view frustum before computing the bounding box, simply because it is
834
+ // too hard/expensive. I will leave it as a TODO in case someone wants to tackle this problem.
835
+ if ((rbpC.z + radius) > 0) // Is the sphere at least *partially* in front of the origin?
836
+ {
837
+ ndcAaBbMinPt.w = max(ndcAaBbMinPt.w, rbpC.z - radius);
838
+ ndcAaBbMaxPt.w = min(ndcAaBbMaxPt.w, rbpC.z + radius);
839
+ // Computing the 'z' component for an arbitrary projection matrix is hard, so we don't do it.
840
+ // See sec. 8.2.2 of https://foundationsofgameenginedev.com/#fged2 for a solution.
841
+
842
+ float2 rectMin, rectMax;
843
+
844
+ // For the 'x' and 'y' components, the solution is given below.
845
+ if (g_isOrthographic)
846
+ {
847
+ // Compute the center and the extents (half-diagonal) of the bounding box.
848
+ float2 center = mul(projMat, float4(rbpC.xyz, 1)).xy;
849
+ float2 extents = mul(projMat, float4(radius.xx, 0, 0)).xy;
850
+
851
+ rectMin = center - extents;
852
+ rectMax = center + extents;
853
+ }
854
+ else // Perspective
855
+ {
856
+ float2 xBounds = ComputeBoundsOfSphereOnProjectivePlane(rbpC.xxz, radius, projMat._m00, projMat._m02); // X-Z plane
857
+ float2 yBounds = ComputeBoundsOfSphereOnProjectivePlane(rbpC.yyz, radius, projMat._m11, projMat._m12); // Y-Z plane
858
+
859
+ rectMin = float2(xBounds.r, yBounds.r);
860
+ rectMax = float2(xBounds.g, yBounds.g);
861
+ }
862
+
863
+ // Transform to the NDC coordinates.
864
+ rectMin = rectMin * 0.5 + 0.5;
865
+ rectMax = rectMax * 0.5 + 0.5;
866
+
867
+ // Note: separating the X- and Y-computations across 2 threads is not worth it.
868
+ ndcAaBbMinPt.xy = max(ndcAaBbMinPt.xy, rectMin);
869
+ ndcAaBbMaxPt.xy = min(ndcAaBbMaxPt.xy, rectMax);
870
+ }
871
+ }
872
+
725
873
if ((globalLightIndex < (uint)g_iNrVisibLights) && (t % THREADS_PER_LIGHT == 0)) // Avoid bank conflicts
726
874
{
727
875
// For stereo, we have two sets of lights. Therefore, each eye has a set of mins
0 commit comments