Skip to content

Commit 1908749

Browse files
Bugfix sincos_accumulator::create
Also flesh out future optimizations for faster angle adding
1 parent 3d7ded6 commit 1908749

File tree

1 file changed

+14
-6
lines changed

1 file changed

+14
-6
lines changed

include/nbl/builtin/hlsl/math/angle_adding.hlsl

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ struct sincos_accumulator
3131
assert(sinA >= T(0.0));
3232
this_t retval;
3333
retval.runningSum = complex_t<T>::create(cosA, sinA);
34-
retval.wraparound = hlsl::mix(T(0.0), T(1.0), cosA == T(-1.0));
34+
retval.wraparound = T(0.0);
3535
return retval;
3636
}
3737

@@ -41,27 +41,35 @@ struct sincos_accumulator
4141
// This way, we can accumulate the sines and cosines of a set of angles, and only have to do one acos() call to retrieve the final sum
4242
void addAngle(T cosA, T sinA)
4343
{
44-
const T a = cosA;
4544
const T cosB = runningSum.real();
4645
const T sinB = runningSum.imag();
46+
// TODO: prove whether we can infer overflow from the sign of `d` instead
4747
const bool overflow = abs<T>(min<T>(a, cosB)) > max<T>(a, cosB);
48-
const T c = a * cosB - sinA * sinB;
49-
const T d = sinA * cosB + a * sinB;
48+
const T c = cosA * cosB - sinA * sinB;
49+
const T d = sinA * cosB + cosA * sinB;
5050

51+
// TODO: figure out if we can skip flipping signs until `getSumOfArccos()`; this would mean that on odd overflows the formula is
52+
// const T c = sinA * sinB - cosA * cosB;
53+
// const T d = - sinA * cosB - cosA * sinB;
54+
// but both signs get inverted so the `abs()` of both terms gets preserved, so the equation can stay as is.
55+
// And in `getSumOfArccos()` we could do either of:
56+
// return acos<T>((intWraparound<<signBitOffset<T>)^runningSum.real()) + intWraparound * numbers::pi<T>;
57+
// return (intWraparound+(intWraparound&0x1u)) * numbers::pi<T> + (intWraparound<<signBitOffset<T>)^acos<T>(runningSum.real());
5158
runningSum.real(ieee754::flipSign<T>(c, overflow));
5259
runningSum.imag(ieee754::flipSign<T>(d, overflow));
5360

5461
if (overflow)
55-
wraparound++;
62+
wraparound += numbers::pi<T>;
5663
}
5764
void addCosine(T cosA)
5865
{
5966
addAngle(cosA, sqrt<T>(T(1.0) - cosA * cosA));
6067
}
6168

69+
// TODO: rename to `getSumOfArccos`
6270
T getSumofArccos()
6371
{
64-
return acos<T>(runningSum.real()) + wraparound * numbers::pi<T>;
72+
return acos<T>(runningSum.real()) + wraparound;
6573
}
6674

6775
complex_t<T> runningSum;

0 commit comments

Comments
 (0)