diff --git a/include/nbl/builtin/hlsl/algorithm.hlsl b/include/nbl/builtin/hlsl/algorithm.hlsl
index 3a7c4963c2..0178673f4e 100644
--- a/include/nbl/builtin/hlsl/algorithm.hlsl
+++ b/include/nbl/builtin/hlsl/algorithm.hlsl
@@ -18,7 +18,7 @@ namespace impl
 // TODO: use structs
 template<typename T>
-NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs)
+NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs)
 {
     T tmp = lhs;
     lhs = rhs;
@@ -26,7 +26,7 @@ namespace impl
 }
 
 template<>
-NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(uint16_t) lhs, NBL_REF_ARG(uint16_t) rhs)
+NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(uint16_t) lhs, NBL_REF_ARG(uint16_t) rhs)
 {
     lhs ^= rhs;
     rhs ^= lhs;
@@ -34,7 +34,7 @@ namespace impl
 }
 
 template<>
-NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(uint32_t) lhs, NBL_REF_ARG(uint32_t) rhs)
+NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(uint32_t) lhs, NBL_REF_ARG(uint32_t) rhs)
 {
     lhs ^= rhs;
     rhs ^= lhs;
@@ -42,7 +42,7 @@ namespace impl
 }
 
 template<>
-NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(uint64_t) lhs, NBL_REF_ARG(uint64_t) rhs)
+NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(uint64_t) lhs, NBL_REF_ARG(uint64_t) rhs)
 {
     lhs ^= rhs;
     rhs ^= lhs;
@@ -50,7 +50,7 @@ namespace impl
 }
 
 template<>
-NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(int16_t) lhs, NBL_REF_ARG(int16_t) rhs)
+NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(int16_t) lhs, NBL_REF_ARG(int16_t) rhs)
 {
     lhs ^= rhs;
     rhs ^= lhs;
@@ -58,7 +58,7 @@ namespace impl
 }
 
 template<>
-NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(int32_t) lhs, NBL_REF_ARG(int32_t) rhs)
+NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(int32_t) lhs, NBL_REF_ARG(int32_t) rhs)
 {
     lhs ^= rhs;
     rhs ^= lhs;
@@ -66,7 +66,7 @@ namespace impl
 }
 
 template<>
-NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(int64_t) lhs, NBL_REF_ARG(int64_t) rhs)
+NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(int64_t) lhs, NBL_REF_ARG(int64_t) rhs)
 {
     lhs ^= rhs;
     rhs ^= lhs;
@@ -74,7 +74,7 @@ namespace impl
 }
 #else
 template<typename T>
-NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs)
+NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs)
 {
     std::swap(lhs, rhs);
 }
@@ -82,7 +82,7 @@
 }
 
 template<typename T>
-NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs)
+NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs)
 {
     impl::swap(lhs, rhs);
 }
diff --git a/include/nbl/builtin/hlsl/complex.hlsl b/include/nbl/builtin/hlsl/complex.hlsl
index 6728a9bf3d..292271bb87 100644
--- a/include/nbl/builtin/hlsl/complex.hlsl
+++ b/include/nbl/builtin/hlsl/complex.hlsl
@@ -60,33 +60,34 @@ struct complex_t
     Scalar m_imag;
 
     // ------------------------- Constructors ---------------------------------------
-    static complex_t create(const Scalar real, const Scalar imag)
+    static complex_t create(NBL_CONST_REF_ARG(Scalar) real, NBL_CONST_REF_ARG(Scalar) imag)
     {
-        complex_t retVal = { real, imag };
-        return retVal;
+        complex_t retVal = { real, imag };
+        return retVal;
     }
-
+
     // ------------------------- Member functions -------------------------------
-    Scalar real() {
+    Scalar real() NBL_CONST_MEMBER_FUNC
+    {
         return m_real;
     }
-
-    void real(const Scalar value)
+
+    void real(NBL_CONST_REF_ARG(Scalar) value)
     {
         m_real = value;
     }
-
-    Scalar imag()
+
+    Scalar imag() NBL_CONST_MEMBER_FUNC
     {
         return m_imag;
     }
-
-    void imag(const Scalar value)
+
+    void imag(NBL_CONST_REF_ARG(Scalar) value)
     {
         m_imag = value;
     }
 
     // ------------------------- Arithmetic operators -------------------------------
-    complex_t operator+(const complex_t rhs)
+    complex_t operator+(NBL_CONST_REF_ARG(complex_t) rhs) NBL_CONST_MEMBER_FUNC
     {
         complex_t result;
         result.m_real = m_real + rhs.m_real;
@@ -94,8 +95,8 @@ struct complex_t
         return result;
     }
-
-    complex_t operator-(const complex_t rhs)
+
+    complex_t operator-(NBL_CONST_REF_ARG(complex_t) rhs) NBL_CONST_MEMBER_FUNC
     {
         complex_t result;
@@ -105,7 +106,7 @@ struct complex_t
         return result;
     }
 
-    complex_t operator*(const complex_t rhs)
+    complex_t operator*(NBL_CONST_REF_ARG(complex_t) rhs) NBL_CONST_MEMBER_FUNC
     {
         complex_t result;
@@ -114,9 +115,9 @@ struct complex_t
         return result;
     }
-
+
     // multiply by scalar
-    complex_t operator*(const Scalar scalar)
+    complex_t operator*(NBL_CONST_REF_ARG(Scalar) scalar) NBL_CONST_MEMBER_FUNC
     {
         complex_t result;
         result.m_real = m_real * scalar;
@@ -124,9 +125,9 @@ struct complex_t
         return result;
     }
-
+
     // Divide by scalar
-    complex_t operator/(const Scalar scalar)
+    complex_t operator/(NBL_CONST_REF_ARG(Scalar) scalar) NBL_CONST_MEMBER_FUNC
     {
         complex_t result;
         result.m_real = m_real / scalar;
@@ -134,8 +135,8 @@ struct complex_t
         return result;
     }
-
-    complex_t operator/(const complex_t rhs)
+
+    complex_t operator/(NBL_CONST_REF_ARG(complex_t) rhs) NBL_CONST_MEMBER_FUNC
     {
         complex_t result;
@@ -145,13 +146,13 @@ struct complex_t
         return result;
     }
-
+
     // ----------------- Relational operators -----------------------------
-    bool operator==(const complex_t rhs)
+    bool operator==(NBL_CONST_REF_ARG(complex_t) rhs) NBL_CONST_MEMBER_FUNC
     {
         return m_real == rhs.m_real && m_imag == rhs.m_imag;
     }
-    bool operator!=(const complex_t rhs)
+    bool operator!=(const complex_t rhs) NBL_CONST_MEMBER_FUNC
     {
         return m_real != rhs.m_real || m_imag != rhs.m_imag;
     }
@@ -358,44 +359,44 @@ COMPLEX_COMPOUND_ASSIGN_IDENTITIES(float64_t4)
 // -------------------------------- Non-member functions --------------------------------------
 template<typename Scalar>
-Scalar real(const complex_t<Scalar> c)
+Scalar real(NBL_CONST_REF_ARG(complex_t<Scalar>) c)
 {
     return c.m_real;
 }
 
 template<typename Scalar>
-Scalar imag(const complex_t<Scalar> c)
+Scalar imag(NBL_CONST_REF_ARG(complex_t<Scalar>) c)
 {
     return c.m_imag;
 }
 
 template<typename Scalar>
-Scalar norm(const complex_t<Scalar> c)
+Scalar norm(NBL_CONST_REF_ARG(complex_t<Scalar>) c)
 {
     return c.m_real * c.m_real + c.m_imag * c.m_imag;
 }
 
 template<typename Scalar>
-Scalar abs(const complex_t<Scalar> c)
+Scalar abs(NBL_CONST_REF_ARG(complex_t<Scalar>) c)
 {
     return sqrt(norm(c));
 }
 
 template<typename Scalar>
-Scalar arg(const complex_t<Scalar> c)
+Scalar arg(NBL_CONST_REF_ARG(complex_t<Scalar>) c)
 {
     return atan2(c.m_imag, c.m_real);
 }
 
 template<typename Scalar>
-complex_t<Scalar> conj(const complex_t<Scalar> c)
+complex_t<Scalar> conj(NBL_CONST_REF_ARG(complex_t<Scalar>) c)
 {
-    complex_t<Scalar> retVal = {c.m_real, - c.m_imag};
+    complex_t<Scalar> retVal = { c.m_real, -c.m_imag };
     return retVal;
 }
 
 template<typename Scalar>
-complex_t<Scalar> proj(const complex_t<Scalar> c)
+complex_t<Scalar> proj(NBL_CONST_REF_ARG(complex_t<Scalar>) c)
 {
     Scalar den = norm(c) + Scalar(1.0);
     complex_t<Scalar> retVal = { (Scalar(2.0) * c.m_real) / den , (Scalar(2.0) * c.m_imag) / den};
     return retVal;
@@ -403,7 +404,7 @@ complex_t<Scalar> proj(const complex_t<Scalar> c)
 }
 
 template<typename Scalar>
-complex_t<Scalar> polar(const Scalar r, const Scalar theta)
+complex_t<Scalar> polar(NBL_CONST_REF_ARG(Scalar) r, NBL_CONST_REF_ARG(Scalar) theta)
 {
     complex_t<Scalar> retVal = {r * cos(theta), r * sin(theta)};
     return retVal;
@@ -427,22 +428,6 @@ complex_t<Scalar> rotateRight(NBL_CONST_REF_ARG(complex_t<Scalar>) value)
     return retVal;
 }
 
-template<typename Scalar>
-struct ternary_operator< complex_t<Scalar> >
-{
-    using type_t = complex_t<Scalar>;
-
-    complex_t<Scalar> operator()(bool condition, NBL_CONST_REF_ARG(complex_t<Scalar>) lhs, NBL_CONST_REF_ARG(complex_t<Scalar>) rhs)
-    {
-        const vector<Scalar, 2> lhsVector = vector<Scalar, 2>(lhs.real(), lhs.imag());
-        const vector<Scalar, 2> rhsVector = vector<Scalar, 2>(rhs.real(), rhs.imag());
-        const vector<Scalar, 2> resultVector = condition ? lhsVector : rhsVector;
-        const complex_t<Scalar> result = { resultVector.x, resultVector.y };
-        return result;
-    }
-};
-
-
 }
 }
diff --git a/include/nbl/builtin/hlsl/concepts/core.hlsl b/include/nbl/builtin/hlsl/concepts/core.hlsl
index c1bc0277df..625414611d 100644
--- a/include/nbl/builtin/hlsl/concepts/core.hlsl
+++ b/include/nbl/builtin/hlsl/concepts/core.hlsl
@@ -74,12 +74,22 @@ struct is_emulating_floating_point_scalar
 {
     NBL_CONSTEXPR_STATIC_INLINE bool value = FloatingPointScalar<T>;
 };
+
+template<typename T>
+struct is_emulating_integral_scalar
+{
+    NBL_CONSTEXPR_STATIC_INLINE bool value = IntegralScalar<T>;
+};
 }
 
 //! Floating point types are native floating point types or types that imitate native floating point types (for example emulated_float64_t)
 template<typename T>
 NBL_BOOL_CONCEPT FloatingPointLikeScalar = impl::is_emulating_floating_point_scalar<T>::value;
 
+//! Integral-like types are native integral types or types that imitate native integral types (for example emulated_uint64_t)
+template<typename T>
+NBL_BOOL_CONCEPT IntegralLikeScalar = impl::is_emulating_integral_scalar<T>::value;
+
 }
 }
 }
diff --git a/include/nbl/builtin/hlsl/concepts/vector.hlsl b/include/nbl/builtin/hlsl/concepts/vector.hlsl
index 468838730a..3ea3199951 100644
--- a/include/nbl/builtin/hlsl/concepts/vector.hlsl
+++ b/include/nbl/builtin/hlsl/concepts/vector.hlsl
@@ -40,6 +40,8 @@ NBL_BOOL_CONCEPT FloatingPointLikeVectorial = concepts::Vectorial<T> && concepts
 template<typename T>
 NBL_BOOL_CONCEPT IntVectorial = concepts::Vectorial<T> && (is_integral_v<typename vector_traits<T>::scalar_type>);
 template<typename T>
+NBL_BOOL_CONCEPT IntegralLikeVectorial = concepts::Vectorial<T> && concepts::IntegralLikeScalar<typename vector_traits<T>::scalar_type>;
+template<typename T>
 NBL_BOOL_CONCEPT SignedIntVectorial = concepts::Vectorial<T> && concepts::SignedIntegralScalar<typename vector_traits<T>::scalar_type>;
 }
diff --git a/include/nbl/builtin/hlsl/cpp_compat.hlsl b/include/nbl/builtin/hlsl/cpp_compat.hlsl
index 175a3e76c1..03d47864fb 100644
--- a/include/nbl/builtin/hlsl/cpp_compat.hlsl
+++ b/include/nbl/builtin/hlsl/cpp_compat.hlsl
@@ -5,5 +5,9 @@
 // it includes vector and matrix
 #include
 #include
+#include
+
+// Had to push some stuff here to avoid circular dependencies
+#include
 
 #endif
\ No newline at end of file
diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h
index 3802bd69ea..0985af6eb3 100644
--- a/include/nbl/builtin/hlsl/cpp_compat/basic.h
+++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h
@@ -2,35 +2,7 @@
 #define _NBL_BUILTIN_HLSL_CPP_COMPAT_BASIC_INCLUDED_
 
 #include
-
-namespace nbl
-{
-namespace hlsl
-{
-namespace impl
-{
-template<typename To, typename From>
-struct static_cast_helper
-{
-    static inline To cast(From u)
-    {
-#ifndef __HLSL_VERSION
-        return static_cast<To>(u);
-#else
-        return To(u);
-#endif
-    }
-};
-}
-
-template<typename To, typename From>
-inline To _static_cast(From v)
-{
-    return impl::static_cast_helper<To, From>::cast(v);
-}
-
-}
-}
+#include
 
 #ifndef __HLSL_VERSION
 #include
@@ -39,10 +11,12 @@ inline To _static_cast(From v)
 #define NBL_CONSTEXPR constexpr // TODO: rename to NBL_CONSTEXPR_VAR
 #define NBL_CONSTEXPR_FUNC constexpr
 #define NBL_CONSTEXPR_STATIC constexpr static
+#define NBL_CONSTEXPR_INLINE constexpr inline
 #define NBL_CONSTEXPR_STATIC_INLINE constexpr static inline
-#define NBL_CONSTEXPR_INLINE_FUNC constexpr inline
+#define NBL_CONSTEXPR_STATIC_FUNC constexpr static
 #define NBL_CONSTEXPR_FORCED_INLINE_FUNC NBL_FORCE_INLINE constexpr
 #define NBL_CONST_MEMBER_FUNC const
+#define NBL_IF_CONSTEXPR(...)
if constexpr (__VA_ARGS__) namespace nbl::hlsl { @@ -65,14 +39,17 @@ namespace nbl::hlsl #else + #define ARROW .arrow(). #define NBL_CONSTEXPR const static // TODO: rename to NBL_CONSTEXPR_VAR -#define NBL_CONSTEXPR_FUNC +#define NBL_CONSTEXPR_FUNC inline #define NBL_CONSTEXPR_STATIC const static +#define NBL_CONSTEXPR_INLINE const static #define NBL_CONSTEXPR_STATIC_INLINE const static -#define NBL_CONSTEXPR_INLINE_FUNC inline +#define NBL_CONSTEXPR_STATIC_FUNC static inline #define NBL_CONSTEXPR_FORCED_INLINE_FUNC inline -#define NBL_CONST_MEMBER_FUNC +#define NBL_CONST_MEMBER_FUNC +#define NBL_IF_CONSTEXPR(...) if (__VA_ARGS__) namespace nbl { @@ -100,4 +77,34 @@ struct add_pointer #endif +namespace nbl +{ +namespace hlsl +{ +namespace impl +{ +template +struct static_cast_helper +{ + NBL_CONSTEXPR_STATIC_FUNC To cast(NBL_CONST_REF_ARG(From) u) + { +#ifndef __HLSL_VERSION + return static_cast(u); +#else + return To(u); +#endif + } +}; + +} + +template +NBL_CONSTEXPR_FUNC To _static_cast(NBL_CONST_REF_ARG(From) v) +{ + return impl::static_cast_helper::cast(v); +} + +} +} + #endif diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index 0309b78e0d..e824c45ee1 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -75,6 +75,8 @@ template struct all_helper; template struct any_helper; +template +struct select_helper; template struct bitReverseAs_helper; template @@ -104,6 +106,12 @@ struct nMax_helper; template struct nClamp_helper; template +struct addCarry_helper; +template +struct subBorrow_helper; +template +struct undef_helper; +template struct fma_helper; #ifdef __HLSL_VERSION // HLSL only specializations @@ -118,8 +126,8 @@ struct fma_helper; // the template<> needs to be written ourselves // return type is __VA_ARGS__ to protect against `,` in templated return types #define AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(HELPER_NAME, SPIRV_FUNCTION_NAME, ARG_TYPE_LIST, ARG_TYPE_SET, ...)\ -NBL_PARTIAL_REQ_TOP(is_same_v(BOOST_PP_SEQ_FOR_EACH_I(DECLVAL, _, ARG_TYPE_SET))), __VA_ARGS__ >) \ -struct HELPER_NAME(BOOST_PP_SEQ_FOR_EACH_I(DECLVAL, _, ARG_TYPE_SET))), __VA_ARGS__ >) >\ +NBL_PARTIAL_REQ_TOP(is_same_v(BOOST_PP_SEQ_FOR_EACH_I(DECLVAL, _, ARG_TYPE_SET))), __VA_ARGS__ >) \ +struct HELPER_NAME(BOOST_PP_SEQ_FOR_EACH_I(DECLVAL, _, ARG_TYPE_SET))), __VA_ARGS__ >) >\ {\ using return_t = __VA_ARGS__;\ static inline return_t __call( BOOST_PP_SEQ_FOR_EACH_I(DECL_ARG, _, ARG_TYPE_SET) )\ @@ -141,8 +149,9 @@ template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(length_helper, length, template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(normalize_helper, normalize, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(rsqrt_helper, inverseSqrt, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(fract_helper, fract, (T), (T), T) -template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(all_helper, any, (T), (T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(all_helper, all, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(any_helper, any, (T), (T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(select_helper, select, (B)(T), (B)(T)(T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(sign_helper, fSign, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(sign_helper, sSign, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(radians_helper, radians, (T), (T), T) @@ -164,6 +173,10 @@ template 
AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(refract_hel template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(nMax_helper, nMax, (T), (T)(T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(nMin_helper, nMin, (T), (T)(T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(nClamp_helper, nClamp, (T), (T)(T), T) +// Can use trivial case and not worry about restricting `T` with a concept since `spirv::AddCarryOutput / SubBorrowOutput` already take care of that +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(addCarry_helper, addCarry, (T), (T)(T), spirv::AddCarryOutput) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(subBorrow_helper, subBorrow, (T), (T)(T), spirv::SubBorrowOutput) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(undef_helper, undef, (T), , T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(fma_helper, fma, (T), (T)(T)(T), T) #define BITCOUNT_HELPER_RETRUN_TYPE conditional_t, vector::Dimension>, int32_t> @@ -602,6 +615,72 @@ struct nClamp_helper } }; +// Once again no need to restrict the two below with concepts for same reason as HLSL version +template +struct addCarry_helper +{ + using return_t = spirv::AddCarryOutput; + constexpr static inline return_t __call(const T operand1, const T operand2) + { + return_t retVal; + retVal.result = operand1 + operand2; + retVal.carry = T(retVal.result < operand1); + return retVal; + } +}; + +template +struct subBorrow_helper +{ + using return_t = spirv::SubBorrowOutput; + constexpr static inline return_t __call(const T operand1, const T operand2) + { + return_t retVal; + retVal.result = static_cast(operand1 - operand2); + retVal.borrow = T(operand1 < operand2); + return retVal; + } +}; + +template +NBL_PARTIAL_REQ_TOP(concepts::BooleanScalar) +struct select_helper) > +{ + NBL_CONSTEXPR_STATIC_FUNC T __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) + { + return condition ? object1 : object2; + } +}; + +template +NBL_PARTIAL_REQ_TOP(concepts::Boolean&& concepts::Vector&& concepts::Vector && (extent_v == extent_v)) +struct select_helper&& concepts::Vector&& concepts::Vector && (extent_v == extent_v)) > +{ + NBL_CONSTEXPR_STATIC_FUNC T __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) + { + using traits = hlsl::vector_traits; + array_get conditionGetter; + array_get objectGetter; + array_set setter; + + T selected; + for (uint32_t i = 0; i < traits::Dimension; ++i) + setter(selected, i, conditionGetter(condition, i) ? 
objectGetter(object1, i) : objectGetter(object2, i)); + + return selected; + } +}; + +template +struct undef_helper +{ + NBL_CONSTEXPR_STATIC_FUNC T __call() + { + T t; + return t; + } +}; + template requires concepts::FloatingPointScalar struct fma_helper diff --git a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl index a5747a5fb7..3cd0cf2f8b 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl @@ -23,6 +23,12 @@ namespace nbl namespace hlsl { +template +NBL_CONSTEXPR_FUNC T undef() +{ + return cpp_compat_intrinsics_impl::undef_helper::__call(); +} + template inline typename cpp_compat_intrinsics_impl::bitCount_helper::return_t bitCount(NBL_CONST_REF_ARG(T) val) { @@ -150,6 +156,12 @@ inline bool any(Vector vec) return cpp_compat_intrinsics_impl::any_helper::__call(vec); } +template +NBL_CONSTEXPR_FUNC ResultType select(Condition condition, ResultType object1, ResultType object2) +{ + return cpp_compat_intrinsics_impl::select_helper::__call(condition, object1, object2); +} + /** * @brief Returns x - floor(x). * @@ -217,6 +229,19 @@ inline T refract(NBL_CONST_REF_ARG(T) I, NBL_CONST_REF_ARG(T) N, NBL_CONST_REF_A return cpp_compat_intrinsics_impl::refract_helper::__call(I, N, eta); } +template +NBL_CONSTEXPR_FUNC spirv::AddCarryOutput addCarry(NBL_CONST_REF_ARG(T) operand1, NBL_CONST_REF_ARG(T) operand2) +{ + return cpp_compat_intrinsics_impl::addCarry_helper::__call(operand1, operand2); +} + +template +NBL_CONSTEXPR_FUNC spirv::SubBorrowOutput subBorrow(NBL_CONST_REF_ARG(T) operand1, NBL_CONST_REF_ARG(T) operand2) +{ + return cpp_compat_intrinsics_impl::subBorrow_helper::__call(operand1, operand2); +} + + #ifdef __HLSL_VERSION #define NAMESPACE spirv #else diff --git a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl index 51ca73f6d3..0afe214de7 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl @@ -15,7 +15,7 @@ namespace impl template struct Promote { - T operator()(U v) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(U) v) { return T(v); } @@ -26,7 +26,7 @@ struct Promote template struct Promote, U> { - enable_if_t::value && is_scalar::value, vector > operator()(U v) + NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(NBL_CONST_REF_ARG(U) v) { vector promoted = {Scalar(v)}; return promoted; @@ -36,7 +36,7 @@ struct Promote, U> template struct Promote, U> { - enable_if_t::value && is_scalar::value, vector > operator()(U v) + NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(NBL_CONST_REF_ARG(U) v) { vector promoted = {Scalar(v), Scalar(v)}; return promoted; @@ -46,7 +46,7 @@ struct Promote, U> template struct Promote, U> { - enable_if_t::value && is_scalar::value, vector > operator()(U v) + NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(NBL_CONST_REF_ARG(U) v) { vector promoted = {Scalar(v), Scalar(v), Scalar(v)}; return promoted; @@ -56,7 +56,7 @@ struct Promote, U> template struct Promote, U> { - enable_if_t::value && is_scalar::value, vector > operator()(U v) + NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(NBL_CONST_REF_ARG(U) v) { vector promoted = {Scalar(v), Scalar(v), Scalar(v), Scalar(v)}; return promoted; @@ -68,7 +68,7 @@ struct Promote, U> } template -T promote(const U v) // TODO: use NBL_CONST_REF_ARG(U) 
instead of U v (circular ref) +NBL_CONSTEXPR_FUNC T promote(const U v) // TODO: use NBL_CONST_REF_ARG(U) instead of U v (circular ref) { impl::Promote _promote; return _promote(v); diff --git a/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl b/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl new file mode 100644 index 0000000000..a95df183be --- /dev/null +++ b/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl @@ -0,0 +1,76 @@ +#ifndef _NBL_BUILTIN_HLSL_CPP_COMPAT_TRUNCATE_INCLUDED_ +#define _NBL_BUILTIN_HLSL_CPP_COMPAT_TRUNCATE_INCLUDED_ + +#include "nbl/builtin/hlsl/type_traits.hlsl" +#include "nbl/builtin/hlsl/concepts/core.hlsl" + +namespace nbl +{ +namespace hlsl +{ + +namespace impl +{ + +template +struct Truncate +{ + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(U) v) + { + return T(v); + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::Scalar) +struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar) > +{ + NBL_CONSTEXPR_FUNC vector operator()(NBL_CONST_REF_ARG(vector) v) + { + vector truncated = { v[0] }; + return truncated; + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::Scalar && N >= 2) +struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar && N >= 2) > +{ + NBL_CONSTEXPR_FUNC vector operator()(NBL_CONST_REF_ARG(vector) v) + { + vector truncated = { v[0], v[1]}; + return truncated; + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::Scalar&& N >= 3) +struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar&& N >= 3) > +{ + NBL_CONSTEXPR_FUNC vector operator()(NBL_CONST_REF_ARG(vector) v) + { + vector truncated = { v[0], v[1], v[2] }; + return truncated; + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::Scalar&& N >= 4) +struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar&& N >= 4) > +{ + NBL_CONSTEXPR_FUNC vector operator()(NBL_CONST_REF_ARG(vector) v) + { + vector truncated = { v[0], v[1], v[2], v[3] }; + return truncated; + } +}; + +} //namespace impl + +template +NBL_CONSTEXPR_FUNC T truncate(NBL_CONST_REF_ARG(U) v) +{ + impl::Truncate _truncate; + return _truncate(v); +} + +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/emulated/float64_t.hlsl b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl index a0cde90df9..2dfc52c957 100644 --- a/include/nbl/builtin/hlsl/emulated/float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl @@ -412,25 +412,25 @@ inline int extractExponent(__VA_ARGS__ x)\ }\ \ template<>\ -NBL_CONSTEXPR_INLINE_FUNC __VA_ARGS__ replaceBiasedExponent(__VA_ARGS__ x, typename unsigned_integer_of_size::type biasedExp)\ +NBL_CONSTEXPR_FUNC __VA_ARGS__ replaceBiasedExponent(__VA_ARGS__ x, typename unsigned_integer_of_size::type biasedExp)\ {\ return __VA_ARGS__(replaceBiasedExponent(x.data, biasedExp));\ }\ \ template <>\ -NBL_CONSTEXPR_INLINE_FUNC __VA_ARGS__ fastMulExp2(__VA_ARGS__ x, int n)\ +NBL_CONSTEXPR_FUNC __VA_ARGS__ fastMulExp2(__VA_ARGS__ x, int n)\ {\ return __VA_ARGS__(replaceBiasedExponent(x.data, extractBiasedExponent(x) + uint32_t(n)));\ }\ \ template <>\ -NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size::type extractMantissa(__VA_ARGS__ x)\ +NBL_CONSTEXPR_FUNC unsigned_integer_of_size::type extractMantissa(__VA_ARGS__ x)\ {\ return extractMantissa(x.data);\ }\ \ template <>\ -NBL_CONSTEXPR_INLINE_FUNC uint64_t extractNormalizeMantissa(__VA_ARGS__ x)\ +NBL_CONSTEXPR_FUNC uint64_t extractNormalizeMantissa(__VA_ARGS__ x)\ {\ return extractNormalizeMantissa(x.data);\ }\ @@ -577,10 +577,10 @@ namespace ieee754 { namespace impl { -template<> NBL_CONSTEXPR_INLINE_FUNC uint64_t 
bitCastToUintType(emulated_float64_t x) { return x.data; } -template<> NBL_CONSTEXPR_INLINE_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } -template<> NBL_CONSTEXPR_INLINE_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } -template<> NBL_CONSTEXPR_INLINE_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } +template<> NBL_CONSTEXPR_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } +template<> NBL_CONSTEXPR_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } +template<> NBL_CONSTEXPR_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } +template<> NBL_CONSTEXPR_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } } IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(emulated_float64_t); diff --git a/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl b/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl index 44b881345d..df785e3e8f 100644 --- a/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl +++ b/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl @@ -41,7 +41,7 @@ namespace hlsl { namespace emulated_float64_t_impl { -NBL_CONSTEXPR_INLINE_FUNC uint64_t2 shiftMantissaLeftBy53(uint64_t mantissa64) +NBL_CONSTEXPR_FUNC uint64_t2 shiftMantissaLeftBy53(uint64_t mantissa64) { uint64_t2 output; output.x = mantissa64 >> (64 - ieee754::traits::mantissaBitCnt); @@ -74,7 +74,7 @@ inline uint64_t castFloat32ToStorageType(float32_t val) } }; -NBL_CONSTEXPR_INLINE_FUNC bool isZero(uint64_t val) +NBL_CONSTEXPR_FUNC bool isZero(uint64_t val) { return (val << 1) == 0ull; } @@ -137,18 +137,18 @@ inline uint64_t reinterpretAsFloat64BitPattern(int64_t val) return sign | reinterpretAsFloat64BitPattern(absVal); }; -NBL_CONSTEXPR_INLINE_FUNC uint64_t flushDenormToZero(uint64_t value) +NBL_CONSTEXPR_FUNC uint64_t flushDenormToZero(uint64_t value) { const uint64_t biasBits = value & ieee754::traits::exponentMask; return biasBits ? 
value : (value & ieee754::traits::signMask); } -NBL_CONSTEXPR_INLINE_FUNC uint64_t assembleFloat64(uint64_t signShifted, uint64_t expShifted, uint64_t mantissa) +NBL_CONSTEXPR_FUNC uint64_t assembleFloat64(uint64_t signShifted, uint64_t expShifted, uint64_t mantissa) { return signShifted | expShifted | mantissa; } -NBL_CONSTEXPR_INLINE_FUNC bool areBothInfinity(uint64_t lhs, uint64_t rhs) +NBL_CONSTEXPR_FUNC bool areBothInfinity(uint64_t lhs, uint64_t rhs) { lhs &= ~ieee754::traits::signMask; rhs &= ~ieee754::traits::signMask; @@ -156,18 +156,18 @@ NBL_CONSTEXPR_INLINE_FUNC bool areBothInfinity(uint64_t lhs, uint64_t rhs) return lhs == rhs && lhs == ieee754::traits::inf; } -NBL_CONSTEXPR_INLINE_FUNC bool areBothZero(uint64_t lhs, uint64_t rhs) +NBL_CONSTEXPR_FUNC bool areBothZero(uint64_t lhs, uint64_t rhs) { return !bool((lhs | rhs) << 1); } -NBL_CONSTEXPR_INLINE_FUNC bool areBothSameSignZero(uint64_t lhs, uint64_t rhs) +NBL_CONSTEXPR_FUNC bool areBothSameSignZero(uint64_t lhs, uint64_t rhs) { return !bool((lhs) << 1) && (lhs == rhs); } template -NBL_CONSTEXPR_INLINE_FUNC bool operatorLessAndGreaterCommonImplementation(uint64_t lhs, uint64_t rhs) +NBL_CONSTEXPR_FUNC bool operatorLessAndGreaterCommonImplementation(uint64_t lhs, uint64_t rhs) { if (!FastMath) { diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl new file mode 100644 index 0000000000..8a3fd42faf --- /dev/null +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -0,0 +1,514 @@ +#ifndef _NBL_BUILTIN_HLSL_EMULATED_INT64_T_HLSL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_EMULATED_INT64_T_HLSL_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/functional.hlsl" +#include "nbl/builtin/hlsl/concepts/core.hlsl" +#include "nbl/builtin/hlsl/bit.hlsl" + +// Didn't bother with operator*, operator/, implement if you need them. Multiplication is pretty straightforward, division requires switching on signs +// and whether the topmost bits of the divisor are equal to 0 +// - Francisco + +namespace nbl +{ +namespace hlsl +{ + +template +struct emulated_int64_base +{ + using storage_t = vector; + using this_t = emulated_int64_base; + using this_signed_t = emulated_int64_base; + + storage_t data; + + // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- + + #ifndef __HLSL_VERSION + + emulated_int64_base() = default; + + // GLM requires these to cast vectors because it uses a native `static_cast` + template + constexpr explicit emulated_int64_base(const I& toEmulate); + + constexpr explicit emulated_int64_base(const emulated_int64_base& other) : data(other.data) {} + + #endif + + /** + * @brief Creates an `emulated_int64` from a vector of two `uint32_t`s representing its bitpattern + * + * @param [in] _data Vector of `uint32_t` encoding the `uint64_t/int64_t` being emulated. 
Stored as little endian (the first component holds the lower 32 bits)
+     */
+    NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(storage_t) _data)
+    {
+        this_t retVal;
+        retVal.data = _data;
+        return retVal;
+    }
+
+    /**
+     * @brief Creates an `emulated_int64` from two `uint32_t`s representing its bitpattern
+     *
+     * @param [in] lo Lowest 32 bits of the `uint64_t/int64_t` being emulated
+     * @param [in] hi Highest 32 bits of the `uint64_t/int64_t` being emulated
+     */
+    NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint32_t) lo, NBL_CONST_REF_ARG(uint32_t) hi)
+    {
+        return create(storage_t(lo, hi));
+    }
+
+    // ------------------------------------------------------- CONVERSION OPERATORS ---------------------------------------------------------------
+    // GLM requires these for vector casts
+
+    #ifndef __HLSL_VERSION
+
+    template<typename I>
+    constexpr explicit operator I() const noexcept;
+
+    #endif
+
+    // ------------------------------------------------------- INTERNAL GETTERS -------------------------------------------------
+
+    NBL_CONSTEXPR_FUNC uint32_t __getLSB() NBL_CONST_MEMBER_FUNC
+    {
+        return data.x;
+    }
+
+    NBL_CONSTEXPR_FUNC uint32_t __getMSB() NBL_CONST_MEMBER_FUNC
+    {
+        return data.y;
+    }
+
+    // ------------------------------------------------------- BITWISE OPERATORS -------------------------------------------------
+
+    NBL_CONSTEXPR_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC
+    {
+        this_t retVal = create(data & rhs.data);
+        return retVal;
+    }
+
+    NBL_CONSTEXPR_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC
+    {
+        this_t retVal = create(data | rhs.data);
+        return retVal;
+    }
+
+    NBL_CONSTEXPR_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC
+    {
+        this_t retVal = create(data ^ rhs.data);
+        return retVal;
+    }
+
+    NBL_CONSTEXPR_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC
+    {
+        this_t retVal = create(~data);
+        return retVal;
+    }
+
+    // Only valid in CPP
+    #ifndef __HLSL_VERSION
+    constexpr inline this_t operator<<(uint32_t bits) const;
+    constexpr inline this_t operator>>(uint32_t bits) const;
+
+    #endif
+
+    // ------------------------------------------------------- ARITHMETIC OPERATORS -------------------------------------------------
+
+    NBL_CONSTEXPR_FUNC this_signed_t operator-() NBL_CONST_MEMBER_FUNC
+    {
+        // two's complement negation is ~x + 1: the +1 must carry from the low word into the
+        // high word, so the two halves can't simply be negated component-wise
+        const this_t negated = operator~() + create(uint32_t(1), uint32_t(0));
+        return this_signed_t::create(negated.data);
+    }
+
+    NBL_CONSTEXPR_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC
+    {
+        const spirv::AddCarryOutput<uint32_t> lowerAddResult = addCarry(__getLSB(), rhs.__getLSB());
+        return create(lowerAddResult.result, __getMSB() + rhs.__getMSB() + lowerAddResult.carry);
+    }
+
+    NBL_CONSTEXPR_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC
+    {
+        const spirv::SubBorrowOutput<uint32_t> lowerSubResult = subBorrow(__getLSB(), rhs.__getLSB());
+        return create(lowerSubResult.result, __getMSB() - rhs.__getMSB() - lowerSubResult.borrow);
+    }
+
+    // ------------------------------------------------------- COMPARISON OPERATORS -------------------------------------------------
+    NBL_CONSTEXPR_FUNC bool operator==(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC
+    {
+        equal_to<storage_t> equals;
+        return all(equals(data, rhs.data));
+    }
+
+    NBL_CONSTEXPR_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC
+    {
+        not_equal_to<storage_t> notEquals;
+        return any(notEquals(data, rhs.data));
+    }
+
+    NBL_CONSTEXPR_FUNC bool operator<(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC
+    {
+        // Either the topmost bits, when interpreted with correct sign, are less than those of `rhs`,
+        // or they're equal and the lower bits are less
+        // (lower bits are always positive in both unsigned and 2's complement so comparison can happen as-is)
+        const bool MSBEqual = __getMSB() == rhs.__getMSB();
+        const bool MSB = Signed ? (bit_cast<int32_t>(__getMSB()) < bit_cast<int32_t>(rhs.__getMSB())) : (__getMSB() < rhs.__getMSB());
+        const bool LSB = __getLSB() < rhs.__getLSB();
+        return MSBEqual ? LSB : MSB;
+    }
+
+    NBL_CONSTEXPR_FUNC bool operator>(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC
+    {
+        // Same reasoning as above
+        const bool MSBEqual = __getMSB() == rhs.__getMSB();
+        const bool MSB = Signed ? (bit_cast<int32_t>(__getMSB()) > bit_cast<int32_t>(rhs.__getMSB())) : (__getMSB() > rhs.__getMSB());
+        const bool LSB = __getLSB() > rhs.__getLSB();
+        return MSBEqual ? LSB : MSB;
+    }
+
+    NBL_CONSTEXPR_FUNC bool operator<=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC
+    {
+        return !operator>(rhs);
+    }
+
+    NBL_CONSTEXPR_FUNC bool operator>=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC
+    {
+        return !operator<(rhs);
+    }
+};
+
+using emulated_uint64_t = emulated_int64_base<false>;
+using emulated_int64_t = emulated_int64_base<true>;
+
+namespace impl
+{
+
+template<bool Signed1, bool Signed2>
+struct static_cast_helper<emulated_int64_base<Signed1>, emulated_int64_base<Signed2> >
+{
+    using To = emulated_int64_base<Signed1>;
+    using From = emulated_int64_base<Signed2>;
+
+    NBL_CONSTEXPR_STATIC_FUNC To cast(NBL_CONST_REF_ARG(From) other)
+    {
+        To retVal;
+        retVal.data = other.data;
+        return retVal;
+    }
+};
+
+template<typename I, bool Signed> NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar<I> && (sizeof(I) <= sizeof(uint32_t)))
+struct static_cast_helper<I, emulated_int64_base<Signed> NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar<I> && (sizeof(I) <= sizeof(uint32_t))) >
+{
+    using To = I;
+    using From = emulated_int64_base<Signed>;
+
+    // Return only the lowest bits
+    NBL_CONSTEXPR_STATIC_FUNC To cast(NBL_CONST_REF_ARG(From) val)
+    {
+        return _static_cast<To>(val.data.x);
+    }
+};
+
+template<typename I, bool Signed> NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar<I> && (sizeof(I) > sizeof(uint32_t)))
+struct static_cast_helper<I, emulated_int64_base<Signed> NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar<I> && (sizeof(I) > sizeof(uint32_t))) >
+{
+    using To = I;
+    using From = emulated_int64_base<Signed>;
+
+    NBL_CONSTEXPR_STATIC_FUNC To cast(NBL_CONST_REF_ARG(From) val)
+    {
+        return bit_cast<To>(val.data);
+    }
+};
+
+template<typename I, bool Signed> NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar<I> && (sizeof(I) <= sizeof(uint32_t)))
+struct static_cast_helper<emulated_int64_base<Signed>, I NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar<I> && (sizeof(I) <= sizeof(uint32_t))) >
+{
+    using To = emulated_int64_base<Signed>;
+    using From = I;
+
+    // Set only lower bits
+    NBL_CONSTEXPR_STATIC_FUNC To cast(NBL_CONST_REF_ARG(From) i)
+    {
+        return To::create(_static_cast<uint32_t>(i), uint32_t(0));
+    }
+};
+
+template<typename I, bool Signed> NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar<I> && (sizeof(I) > sizeof(uint32_t)))
+struct static_cast_helper<emulated_int64_base<Signed>, I NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar<I> && (sizeof(I) > sizeof(uint32_t))) >
+{
+    using To = emulated_int64_base<Signed>;
+    using From = I;
+
+    NBL_CONSTEXPR_STATIC_FUNC To cast(NBL_CONST_REF_ARG(From) i)
+    {
+        // `bit_cast` blocked by GLM vectors using a union
+        #ifndef __HLSL_VERSION
+        return To::create(_static_cast<uint32_t>(i), _static_cast<uint32_t>(i >> 32));
+        #else
+        To retVal;
+        retVal.data = bit_cast<vector<uint32_t, 2> >(i);
+        return retVal;
+        #endif
+    }
+};
+
+} //namespace impl
+
+// Define constructor and conversion operators
+
+#ifndef __HLSL_VERSION
+
+template<bool Signed>
+template<typename I>
+constexpr emulated_int64_base<Signed>::emulated_int64_base(const I& toEmulate)
+{
+    *this = _static_cast<emulated_int64_base<Signed> >(toEmulate);
+}
+
+template<bool Signed>
+template<typename I>
+constexpr emulated_int64_base<Signed>::operator I() const noexcept
+{
+    
return _static_cast(*this); +} + +#endif + +// ---------------------- Functional operators ------------------------ + +template +struct left_shift_operator > +{ + using type_t = emulated_int64_base; + NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); + + // Can't do generic templated definition, see: + //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 + + // If `_bits > 63` or `_bits < 0` the result is undefined + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) + { + const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites LSB + const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; + const type_t shifted = type_t::create(bigShift ? vector(0, operand.__getLSB() << shift) + : vector(operand.__getLSB() << bits, (operand.__getMSB() << bits) | (operand.__getLSB() >> shift))); + ternary_operator ternary; + return ternary(bool(bits), shifted, operand); + } + + // If `_bits > 63` or `_bits < 0` the result is undefined + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) + { + return operator()(operand, _static_cast(bits)); + } +}; + +template<> +struct arithmetic_right_shift_operator +{ + using type_t = emulated_uint64_t; + NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); + + // Can't do generic templated definition, see: + //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 + + // If `_bits > 63` the result is undefined + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) + { + const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites MSB + const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; + const type_t shifted = type_t::create(bigShift ? vector(operand.__getMSB() >> shift, 0) + : vector((operand.__getMSB() << shift) | (operand.__getLSB() >> bits), operand.__getMSB() >> bits)); + ternary_operator ternary; + return ternary(bool(bits), shifted, operand); + } + + // If `_bits > 63` the result is undefined + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) + { + return operator()(operand, _static_cast(bits)); + } +}; + +template<> +struct arithmetic_right_shift_operator +{ + using type_t = emulated_int64_t; + NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); + + // Can't do generic templated definition, see: + //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 + + // If `_bits > 63` or `_bits < 0` the result is undefined + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) + { + const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites MSB + const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; + const type_t shifted = type_t::create(bigShift ? vector(uint32_t(int32_t(operand.__getMSB()) >> shift), int32_t(operand.__getMSB()) < 0 ? 
~uint32_t(0) : uint32_t(0)) + : vector((operand.__getMSB() << shift) | (operand.__getLSB() >> bits), uint32_t(int32_t(operand.__getMSB()) >> bits))); + ternary_operator ternary; + return ternary(bool(bits), shifted, operand); + } + + // If `_bits > 63` or `_bits < 0` the result is undefined + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) + { + return operator()(operand, _static_cast(bits)); + } +}; + +#ifndef __HLSL_VERSION + +template +constexpr inline emulated_int64_base emulated_int64_base::operator<<(uint32_t bits) const +{ + left_shift_operator leftShift; + return leftShift(*this, bits); +} + +constexpr inline emulated_uint64_t emulated_uint64_t::operator>>(uint32_t bits) const +{ + arithmetic_right_shift_operator rightShift; + return rightShift(*this, bits); +} + +constexpr inline emulated_int64_t emulated_int64_t::operator>>(uint32_t bits) const +{ + arithmetic_right_shift_operator rightShift; + return rightShift(*this, bits); +} + +#endif + +// ---------------------- STD arithmetic operators ------------------------ +// Specializations of the structs found in functional.hlsl +// These all have to be specialized because of the identity that can't be initialized inside the struct definition + +template +struct plus > +{ + using type_t = emulated_int64_base; + + type_t operator()(NBL_CONST_REF_ARG(type_t) lhs, NBL_CONST_REF_ARG(type_t) rhs) + { + return lhs + rhs; + } + + const static type_t identity; +}; + +template +struct minus > +{ + using type_t = emulated_int64_base; + + type_t operator()(NBL_CONST_REF_ARG(type_t) lhs, NBL_CONST_REF_ARG(type_t) rhs) + { + return lhs - rhs; + } + + const static type_t identity; +}; + +template<> +NBL_CONSTEXPR_INLINE emulated_uint64_t plus::identity = _static_cast(uint64_t(0)); +template<> +NBL_CONSTEXPR_INLINE emulated_int64_t plus::identity = _static_cast(int64_t(0)); +template<> +NBL_CONSTEXPR_INLINE emulated_uint64_t minus::identity = _static_cast(uint64_t(0)); +template<> +NBL_CONSTEXPR_INLINE emulated_int64_t minus::identity = _static_cast(int64_t(0)); + +// --------------------------------- Compound assignment operators ------------------------------------------ +// Specializations of the structs found in functional.hlsl + +template +struct plus_assign > +{ + using type_t = emulated_int64_base; + using base_t = plus; + base_t baseOp; + void operator()(NBL_REF_ARG(type_t) lhs, NBL_CONST_REF_ARG(type_t) rhs) + { + lhs = baseOp(lhs, rhs); + } + + const static type_t identity; +}; + +template +struct minus_assign > +{ + using type_t = emulated_int64_base; + using base_t = minus; + base_t baseOp; + void operator()(NBL_REF_ARG(type_t) lhs, NBL_CONST_REF_ARG(type_t) rhs) + { + lhs = baseOp(lhs, rhs); + } + + const static type_t identity; +}; + +template<> +NBL_CONSTEXPR_INLINE emulated_uint64_t plus_assign::identity = plus::identity; +template<> +NBL_CONSTEXPR_INLINE emulated_int64_t plus_assign::identity = plus::identity; +template<> +NBL_CONSTEXPR_INLINE emulated_uint64_t minus_assign::identity = minus::identity; +template<> +NBL_CONSTEXPR_INLINE emulated_int64_t minus_assign::identity = minus::identity; + +// ------------------------------------------------ TYPE TRAITS SATISFIED ----------------------------------------------------- + +template<> +struct is_signed : bool_constant {}; + +template<> +struct is_unsigned : bool_constant {}; + +// --------------------------------------------------- CONCEPTS SATISFIED ----------------------------------------------------- +namespace concepts +{ +namespace impl +{ 
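// Worked example (editorial sketch, not part of the diff): how the emulated 64-bit API above
// composes. The halves are little endian (`data.x` is the low word), addition and subtraction
// propagate through addCarry()/subBorrow(), and shifts cross the 32-bit word boundary
// explicitly; `lshift` below is just a local functor instance.
//
//   using namespace nbl::hlsl;
//   emulated_uint64_t a = emulated_uint64_t::create(0xFFFFFFFFu, 0x00000000u); // 2^32 - 1
//   emulated_uint64_t b = emulated_uint64_t::create(0x00000001u, 0x00000000u); // 1
//   emulated_uint64_t sum = a + b;    // low word wraps to 0, addCarry() bumps the high word: 2^32
//   emulated_uint64_t diff = sum - b; // subBorrow() propagates the borrow back down: 2^32 - 1
//   const bool less = b < a;          // high words tie, so the low words decide: true
//   left_shift_operator<emulated_uint64_t> lshift;
//   emulated_uint64_t shifted = lshift(a, 4u); // low bits spill into the high word: 0xF'FFFFFFF0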
+template +struct is_emulating_integral_scalar > +{ + NBL_CONSTEXPR_STATIC_INLINE bool value = true; +}; +} +} + +} //namespace nbl +} //namespace hlsl + +// Declare them as signed/unsigned versions of each other + +#ifndef __HLSL_VERSION +#define NBL_ADD_STD std:: +#else +#define NBL_ADD_STD nbl::hlsl:: +#endif + +template<> +struct NBL_ADD_STD make_unsigned : type_identity {}; + +template<> +struct NBL_ADD_STD make_unsigned : type_identity {}; + +template<> +struct NBL_ADD_STD make_signed : type_identity {}; + +template<> +struct NBL_ADD_STD make_signed : type_identity {}; + +#undef NBL_ADD_STD + + + +#endif diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index 0053008aa4..3780ce001b 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -2,6 +2,7 @@ #define _NBL_BUILTIN_HLSL_EMULATED_VECTOR_T_HLSL_INCLUDED_ #include +#include #include #include #include @@ -23,7 +24,7 @@ struct _2_component_vec static_assert(sizeof(T) <= 8); - NBL_CONSTEXPR_INLINE_FUNC void setComponent(uint32_t componentIdx, T val) + NBL_CONSTEXPR_FUNC void setComponent(uint32_t componentIdx, T val) { if (componentIdx == 0) x = val; @@ -31,7 +32,7 @@ struct _2_component_vec y = val; } - NBL_CONSTEXPR_INLINE_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC { if (componentIdx == 0) return x; @@ -39,9 +40,10 @@ struct _2_component_vec return y; // TODO: avoid code duplication, make it constexpr - using TAsUint = typename unsigned_integer_of_size::type; - TAsUint invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull); - return nbl::hlsl::bit_cast(invalidComponentValue); + //using TAsUint = typename unsigned_integer_of_size::type; + //TAsUint invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull); + //return nbl::hlsl::bit_cast(invalidComponentValue); + return nbl::hlsl::undef(); } NBL_CONSTEXPR_STATIC uint32_t Dimension = 2; @@ -55,7 +57,7 @@ struct _3_component_vec T z; - NBL_CONSTEXPR_INLINE_FUNC void setComponent(uint32_t componentIdx, T val) + NBL_CONSTEXPR_FUNC void setComponent(uint32_t componentIdx, T val) { if (componentIdx == 0) x = val; @@ -65,7 +67,7 @@ struct _3_component_vec z = val; } - NBL_CONSTEXPR_INLINE_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC { if (componentIdx == 0) return x; @@ -75,9 +77,10 @@ struct _3_component_vec return z; // TODO: avoid code duplication, make it constexpr - using TAsUint = typename unsigned_integer_of_size::type; - TAsUint invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull >> (64 - sizeof(T) * 8)); - return nbl::hlsl::bit_cast(invalidComponentValue); + //using TAsUint = typename unsigned_integer_of_size::type; + //TAsUint invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull >> (64 - sizeof(T) * 8)); + //return nbl::hlsl::bit_cast(invalidComponentValue); + return nbl::hlsl::undef(); } NBL_CONSTEXPR_STATIC uint32_t Dimension = 3; @@ -91,7 +94,7 @@ struct _4_component_vec T z; T w; - NBL_CONSTEXPR_INLINE_FUNC void setComponent(uint32_t componentIdx, T val) + NBL_CONSTEXPR_FUNC void setComponent(uint32_t componentIdx, T val) { if (componentIdx == 0) x = val; @@ -103,7 +106,7 @@ struct _4_component_vec w = val; } - NBL_CONSTEXPR_INLINE_FUNC T getComponent(uint32_t componentIdx) 
NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC { if (componentIdx == 0) return x; @@ -115,133 +118,210 @@ struct _4_component_vec return w; // TODO: avoid code duplication, make it constexpr - using TAsUint = typename unsigned_integer_of_size::type; - uint64_t invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull >> (64 - sizeof(T) * 8)); - return nbl::hlsl::bit_cast(invalidComponentValue); + //using TAsUint = typename unsigned_integer_of_size::type; + //uint64_t invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull >> (64 - sizeof(T) * 8)); + //return nbl::hlsl::bit_cast(invalidComponentValue); + return nbl::hlsl::undef(); } NBL_CONSTEXPR_STATIC uint32_t Dimension = 4; }; -template ::value> -struct emulated_vector : CRTP -{ - using this_t = emulated_vector; - using component_t = ComponentType; - - NBL_CONSTEXPR_STATIC_INLINE this_t create(this_t other) - { - CRTP output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, other.getComponent(i)); - } - NBL_CONSTEXPR_STATIC_INLINE this_t create(vector other) - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, other[i]); - - return output; - } - - NBL_CONSTEXPR_INLINE_FUNC this_t operator+(component_t val) - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, this_t::getComponent(i) + val); - - return output; - } - NBL_CONSTEXPR_INLINE_FUNC this_t operator+(this_t other) - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, this_t::getComponent(i) + other.getComponent(i)); - - return output; - } - NBL_CONSTEXPR_INLINE_FUNC this_t operator+(vector other) - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, this_t::getComponent(i) + other[i]); - - return output; - } - - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(component_t val) - { - this_t output; +template +struct emulated_vector; - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) - val); +// Generic ComponentType vectors still have to be partial specialized based on whether they're fundamental and/or integral - return output; - } - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(this_t other) - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) - other.getComponent(i)); +#define NBL_EMULATED_VECTOR_UNARY_OPERATOR(OP)\ +NBL_CONSTEXPR_FUNC this_t operator##OP() NBL_CONST_MEMBER_FUNC \ +{\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, this_t::getComponent(i).operator##OP());\ + return output;\ +} - return output; - } - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(vector other) - { - this_t output; +#define NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(OP)\ +NBL_CONSTEXPR_FUNC this_t operator##OP (component_t val) NBL_CONST_MEMBER_FUNC \ +{\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, this_t::getComponent(i) OP val);\ + return output;\ +}\ +NBL_CONSTEXPR_FUNC this_t operator##OP (this_t other) NBL_CONST_MEMBER_FUNC \ +{\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, this_t::getComponent(i) OP other.getComponent(i));\ + return output;\ +} - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, 
CRTP::getComponent(i) - other[i]); +#define NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(OP) NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(OP)\ +NBL_CONSTEXPR_FUNC this_t operator##OP(vector other) NBL_CONST_MEMBER_FUNC \ +{\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, this_t::getComponent(i) OP other[i]);\ + return output;\ +} - return output; - } +#define NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(OP) NBL_CONSTEXPR_FUNC vector operator##OP (this_t other) NBL_CONST_MEMBER_FUNC \ +{\ + vector output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output[i] = CRTP::getComponent(i) OP other.getComponent(i);\ + return output;\ +} - NBL_CONSTEXPR_INLINE_FUNC this_t operator*(component_t val) - { - this_t output; +#define NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(OP) NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(OP)\ +NBL_CONSTEXPR_FUNC vector operator##OP (vector other) NBL_CONST_MEMBER_FUNC \ +{\ + vector output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output[i] = CRTP::getComponent(i) OP other[i];\ + return output;\ +} - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) * val); +#define NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM \ +using this_t = emulated_vector;\ +using component_t = ComponentType;\ +NBL_CONSTEXPR_STATIC_FUNC this_t create(this_t other)\ +{\ + CRTP output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, other.getComponent(i));\ +}\ +NBL_CONSTEXPR_FUNC component_t calcComponentSum() NBL_CONST_MEMBER_FUNC \ +{\ + component_t sum = CRTP::getComponent(0);\ + [[unroll]]\ + for (uint32_t i = 1u; i < CRTP::Dimension; ++i)\ + sum = sum + CRTP::getComponent(i);\ + return sum;\ +} - return output; - } - NBL_CONSTEXPR_INLINE_FUNC this_t operator*(this_t other) - { - this_t output; +#define NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_CREATION_AND_COMPONENT_SUM NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM \ +NBL_CONSTEXPR_STATIC_FUNC this_t create(vector other)\ +{\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, other[i]);\ + return output;\ +} - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) * other.getComponent(i)); +// Fundamental, integral +template NBL_PARTIAL_REQ_TOP(is_fundamental_v && concepts::IntegralLikeScalar) +struct emulated_vector&& concepts::IntegralLikeScalar) > : CRTP +{ + // Creation for fundamental type + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_CREATION_AND_COMPONENT_SUM + // Operators, including integral + NBL_EMULATED_VECTOR_UNARY_OPERATOR(~) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(&) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(|) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(^) + NBL_EMULATED_VECTOR_UNARY_OPERATOR(-) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(+) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(-) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(*) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(/) + // Comparison operators + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(==) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(!=) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(<) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(<=) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(>) + 
NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(>=) +}; - return output; - } - NBL_CONSTEXPR_INLINE_FUNC this_t operator*(vector other) - { - this_t output; +// Fundamental, not integral +template NBL_PARTIAL_REQ_TOP(is_fundamental_v && !concepts::IntegralLikeScalar) +struct emulated_vector && !concepts::IntegralLikeScalar) > : CRTP +{ + // Creation for fundamental type + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_CREATION_AND_COMPONENT_SUM + // Operators + NBL_EMULATED_VECTOR_UNARY_OPERATOR(-) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(+) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(-) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(*) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(/) + // Comparison operators + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(==) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(!=) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(<) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(<=) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(>) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(>=) +}; - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) * other[i]); +// Not fundamental, integral +template NBL_PARTIAL_REQ_TOP(!is_fundamental_v && concepts::IntegralLikeScalar) +struct emulated_vector && concepts::IntegralLikeScalar) > : CRTP +{ + // Creation + NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM + // Operators, including integral + NBL_EMULATED_VECTOR_UNARY_OPERATOR(~) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(&) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(|) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(^) + NBL_EMULATED_VECTOR_UNARY_OPERATOR(-) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(+) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(-) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(*) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(/) + // Comparison operators + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(==) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(!=) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(<) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(<=) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(>) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(>=) +}; - return output; - } +// Not fundamental, not integral +template NBL_PARTIAL_REQ_TOP(!is_fundamental_v && !concepts::IntegralLikeScalar) +struct emulated_vector && !concepts::IntegralLikeScalar) > : CRTP +{ + // Creation + NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM + // Operators + NBL_EMULATED_VECTOR_UNARY_OPERATOR(-) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(+) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(-) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(*) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(/) + // Comparison operators + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(==) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(!=) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(<) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(<=) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(>) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(>=) +}; - NBL_CONSTEXPR_INLINE_FUNC component_t calcComponentSum() - { - component_t sum = 0; - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - sum = sum + CRTP::getComponent(i); +#undef NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_CREATION_AND_COMPONENT_SUM +#undef NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM +#undef NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR +#undef NBL_EMULATED_VECTOR_COMPARISON_OPERATOR +#undef NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR +#undef 
NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR +#undef NBL_EMULATED_VECTOR_UNARY_OPERATOR - return sum; - } -}; +// ----------------------------------------------------- EMULATED FLOAT SPECIALIZATION -------------------------------------------------------------------- #define DEFINE_OPERATORS_FOR_TYPE(...)\ -NBL_CONSTEXPR_INLINE_FUNC this_t operator+(__VA_ARGS__ val)\ +NBL_CONSTEXPR_FUNC this_t operator+(__VA_ARGS__ val) NBL_CONST_MEMBER_FUNC \ {\ this_t output;\ for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ @@ -250,7 +330,7 @@ NBL_CONSTEXPR_INLINE_FUNC this_t operator+(__VA_ARGS__ val)\ return output;\ }\ \ -NBL_CONSTEXPR_INLINE_FUNC this_t operator-(__VA_ARGS__ val)\ +NBL_CONSTEXPR_FUNC this_t operator-(__VA_ARGS__ val) NBL_CONST_MEMBER_FUNC \ {\ this_t output;\ for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ @@ -259,7 +339,7 @@ NBL_CONSTEXPR_INLINE_FUNC this_t operator-(__VA_ARGS__ val)\ return output;\ }\ \ -NBL_CONSTEXPR_INLINE_FUNC this_t operator*(__VA_ARGS__ val)\ +NBL_CONSTEXPR_FUNC this_t operator*(__VA_ARGS__ val) NBL_CONST_MEMBER_FUNC \ {\ this_t output;\ for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ @@ -269,14 +349,14 @@ NBL_CONSTEXPR_INLINE_FUNC this_t operator*(__VA_ARGS__ val)\ }\ \ -// TODO: some of code duplication could be avoided -template -struct emulated_vector : CRTP + +template +struct emulated_vector, CRTP> : CRTP { - using component_t = ComponentType; - using this_t = emulated_vector; + using component_t = emulated_float64_t; + using this_t = emulated_vector; - NBL_CONSTEXPR_STATIC_INLINE this_t create(this_t other) + NBL_CONSTEXPR_STATIC_FUNC this_t create(this_t other) { this_t output; @@ -287,17 +367,17 @@ struct emulated_vector : CRTP } template - NBL_CONSTEXPR_STATIC_INLINE this_t create(vector other) + NBL_CONSTEXPR_STATIC_FUNC this_t create(vector other) { this_t output; for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, ComponentType::create(other[i])); + output.setComponent(i, component_t::create(other[i])); return output; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator+(this_t other) + NBL_CONSTEXPR_FUNC this_t operator+(this_t other) NBL_CONST_MEMBER_FUNC { this_t output; @@ -306,7 +386,7 @@ struct emulated_vector : CRTP return output; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(this_t other) + NBL_CONSTEXPR_FUNC this_t operator-(this_t other) NBL_CONST_MEMBER_FUNC { this_t output; @@ -315,7 +395,7 @@ struct emulated_vector : CRTP return output; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator*(this_t other) + NBL_CONSTEXPR_FUNC this_t operator*(this_t other) NBL_CONST_MEMBER_FUNC { this_t output; @@ -338,9 +418,9 @@ struct emulated_vector : CRTP DEFINE_OPERATORS_FOR_TYPE(int32_t) DEFINE_OPERATORS_FOR_TYPE(int64_t) - NBL_CONSTEXPR_INLINE_FUNC ComponentType calcComponentSum() + NBL_CONSTEXPR_FUNC component_t calcComponentSum() NBL_CONST_MEMBER_FUNC { - ComponentType sum = ComponentType::create(0); + component_t sum = component_t::create(0); for (uint32_t i = 0u; i < CRTP::Dimension; ++i) sum = sum + CRTP::getComponent(i); @@ -425,7 +505,7 @@ namespace impl template struct static_cast_helper, vector, void> { - static inline emulated_vector_t2 cast(vector vec) + NBL_CONSTEXPR_STATIC_FUNC emulated_vector_t2 cast(NBL_CONST_REF_ARG(vector) vec) { emulated_vector_t2 output; output.x = _static_cast(vec.x); @@ -438,7 +518,7 @@ struct static_cast_helper, vector, void> template struct static_cast_helper, vector, void> { - static inline emulated_vector_t3 cast(vector vec) + NBL_CONSTEXPR_STATIC_FUNC emulated_vector_t3 
cast(NBL_CONST_REF_ARG(vector) vec) { emulated_vector_t3 output; output.x = _static_cast(vec.x); @@ -452,7 +532,7 @@ struct static_cast_helper, vector, void> template struct static_cast_helper, vector, void> { - static inline emulated_vector_t4 cast(vector vec) + NBL_CONSTEXPR_STATIC_FUNC emulated_vector_t4 cast(NBL_CONST_REF_ARG(vector) vec) { emulated_vector_t4 output; output.x = _static_cast(vec.x); @@ -470,12 +550,13 @@ struct static_cast_helper, emulated_vector_t; using InputVecType = emulated_vector_t; - static inline OutputVecType cast(InputVecType vec) + NBL_CONSTEXPR_STATIC_FUNC OutputVecType cast(NBL_CONST_REF_ARG(InputVecType) vec) { array_get getter; array_set setter; OutputVecType output; + [[unroll]] for (int i = 0; i < N; ++i) setter(output, i, _static_cast(getter(vec, i))); @@ -483,7 +564,77 @@ struct static_cast_helper, emulated_vector_t\ +struct static_cast_helper, emulated_vector_t##N , void>\ +{\ + using OutputVecType = emulated_vector_t##N ;\ + using InputVecType = emulated_vector_t##N ;\ + NBL_CONSTEXPR_STATIC_FUNC OutputVecType cast(NBL_CONST_REF_ARG(InputVecType) vec)\ + {\ + array_get getter;\ + array_set setter;\ + OutputVecType output;\ + [[unroll]]\ + for (int i = 0; i < N; ++i)\ + setter(output, i, _static_cast(getter(vec, i)));\ + return output;\ + }\ +}; + +NBL_EMULATED_VEC_TO_EMULATED_VEC_STATIC_CAST(2) +NBL_EMULATED_VEC_TO_EMULATED_VEC_STATIC_CAST(3) +NBL_EMULATED_VEC_TO_EMULATED_VEC_STATIC_CAST(4) + +#undef NBL_EMULATED_VEC_TO_EMULATED_VEC_STATIC_CAST + +#define NBL_EMULATED_VEC_PROMOTION(N) template\ +struct Promote, ComponentType>\ +{\ + using VecType = emulated_vector_t##N ;\ + NBL_CONSTEXPR_FUNC VecType operator()(NBL_CONST_REF_ARG(ComponentType) v)\ + {\ + array_set setter;\ + VecType promoted;\ + [[unroll]]\ + for (int i = 0; i < N; ++i)\ + setter(promoted, i, v);\ + return promoted;\ + }\ +}; + +NBL_EMULATED_VEC_PROMOTION(2) +NBL_EMULATED_VEC_PROMOTION(3) +NBL_EMULATED_VEC_PROMOTION(4) + +#undef NBL_EMULATED_VEC_PROMOTION + +#define NBL_EMULATED_VEC_TRUNCATION(N, M) template\ +struct Truncate, emulated_vector_t##M >\ +{\ + using OutputVecType = emulated_vector_t##N ;\ + using InputVecType = emulated_vector_t##M ;\ + NBL_CONSTEXPR_FUNC OutputVecType operator()(NBL_CONST_REF_ARG(InputVecType) vec)\ + {\ + array_get getter;\ + array_set setter;\ + OutputVecType output;\ + [[unroll]]\ + for (int i = 0; i < N; ++i)\ + setter(output, i, getter(vec, i));\ + return output;\ + }\ +}; + +NBL_EMULATED_VEC_TRUNCATION(2, 2) +NBL_EMULATED_VEC_TRUNCATION(2, 3) +NBL_EMULATED_VEC_TRUNCATION(2, 4) +NBL_EMULATED_VEC_TRUNCATION(3, 3) +NBL_EMULATED_VEC_TRUNCATION(3, 4) +NBL_EMULATED_VEC_TRUNCATION(4, 4) + +#undef NBL_EMULATED_VEC_TRUNCATION + +} //namespace impl } } diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index 25d822a940..76b527f6bd 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -7,6 +7,7 @@ #include "nbl/builtin/hlsl/glsl_compat/core.hlsl" #include "nbl/builtin/hlsl/limits.hlsl" +#include "nbl/builtin/hlsl/concepts/vector.hlsl" namespace nbl @@ -79,7 +80,7 @@ struct reference_wrapper : enable_if_t< // TODO: partial specializations for T being a special SPIR-V type for image ops, etc. 
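(Aside: to make the Promote/Truncate specializations above concrete, here is a minimal plain-C++ analogue. The array-backed vec<T, N> is a hypothetical stand-in for Nabla's emulated vectors; it only illustrates the broadcast and keep-first-N-components semantics, not the real types.)

#include <array>
#include <cstddef>

template<typename T, std::size_t N>
using vec = std::array<T, N>; // stand-in for emulated_vector_t<N, T>

// Promote: broadcast one scalar into every component
template<typename T, std::size_t N>
vec<T, N> promote(const T& v)
{
    vec<T, N> out;
    for (std::size_t i = 0; i < N; ++i)
        out[i] = v;
    return out;
}

// Truncate: keep the first N components of a wider M-component vector
template<typename T, std::size_t N, std::size_t M>
vec<T, N> truncate(const vec<T, M>& in)
{
    static_assert(N <= M, "can only truncate to a narrower vector");
    vec<T, N> out;
    for (std::size_t i = 0; i < N; ++i)
        out[i] = in[i];
    return out;
}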
-#define ALIAS_STD(NAME,OP) template struct NAME { \
+#define ALIAS_STD(NAME,OP) template struct NAME { \
 using type_t = T; \
 \
 T operator()(NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) \
@@ -90,8 +91,7 @@ struct reference_wrapper : enable_if_t<
 #else // CPP
-
-#define ALIAS_STD(NAME,OP) template struct NAME : std::NAME { \
+#define ALIAS_STD(NAME,OP) template struct NAME : std::NAME { \
 using type_t = T;
 #endif
@@ -134,15 +134,90 @@ ALIAS_STD(divides,/)
 NBL_CONSTEXPR_STATIC_INLINE T identity = T(1);
 };
+#ifndef __HLSL_VERSION
+
+template
+struct bit_not : std::bit_not
+{
+    using type_t = T;
+};
+
+#else
+
+template
+struct bit_not
+{
+    using type_t = T;
+
+    T operator()(NBL_CONST_REF_ARG(T) operand)
+    {
+        return ~operand;
+    }
+};
+
+// The default version above only works for fundamental scalars, vectors and matrices, because you can't call `~x` unless `x` is one of those.
+// Conversely, `x.operator~()` is only valid for types that overload that operator, never for the fundamental ones. So we need a specialization.
+template NBL_PARTIAL_REQ_TOP(!(concepts::Scalar || concepts::Vector || concepts::Matrix))
+struct bit_not || concepts::Vector || concepts::Matrix)) >
+{
+    using type_t = T;
+
+    T operator()(NBL_CONST_REF_ARG(T) operand)
+    {
+        return operand.operator~();
+    }
+};
+
+#endif
-ALIAS_STD(greater,>) };
-ALIAS_STD(less,<) };
-ALIAS_STD(greater_equal,>=) };
-ALIAS_STD(less_equal,<=) };
+ALIAS_STD(equal_to, ==) };
+ALIAS_STD(not_equal_to, !=) };
+ALIAS_STD(greater, >) };
+ALIAS_STD(less, <) };
+ALIAS_STD(greater_equal, >=) };
+ALIAS_STD(less_equal, <=) };
 #undef ALIAS_STD
-// ------------------------ Compound assignment operators ----------------------
+// The comparison operators above return bool in std::, but in HLSL they're supposed to yield bool vectors, so we specialize them to return `vector` for vectorial types
+
+// GLM doesn't have operators on vectors
+#ifndef __HLSL_VERSION
+
+#define NBL_COMPARISON_VECTORIAL_SPECIALIZATION(NAME, OP, GLM_OP) template NBL_PARTIAL_REQ_TOP(concepts::Vectorial)\
+struct NAME ) >\
+{\
+    using type_t = T;\
+    vector::Dimension> operator()(NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs)\
+    {\
+        return glm::GLM_OP (lhs, rhs);\
+    }\
+};
+
+#else
+
+#define NBL_COMPARISON_VECTORIAL_SPECIALIZATION(NAME, OP, GLM_OP) template NBL_PARTIAL_REQ_TOP(concepts::Vectorial)\
+struct NAME ) >\
+{\
+    using type_t = T;\
+    vector::Dimension> operator()(NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs)\
+    {\
+        return lhs OP rhs;\
+    }\
+};
+
+#endif
+
+NBL_COMPARISON_VECTORIAL_SPECIALIZATION(equal_to, ==, equal)
+NBL_COMPARISON_VECTORIAL_SPECIALIZATION(not_equal_to, !=, notEqual)
+NBL_COMPARISON_VECTORIAL_SPECIALIZATION(greater, >, greaterThan)
+NBL_COMPARISON_VECTORIAL_SPECIALIZATION(less, <, lessThan)
+NBL_COMPARISON_VECTORIAL_SPECIALIZATION(greater_equal, >=, greaterThanEqual)
+NBL_COMPARISON_VECTORIAL_SPECIALIZATION(less_equal, <=, lessThanEqual)
+
+#undef NBL_COMPARISON_VECTORIAL_SPECIALIZATION
+
+// ------------------------------------------------------------- COMPOUND ASSIGNMENT OPERATORS --------------------------------------------------------------------
 #define COMPOUND_ASSIGN(NAME) template struct NAME##_assign { \
 using type_t = T; \
@@ -163,9 +238,9 @@ COMPOUND_ASSIGN(divides)
 #undef COMPOUND_ASSIGN
-// ----------------- End of compound assignment ops ----------------
+// ---------------------------------------------------------------- MIN, MAX, TERNARY -------------------------------------------------------------------------
-// Min, Max and Ternary Operator don't use ALIAS_STD because they don't exist in STD
+// Min, Max, Ternary, and Shift operators don't use ALIAS_STD because they don't exist in std
 // TODO: implement as mix(rhs
 struct minimum
@@ -195,18 +270,226 @@ struct maximum
 NBL_CONSTEXPR_STATIC_INLINE T identity = numeric_limits::lowest; // TODO: `all_components`
 };
-template
+template
 struct ternary_operator
 {
 using type_t = T;
-    T operator()(bool condition, NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs)
+    NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(bool) condition, NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs)
+    {
+        return select(condition, lhs, rhs);
+    }
+};
+
+// ----------------------------------------------------------------- SHIFT OPERATORS --------------------------------------------------------------------
+
+template
+struct left_shift_operator
+{
+    using type_t = T;
+
+    NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits)
+    {
+        return operand << bits;
+    }
+};
+
+template NBL_PARTIAL_REQ_TOP(concepts::IntVector)
+struct left_shift_operator) >
+{
+    using type_t = T;
+    using scalar_t = scalar_type_t;
+
+    NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits)
+    {
+        return operand << bits;
+    }
+
+    NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits)
     {
-        return condition ? lhs : rhs;
+        return operand << bits;
     }
 };
-}
-}
+template NBL_PARTIAL_REQ_TOP(!concepts::IntVector && concepts::IntegralLikeVectorial)
+struct left_shift_operator && concepts::IntegralLikeVectorial) >
+{
+    using type_t = T;
+    using scalar_t = typename vector_traits::scalar_type;
+
+    NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits)
+    {
+        array_get getter;
+        array_set setter;
+        NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v);
+        left_shift_operator leftShift;
+        T shifted;
+        [[unroll]]
+        for (uint16_t i = 0; i < extent; i++)
+        {
+            setter(shifted, i, leftShift(getter(operand, i), getter(bits, i)));
+        }
+        return shifted;
+    }
+
+    NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits)
+    {
+        array_get getter;
+        array_set setter;
+        NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v);
+        left_shift_operator leftShift;
+        T shifted;
+        [[unroll]]
+        for (uint16_t i = 0; i < extent; i++)
+        {
+            setter(shifted, i, leftShift(getter(operand, i), bits));
+        }
+        return shifted;
+    }
+
+    NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(vector::Dimension>) bits)
+    {
+        array_get getter;
+        array_set setter;
+        NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v);
+        left_shift_operator leftShift;
+        T shifted;
+        [[unroll]]
+        for (uint16_t i = 0; i < extent; i++)
+        {
+            setter(shifted, i, leftShift(getter(operand, i), bits[i]));
+        }
+        return shifted;
+    }
+
+    NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(uint16_t) bits)
+    {
+        array_get getter;
+        array_set setter;
+        NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v);
+        left_shift_operator leftShift;
+        T shifted;
+        [[unroll]]
+        for (uint16_t i = 0; i < extent; i++)
+        {
+            setter(shifted, i, leftShift(getter(operand, i), bits));
+        }
+        return shifted;
+    }
+};
+
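(Aside: the IntegralLikeVectorial specialization above cannot rely on a native `<<`, so it fans the shift out per component through array_get/array_set. A freestanding C++ sketch of that loop, with a hypothetical two-component carrier struct standing in for the emulated vector:)

#include <cassert>
#include <cstdint>

struct u32x2 { uint32_t c[2]; }; // hypothetical 2-component carrier

// Scalar analogue of the per-component loop above:
// array_get/array_set become plain indexing.
u32x2 leftShiftPerComponent(u32x2 operand, u32x2 bits)
{
    u32x2 shifted;
    for (int i = 0; i < 2; ++i)
        shifted.c[i] = operand.c[i] << bits.c[i];
    return shifted;
}

int main()
{
    const u32x2 r = leftShiftPerComponent({{1u, 2u}}, {{4u, 8u}});
    assert(r.c[0] == 0x10u && r.c[1] == 0x200u);
}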
+template
+struct arithmetic_right_shift_operator
+{
+    using type_t = T;
+
+    NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits)
+    {
+        return operand >> bits;
+    }
+};
+
+template NBL_PARTIAL_REQ_TOP(concepts::IntVector)
+struct arithmetic_right_shift_operator) >
+{
+    using type_t = T;
+    using scalar_t = scalar_type_t;
+
+    NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits)
+    {
+        return operand >> bits;
+    }
+
+    NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits)
+    {
+        return operand >> bits;
+    }
+};
+
+template NBL_PARTIAL_REQ_TOP(!concepts::IntVector && concepts::IntegralLikeVectorial)
+struct arithmetic_right_shift_operator && concepts::IntegralLikeVectorial) >
+{
+    using type_t = T;
+    using scalar_t = typename vector_traits::scalar_type;
+
+    NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits)
+    {
+        array_get getter;
+        array_set setter;
+        NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v);
+        arithmetic_right_shift_operator rightShift;
+        T shifted;
+        [[unroll]]
+        for (uint16_t i = 0; i < extent; i++)
+        {
+            setter(shifted, i, rightShift(getter(operand, i), getter(bits, i)));
+        }
+        return shifted;
+    }
+
+    NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits)
+    {
+        array_get getter;
+        array_set setter;
+        NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v);
+        arithmetic_right_shift_operator rightShift;
+        T shifted;
+        [[unroll]]
+        for (uint16_t i = 0; i < extent; i++)
+        {
+            setter(shifted, i, rightShift(getter(operand, i), bits));
+        }
+        return shifted;
+    }
+
+    NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(vector::Dimension>) bits)
+    {
+        array_get getter;
+        array_set setter;
+        NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v);
+        arithmetic_right_shift_operator rightShift;
+        T shifted;
+        [[unroll]]
+        for (uint16_t i = 0; i < extent; i++)
+        {
+            setter(shifted, i, rightShift(getter(operand, i), bits[i]));
+        }
+        return shifted;
+    }
+
+    NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(uint16_t) bits)
+    {
+        array_get getter;
+        array_set setter;
+        NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v);
+        arithmetic_right_shift_operator rightShift;
+        T shifted;
+        [[unroll]]
+        for (uint16_t i = 0; i < extent; i++)
+        {
+            setter(shifted, i, rightShift(getter(operand, i), bits));
+        }
+        return shifted;
+    }
+};
+
+// Left unimplemented for vectorial types by default
+template
+struct logical_right_shift_operator
+{
+    using type_t = T;
+    using unsigned_type_t = make_unsigned_t;
+
+    NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits)
+    {
+        arithmetic_right_shift_operator arithmeticRightShift;
+        return _static_cast(arithmeticRightShift(_static_cast(operand), _static_cast(bits)));
+    }
+};
+
+
+} //namespace hlsl
+} //namespace nbl
 #endif
\ No newline at end of file
diff --git a/include/nbl/builtin/hlsl/ieee754.hlsl b/include/nbl/builtin/hlsl/ieee754.hlsl
index 4b281c2111..72a9d2fe59 100644
--- a/include/nbl/builtin/hlsl/ieee754.hlsl
+++ b/include/nbl/builtin/hlsl/ieee754.hlsl
@@ -89,7 +89,7 @@ inline int extractExponent(T x)
 }
 
 template
-NBL_CONSTEXPR_INLINE_FUNC T replaceBiasedExponent(T x, typename unsigned_integer_of_size::type biasedExp)
+NBL_CONSTEXPR_FUNC T replaceBiasedExponent(T x, typename unsigned_integer_of_size::type biasedExp)
 {
 using AsFloat = typename float_of_size::type;
 return impl::castBackToFloatType(glsl::bitfieldInsert(ieee754::impl::bitCastToUintType(x), biasedExp, traits::mantissaBitCnt, traits::exponentBitCnt));
@@ -97,20 +97,20 @@
NBL_CONSTEXPR_INLINE_FUNC T replaceBiasedExponent(T x, typename unsigned_integer // performs no overflow tests, returns x*exp2(n) template -NBL_CONSTEXPR_INLINE_FUNC T fastMulExp2(T x, int n) +NBL_CONSTEXPR_FUNC T fastMulExp2(T x, int n) { return replaceBiasedExponent(x, extractBiasedExponent(x) + uint32_t(n)); } template -NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type extractMantissa(T x) +NBL_CONSTEXPR_FUNC typename unsigned_integer_of_size::type extractMantissa(T x) { using AsUint = typename unsigned_integer_of_size::type; return ieee754::impl::bitCastToUintType(x) & traits::type>::mantissaMask; } template -NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type extractNormalizeMantissa(T x) +NBL_CONSTEXPR_FUNC typename unsigned_integer_of_size::type extractNormalizeMantissa(T x) { using AsUint = typename unsigned_integer_of_size::type; using AsFloat = typename float_of_size::type; @@ -118,21 +118,21 @@ NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type ext } template -NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type extractSign(T x) +NBL_CONSTEXPR_FUNC typename unsigned_integer_of_size::type extractSign(T x) { using AsFloat = typename float_of_size::type; return (ieee754::impl::bitCastToUintType(x) & traits::signMask) >> ((sizeof(T) * 8) - 1); } template -NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type extractSignPreserveBitPattern(T x) +NBL_CONSTEXPR_FUNC typename unsigned_integer_of_size::type extractSignPreserveBitPattern(T x) { using AsFloat = typename float_of_size::type; return ieee754::impl::bitCastToUintType(x) & traits::signMask; } template ) -NBL_CONSTEXPR_INLINE_FUNC FloatingPoint copySign(FloatingPoint to, FloatingPoint from) +NBL_CONSTEXPR_FUNC FloatingPoint copySign(FloatingPoint to, FloatingPoint from) { using AsUint = typename unsigned_integer_of_size::type; @@ -143,7 +143,7 @@ NBL_CONSTEXPR_INLINE_FUNC FloatingPoint copySign(FloatingPoint to, FloatingPoint } template ) -NBL_CONSTEXPR_INLINE_FUNC FloatingPoint flipSign(FloatingPoint val, bool flip = true) +NBL_CONSTEXPR_FUNC FloatingPoint flipSign(FloatingPoint val, bool flip = true) { using AsFloat = typename float_of_size::type; using AsUint = typename unsigned_integer_of_size::type; diff --git a/include/nbl/builtin/hlsl/ieee754/impl.hlsl b/include/nbl/builtin/hlsl/ieee754/impl.hlsl index ad8a3f9228..69fba9795f 100644 --- a/include/nbl/builtin/hlsl/ieee754/impl.hlsl +++ b/include/nbl/builtin/hlsl/ieee754/impl.hlsl @@ -15,25 +15,25 @@ namespace ieee754 namespace impl { template -NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size_t bitCastToUintType(T x) +NBL_CONSTEXPR_FUNC unsigned_integer_of_size_t bitCastToUintType(T x) { using AsUint = unsigned_integer_of_size_t; return bit_cast(x); } // to avoid bit cast from uintN_t to uintN_t -template <> NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size_t<2> bitCastToUintType(uint16_t x) { return x; } -template <> NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size_t<4> bitCastToUintType(uint32_t x) { return x; } -template <> NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size_t<8> bitCastToUintType(uint64_t x) { return x; } +template <> NBL_CONSTEXPR_FUNC unsigned_integer_of_size_t<2> bitCastToUintType(uint16_t x) { return x; } +template <> NBL_CONSTEXPR_FUNC unsigned_integer_of_size_t<4> bitCastToUintType(uint32_t x) { return x; } +template <> NBL_CONSTEXPR_FUNC unsigned_integer_of_size_t<8> bitCastToUintType(uint64_t x) { return x; } template -NBL_CONSTEXPR_INLINE_FUNC T castBackToFloatType(T x) 
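(Aside: as a sanity check on the exponent-manipulation helpers above, here is a freestanding C++ illustration of the same trick, adding n to the biased exponent field to scale by exp2(n). The bit-layout constant is for IEEE-754 binary32 specifically, not Nabla's generic traits.)

#include <cassert>
#include <cstdint>
#include <cstring>

// Scale a finite, normal float by 2^n by bumping the biased exponent field.
// No overflow/subnormal checks, mirroring the "performs no overflow tests" note.
float fastMulExp2(float x, int n)
{
    uint32_t bits;
    std::memcpy(&bits, &x, sizeof(bits));
    bits += static_cast<uint32_t>(n) << 23; // 23 = mantissa bit count of binary32
    float out;
    std::memcpy(&out, &bits, sizeof(out));
    return out;
}

int main()
{
    assert(fastMulExp2(1.5f, 3) == 12.0f); // 1.5 * 2^3
    assert(fastMulExp2(8.0f, -2) == 2.0f); // 8 * 2^-2
}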
+NBL_CONSTEXPR_FUNC T castBackToFloatType(T x)
 {
 using AsFloat = typename float_of_size::type;
 return bit_cast(x);
 }
-template<> NBL_CONSTEXPR_INLINE_FUNC uint16_t castBackToFloatType(uint16_t x) { return x; }
-template<> NBL_CONSTEXPR_INLINE_FUNC uint32_t castBackToFloatType(uint32_t x) { return x; }
-template<> NBL_CONSTEXPR_INLINE_FUNC uint64_t castBackToFloatType(uint64_t x) { return x; }
+template<> NBL_CONSTEXPR_FUNC uint16_t castBackToFloatType(uint16_t x) { return x; }
+template<> NBL_CONSTEXPR_FUNC uint32_t castBackToFloatType(uint32_t x) { return x; }
+template<> NBL_CONSTEXPR_FUNC uint64_t castBackToFloatType(uint64_t x) { return x; }
 }
 }
diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl
new file mode 100644
index 0000000000..d570e249c8
--- /dev/null
+++ b/include/nbl/builtin/hlsl/morton.hlsl
@@ -0,0 +1,624 @@
+#ifndef _NBL_BUILTIN_HLSL_MORTON_INCLUDED_
+#define _NBL_BUILTIN_HLSL_MORTON_INCLUDED_
+
+#include "nbl/builtin/hlsl/cpp_compat.hlsl"
+#include "nbl/builtin/hlsl/concepts/core.hlsl"
+#include "nbl/builtin/hlsl/bit.hlsl"
+#include "nbl/builtin/hlsl/functional.hlsl"
+#include "nbl/builtin/hlsl/emulated/int64_t.hlsl"
+#include "nbl/builtin/hlsl/mpl.hlsl"
+#include "nbl/builtin/hlsl/portable/vector_t.hlsl"
+
+// TODO: mega macro to get functional plus, minus, plus_assign, minus_assign
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace morton
+{
+
+namespace impl
+{
+
+// Valid dimension for a morton code
+template
+NBL_BOOL_CONCEPT Dimension = 1 < D && D < 5;
+
+// --------------------------------------------------------- MORTON ENCODE/DECODE MASKS ---------------------------------------------------
+
+NBL_CONSTEXPR uint16_t CodingStages = 5;
+
+template
+struct coding_mask;
+
+template
+NBL_CONSTEXPR T coding_mask_v = _static_cast(coding_mask::value);
+
+// It's a complete coin toss whether variable templates work or not, since they're a C++14 feature (not supported in HLSL2021). Most of the ones we use in Nabla work,
+// but this one will only work for some parameters and not for others. Therefore, this was made into a macro to inline where used
+
+#define NBL_MORTON_INTERLEAVE_MASKS(STORAGE_T, DIM, BITS, NAMESPACE_PREFIX) _static_cast >(\
+    truncate >(\
+        vector(NAMESPACE_PREFIX coding_mask_v< DIM, BITS, 0>,\
+            NAMESPACE_PREFIX coding_mask_v< DIM, BITS, 0> << 1,\
+            NAMESPACE_PREFIX coding_mask_v< DIM, BITS, 0> << 2,\
+            NAMESPACE_PREFIX coding_mask_v< DIM, BITS, 0> << 3)))
+
+
+template
+struct sign_mask : integral_constant {};
+
+template
+NBL_CONSTEXPR T sign_mask_v = _static_cast(sign_mask::value);
+
+#define NBL_MORTON_SIGN_MASKS(STORAGE_T, DIM, BITS) _static_cast >(\
+    truncate >(\
+        vector(sign_mask_v< DIM, BITS >,\
+            sign_mask_v< DIM, BITS > << 1,\
+            sign_mask_v< DIM, BITS > << 2,\
+            sign_mask_v< DIM, BITS > << 3)))
+
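(Aside: to make the staged masking concrete before the coding_mask specializations defined just below, here is a freestanding C++ transcription of the D = 2, Bits = 16 instance of the encode ladder, with a hypothetical helper name. Each stage ORs in a shifted copy and then masks, spreading every source bit to an even position.)

#include <cassert>
#include <cstdint>

// Spread the low 16 bits of x so each bit lands at an even position (D = 2).
uint32_t interleaveZeros16(uint32_t x)
{
    x &= 0x0000FFFFu;                  // stage-0 "killoff": only Bits*D bits matter
    x = (x | (x << 8)) & 0x00FF00FFu;  // groups bits by 8 on, 8 off
    x = (x | (x << 4)) & 0x0F0F0F0Fu;  // groups bits by 4 on, 4 off
    x = (x | (x << 2)) & 0x33333333u;  // groups bits by 2 on, 2 off
    x = (x | (x << 1)) & 0x55555555u;  // groups bits by 1 on, 1 off
    return x;
}

int main()
{
    // bits 0,1,3 of the input land at bits 0,2,6 of the output
    assert(interleaveZeros16(0b1011u) == 0b1000101u);
}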
+// 0th stage will be special: to avoid masking twice during encode/decode, and to get a proper mask that only extracts the relevant bits of a morton code, the 0th stage
+// mask also considers the total number of bits we're considering for a code (all other masks operate on a bit-agnostic basis).
+#define NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(DIM, BASE_VALUE) template struct coding_mask\
+{\
+    enum : uint64_t { _Bits = Bits };\
+    NBL_CONSTEXPR_STATIC_INLINE uint64_t KilloffMask = _Bits * DIM < 64 ? (uint64_t(1) << (_Bits * DIM)) - 1 : ~uint64_t(0);\
+    NBL_CONSTEXPR_STATIC_INLINE uint64_t value = uint64_t(BASE_VALUE) & KilloffMask;\
+};
+
+#define NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(DIM, STAGE, BASE_VALUE) template struct coding_mask\
+{\
+    NBL_CONSTEXPR_STATIC_INLINE uint64_t value = uint64_t(BASE_VALUE);\
+};
+
+// Final stage mask also counts exact number of bits, although maybe it's not necessary
+#define NBL_HLSL_MORTON_SPECIALIZE_LAST_CODING_MASKS template struct coding_mask\
+{\
+    enum : uint64_t { _Bits = Bits };\
+    NBL_CONSTEXPR_STATIC_INLINE uint64_t value = (uint64_t(1) << _Bits) - 1;\
+};
+
+NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(2, 0x5555555555555555ull) // Groups bits by 1 on, 1 off
+NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 1, 0x3333333333333333ull) // Groups bits by 2 on, 2 off
+NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 2, 0x0F0F0F0F0F0F0F0Full) // Groups bits by 4 on, 4 off
+NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 3, 0x00FF00FF00FF00FFull) // Groups bits by 8 on, 8 off
+NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 4, 0x0000FFFF0000FFFFull) // Groups bits by 16 on, 16 off
+
+NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(3, 0x9249249249249249ull) // Groups bits by 1 on, 2 off
+NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 1, 0x30C30C30C30C30C3ull) // Groups bits by 2 on, 4 off
+NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 2, 0xF00F00F00F00F00Full) // Groups bits by 4 on, 8 off
+NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 3, 0x00FF0000FF0000FFull) // Groups bits by 8 on, 16 off
+NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 4, 0xFFFF00000000FFFFull) // Groups bits by 16 on, 32 off
+
+NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(4, 0x1111111111111111ull) // Groups bits by 1 on, 3 off
+NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 1, 0x0303030303030303ull) // Groups bits by 2 on, 6 off
+NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 2, 0x000F000F000F000Full) // Groups bits by 4 on, 12 off
+NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 3, 0x000000FF000000FFull) // Groups bits by 8 on, 24 off
+NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 4, 0x000000000000FFFFull) // Groups bits by 16 on, 48 off (unused, but here for completeness; likely also keeps the compiler from complaining)
+
+NBL_HLSL_MORTON_SPECIALIZE_LAST_CODING_MASKS
+
+#undef NBL_HLSL_MORTON_SPECIALIZE_LAST_CODING_MASKS
+#undef NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK
+#undef NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK
+
+// ----------------------------------------------------------------- MORTON TRANSCODER ---------------------------------------------------
+template && Dim * Bits <= 64 && 8 * sizeof(encode_t) == mpl::max_v, uint64_t(16)>)
+struct Transcoder
+{
+    template 16), vector, vector >
+    NBL_FUNC_REQUIRES(concepts::IntVector && 8 * sizeof(typename vector_traits::scalar_type) >= Bits)
+    /**
+    * @brief Interleaves each coordinate with `Dim - 1` zeros in between each bit, and left-shifts each by its coordinate index
+    *
+    * @param [in] decodedValue Cartesian coordinates to interleave and shift
+    */
+    NBL_CONSTEXPR_STATIC_FUNC portable_vector_t interleaveShift(NBL_CONST_REF_ARG(decode_t) decodedValue)
+    {
+        left_shift_operator > leftShift;
+        portable_vector_t interleaved = _static_cast >(decodedValue) & coding_mask_v;
+
+        #define ENCODE_LOOP_ITERATION(I) NBL_IF_CONSTEXPR(Bits > (uint16_t(1) << I))\
+        {\
+            interleaved = interleaved | leftShift(interleaved, (uint16_t(1) << I) * (Dim - 1));\
+            interleaved = interleaved & coding_mask_v;\
+        }
+        ENCODE_LOOP_ITERATION(4)
+        ENCODE_LOOP_ITERATION(3)
+        ENCODE_LOOP_ITERATION(2)
+        ENCODE_LOOP_ITERATION(1)
+ ENCODE_LOOP_ITERATION(0) + + #undef ENCODE_LOOP_ITERATION + + // After interleaving, shift each coordinate left by their index + return leftShift(interleaved, truncate >(vector(0, 1, 2, 3))); + } + + template 16), vector, vector > + NBL_FUNC_REQUIRES(concepts::IntVector && 8 * sizeof(typename vector_traits::scalar_type) >= Bits) + /** + * @brief Encodes a vector of cartesian coordinates as a Morton code + * + * @param [in] decodedValue Cartesian coordinates to encode + */ + NBL_CONSTEXPR_STATIC_FUNC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) + { + const portable_vector_t interleaveShifted = interleaveShift(decodedValue); + + array_get, encode_t> getter; + encode_t encoded = getter(interleaveShifted, 0); + + [[unroll]] + for (uint16_t i = 1; i < Dim; i++) + encoded = encoded | getter(interleaveShifted, i); + + return encoded; + } + + template 16), vector, vector > + NBL_FUNC_REQUIRES(concepts::IntVector && 8 * sizeof(typename vector_traits::scalar_type) >= Bits) + /** + * @brief Decodes a Morton code back to a vector of cartesian coordinates + * + * @param [in] encodedValue Representation of a Morton code (binary code, not the morton class defined below) + */ + NBL_CONSTEXPR_STATIC_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) + { + arithmetic_right_shift_operator encodedRightShift; + portable_vector_t decoded; + array_set, encode_t> setter; + // Write initial values into decoded + [[unroll]] + for (uint16_t i = 0; i < Dim; i++) + setter(decoded, i, encodedRightShift(encodedValue, i)); + + arithmetic_right_shift_operator > rightShift; + + #define DECODE_LOOP_ITERATION(I) NBL_IF_CONSTEXPR(Bits > (uint16_t(1) << I))\ + {\ + decoded = decoded & coding_mask_v;\ + decoded = decoded | rightShift(decoded, (uint16_t(1) << I) * (Dim - 1));\ + } + + DECODE_LOOP_ITERATION(0) + DECODE_LOOP_ITERATION(1) + DECODE_LOOP_ITERATION(2) + DECODE_LOOP_ITERATION(3) + DECODE_LOOP_ITERATION(4) + + #undef DECODE_LOOP_ITERATION + + // If `Bits` is greater than half the bitwidth of the decode type, then we can avoid `&`ing against the last mask since duplicated MSB get truncated + NBL_IF_CONSTEXPR(Bits > 4 * sizeof(typename vector_traits::scalar_type)) + return _static_cast(decoded); + else + return _static_cast(decoded & coding_mask_v); + } +}; + +// ---------------------------------------------------- COMPARISON OPERATORS --------------------------------------------------------------- +// Here because no partial specialization of methods +// `BitsAlreadySpread` assumes both pre-interleaved and pre-shifted + +template +NBL_BOOL_CONCEPT Comparable = concepts::IntegralLikeScalar && is_signed_v == Signed && ((BitsAlreadySpread && sizeof(I) == sizeof(storage_t)) || (!BitsAlreadySpread && 8 * sizeof(I) == mpl::max_v, uint64_t(16)>)); + +template +struct Equals; + +template +struct Equals +{ + template) + NBL_CONSTEXPR_STATIC_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) + { + const portable_vector_t InterleaveMasks = NBL_MORTON_INTERLEAVE_MASKS(storage_t, D, Bits, ); + const portable_vector_t zeros = _static_cast >(truncate >(vector(0,0,0,0))); + + const portable_vector_t rhsCasted = _static_cast >(rhs); + const portable_vector_t xored = rhsCasted ^ (InterleaveMasks & value); + equal_to > equal; + return equal(xored, zeros); + } +}; + +template +struct Equals +{ + template) + NBL_CONSTEXPR_STATIC_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) + { + using right_sign_t = conditional_t, make_unsigned_t 
>; + const portable_vector_t interleaved = _static_cast >(Transcoder::interleaveShift(rhs)); + return Equals::template __call(value, interleaved); + } +}; + +template +struct BaseComparison; + +// Aux variable that has only the sign bit for the first of D dimensions +template +NBL_CONSTEXPR uint64_t SignMask = uint64_t(1) << (D * (Bits - 1)); + +template +struct BaseComparison +{ + template) + NBL_CONSTEXPR_STATIC_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) + { + const portable_vector_t InterleaveMasks = NBL_MORTON_INTERLEAVE_MASKS(storage_t, D, Bits, ); + const portable_vector_t SignMasks = NBL_MORTON_SIGN_MASKS(storage_t, D, Bits); + ComparisonOp comparison; + NBL_IF_CONSTEXPR(Signed) + { + // Obtain a vector of deinterleaved coordinates and flip their sign bits + portable_vector_t thisCoord = (InterleaveMasks & value) ^ SignMasks; + // rhs already deinterleaved, just have to cast type and flip sign + const portable_vector_t rhsCoord = _static_cast >(rhs) ^ SignMasks; + + return comparison(thisCoord, rhsCoord); + } + else + { + // Obtain a vector of deinterleaved coordinates + portable_vector_t thisCoord = InterleaveMasks & value; + // rhs already deinterleaved, just have to cast type + const portable_vector_t rhsCoord = _static_cast >(rhs); + + return comparison(thisCoord, rhsCoord); + } + + } +}; + +template +struct BaseComparison +{ + template) + NBL_CONSTEXPR_STATIC_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) + { + using right_sign_t = conditional_t, make_unsigned_t >; + const portable_vector_t interleaved = _static_cast >(Transcoder::interleaveShift(rhs)); + return BaseComparison::template __call(value, interleaved); + } +}; + +template +struct LessThan : BaseComparison > > {}; + +template +struct LessEquals : BaseComparison > > {}; + +template +struct GreaterThan : BaseComparison > > {}; + +template +struct GreaterEquals : BaseComparison > > {}; + +} //namespace impl + +// Making this even slightly less ugly is blocked by https://github.com/microsoft/DirectXShaderCompiler/issues/7006 +// In particular, `Masks` should be a `const static` member field instead of appearing in every method using it +template && D * Bits <= 64) +struct code +{ + using this_t = code; + using this_signed_t = code; + NBL_CONSTEXPR_STATIC uint16_t TotalBitWidth = D * Bits; + using storage_t = conditional_t<(TotalBitWidth > 16), conditional_t<(TotalBitWidth > 32), _uint64_t, uint32_t>, uint16_t>; + + storage_t value; + + // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- + + #ifndef __HLSL_VERSION + + code() = default; + + #endif + + /** + * @brief Creates a Morton code from a set of integral cartesian coordinates + * + * @param [in] cartesian Coordinates to encode. 
Signedness MUST match the signedness of this Morton code class
+    */
+    template
+    NBL_CONSTEXPR_STATIC_FUNC enable_if_t && is_scalar_v && (is_signed_v == Signed) && (8 * sizeof(I) >= Bits), this_t>
+    create(NBL_CONST_REF_ARG(vector) cartesian)
+    {
+        this_t retVal;
+        retVal.value = impl::Transcoder::encode(cartesian);
+        return retVal;
+    }
+
+    // CPP can also have an actual constructor
+    #ifndef __HLSL_VERSION
+
+    /**
+    * @brief Creates a Morton code from a set of cartesian coordinates
+    *
+    * @param [in] cartesian Coordinates to encode
+    */
+    template= Bits)
+    inline explicit code(NBL_CONST_REF_ARG(vector) cartesian)
+    {
+        *this = create(cartesian);
+    }
+
+    /**
+    * @brief Decodes this Morton code back to a set of cartesian coordinates
+    */
+    template= Bits && is_signed_v == Signed)
+    constexpr explicit operator vector() const noexcept;
+
+    #endif
+
+    // ------------------------------------------------------- BITWISE OPERATORS -------------------------------------------------
+
+    NBL_CONSTEXPR_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC
+    {
+        this_t retVal;
+        retVal.value = value & rhs.value;
+        return retVal;
+    }
+
+    NBL_CONSTEXPR_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC
+    {
+        this_t retVal;
+        retVal.value = value | rhs.value;
+        return retVal;
+    }
+
+    NBL_CONSTEXPR_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC
+    {
+        this_t retVal;
+        retVal.value = value ^ rhs.value;
+        return retVal;
+    }
+
+    NBL_CONSTEXPR_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC
+    {
+        this_t retVal;
+        retVal.value = ~value;
+        return retVal;
+    }
+
+    // Only valid in CPP
+    #ifndef __HLSL_VERSION
+
+    constexpr this_t operator<<(uint16_t bits) const;
+
+    constexpr this_t operator>>(uint16_t bits) const;
+
+    #endif
+
+    // ------------------------------------------------------- UNARY ARITHMETIC OPERATORS -------------------------------------------------
+
+    NBL_CONSTEXPR_FUNC this_signed_t operator-() NBL_CONST_MEMBER_FUNC
+    {
+        this_t zero;
+        zero.value = _static_cast(0);
+        #ifndef __HLSL_VERSION
+        return zero - *this;
+        #else
+        return zero - this;
+        #endif
+    }
+
+    // ------------------------------------------------------- BINARY ARITHMETIC OPERATORS -------------------------------------------------
+
+    // put 1 bits in all the bit positions the current axis is not using
+    // then extract just the axis bits for the right hand coordinate
+    // carry-1 will propagate the bits across the already set bits
+    // then clear out the bits not belonging to the current axis
+    // Note: It's possible to clear on `this` and fill on `rhs` but that will
+    // disable optimizations; we expect the compiler to optimize a lot if the
+    // value of `rhs` is known at compile time, e.g.
`static_cast>(glm::ivec3(1,0,0))` + NBL_CONSTEXPR_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + const portable_vector_t InterleaveMasks = NBL_MORTON_INTERLEAVE_MASKS(storage_t, D, Bits, impl::); + bit_not > bitnot; + // For each coordinate, leave its bits intact and turn every other bit ON + const portable_vector_t counterMaskedValue = bitnot(InterleaveMasks) | value; + // For each coordinate in rhs, leave its bits intact and turn every other bit OFF + const portable_vector_t maskedRhsValue = InterleaveMasks & rhs.value; + // Add these coordinate-wise, then turn all bits not belonging to the current coordinate OFF + const portable_vector_t interleaveShiftedResult = (counterMaskedValue + maskedRhsValue) & InterleaveMasks; + // Re-encode the result + array_get, storage_t> getter; + this_t retVal; + retVal.value = getter(interleaveShiftedResult, 0); + [[unroll]] + for (uint16_t i = 1; i < D; i++) + retVal.value = retVal.value | getter(interleaveShiftedResult, i); + return retVal; + } + + // This is the dual trick of the one used for addition: set all other bits to 0 so borrows propagate + NBL_CONSTEXPR_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + const portable_vector_t InterleaveMasks = NBL_MORTON_INTERLEAVE_MASKS(storage_t, D, Bits, impl::); + // For each coordinate, leave its bits intact and turn every other bit OFF + const portable_vector_t maskedValue = InterleaveMasks & value; + // Do the same for each coordinate in rhs + const portable_vector_t maskedRhsValue = InterleaveMasks & rhs.value; + // Subtract these coordinate-wise, then turn all bits not belonging to the current coordinate OFF + const portable_vector_t interleaveShiftedResult = (maskedValue - maskedRhsValue) & InterleaveMasks; + // Re-encode the result + array_get, storage_t> getter; + this_t retVal; + retVal.value = getter(interleaveShiftedResult, 0); + [[unroll]] + for (uint16_t i = 1; i < D; i++) + retVal.value = retVal.value | getter(interleaveShiftedResult, i); + + return retVal; + } + + // ------------------------------------------------------- COMPARISON OPERATORS ------------------------------------------------- + + NBL_CONSTEXPR_FUNC bool operator==(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + return value == rhs.value; + } + + template) + NBL_CONSTEXPR_FUNC vector equal(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + { + return impl::Equals::template __call(value, rhs); + } + + NBL_CONSTEXPR_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + return value != rhs.value; + } + + template) + NBL_CONSTEXPR_FUNC vector notEqual(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + { + return !equal(rhs); + } + + template) + NBL_CONSTEXPR_FUNC vector lessThan(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + { + return impl::LessThan::template __call(value, rhs); + } + + template) + NBL_CONSTEXPR_FUNC vector lessThanEquals(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + { + return impl::LessEquals::template __call(value, rhs); + } + + template) + NBL_CONSTEXPR_FUNC vector greaterThan(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + { + return impl::GreaterThan::template __call(value, rhs); + } + + template) + NBL_CONSTEXPR_FUNC vector greaterThanEquals(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + { + return impl::GreaterEquals::template __call(value, rhs); + } + +}; + +} //namespace morton + +// Specialize the `static_cast_helper` +namespace impl +{ + +// I must be of same 
signedness as the morton code, and be wide enough to hold each component +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && 8 * sizeof(I) >= Bits) +struct static_cast_helper, morton::code, Bits, D, _uint64_t> NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && 8 * sizeof(I) >= Bits) > +{ + NBL_CONSTEXPR_STATIC_FUNC vector cast(NBL_CONST_REF_ARG(morton::code, Bits, D, _uint64_t>) val) + { + using storage_t = typename morton::code, Bits, D, _uint64_t>::storage_t; + return morton::impl::Transcoder::decode(val.value); + } +}; + +} // namespace impl + +template +struct left_shift_operator > +{ + using type_t = morton::code; + using storage_t = typename type_t::storage_t; + + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + { + left_shift_operator valueLeftShift; + type_t retVal; + // Shift every coordinate by `bits` + retVal.value = valueLeftShift(operand.value, bits * D); + // Previous shift might move bits to positions that storage has available but the morton code does not use + // Un-decoding the resulting morton is still fine and produces expected results, but some operations such as equality expect these unused bits to be 0 so we mask them off + const uint64_t UsedBitsMask = Bits * D < 64 ? (uint64_t(1) << (Bits * D)) - 1 : ~uint64_t(0); + retVal.value = retVal.value & _static_cast(UsedBitsMask); + return retVal; + } +}; + +template +struct arithmetic_right_shift_operator > +{ + using type_t = morton::code; + using storage_t = typename type_t::storage_t; + + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + { + arithmetic_right_shift_operator valueArithmeticRightShift; + type_t retVal; + // Shift every coordinate by `bits` + retVal.value = valueArithmeticRightShift(operand.value, bits * D); + return retVal; + } +}; + +// This one's uglier - have to unpack to get the expected behaviour +template +struct arithmetic_right_shift_operator > +{ + using type_t = morton::code; + using scalar_t = conditional_t<(Bits > 16), int32_t, int16_t>; + + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + { + vector cartesian = _static_cast >(operand); + // To avoid branching, we left-shift each coordinate to put the MSB (of the encoded Morton) at the position of the MSB (of the `scalar_t` used for the decoded coordinate), + // then right-shift again to get correct sign on each coordinate + // The number of bits we shift by to put MSB of Morton at MSB of `scalar_t` is the difference between the bitwidth of `scalar_t` and Bits + const scalar_t ShiftFactor = scalar_t(8 * sizeof(scalar_t) - Bits); + cartesian <<= ShiftFactor; + cartesian >>= ShiftFactor + scalar_t(bits); + return type_t::create(cartesian); + } +}; + +#ifndef __HLSL_VERSION + +template&& D* Bits <= 64) +constexpr morton::code morton::code::operator<<(uint16_t bits) const +{ + left_shift_operator> leftShift; + return leftShift(*this, bits); +} + +template&& D* Bits <= 64) +constexpr morton::code morton::code::operator>>(uint16_t bits) const +{ + arithmetic_right_shift_operator> rightShift; + return rightShift(*this, bits); +} + +template && D* Bits <= 64) +template = Bits && is_signed_v == Signed) +constexpr morton::code::operator vector() const noexcept +{ + return _static_cast, morton::code>(*this); +} + +#endif + +#undef NBL_MORTON_INTERLEAVE_MASKS +#undef NBL_MORTON_SIGN_MASKS + +} //namespace hlsl +} //namespace nbl + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/mpl.hlsl 
b/include/nbl/builtin/hlsl/mpl.hlsl index 2015b05b3d..67f6445324 100644 --- a/include/nbl/builtin/hlsl/mpl.hlsl +++ b/include/nbl/builtin/hlsl/mpl.hlsl @@ -43,13 +43,23 @@ struct countl_zero : impl::countl_zero template NBL_CONSTEXPR T countl_zero_v = countl_zero::value; +template +struct is_pot : bool_constant< (N > 0 && !(N & (N - 1))) > {}; +template +NBL_CONSTEXPR bool is_pot_v = is_pot::value; + template struct log2 { NBL_CONSTEXPR_STATIC_INLINE uint16_t value = X ? (1ull<<6)-countl_zero::value-1 : -1ull; }; template -NBL_CONSTEXPR uint64_t log2_v = log2::value; +NBL_CONSTEXPR uint16_t log2_v = log2::value; + +template +struct log2_ceil : integral_constant + uint16_t(!is_pot_v)> {}; +template +NBL_CONSTEXPR uint16_t log2_ceil_v = log2_ceil::value; template struct rotl @@ -79,11 +89,6 @@ struct align_up template NBL_CONSTEXPR uint64_t align_up_v = align_up::value; -template -struct is_pot : bool_constant< (N > 0 && !(N & (N - 1))) > {}; -template -NBL_CONSTEXPR bool is_pot_v = is_pot::value; - template struct max { @@ -99,6 +104,17 @@ struct min }; template NBL_CONSTEXPR T min_v = min::value; + +template +struct round_up_to_pot : integral_constant > {}; +template +NBL_CONSTEXPR uint64_t round_up_to_pot_v = round_up_to_pot::value; + +template +struct round_down_to_pot : integral_constant > {}; +template +NBL_CONSTEXPR uint64_t round_down_to_pot_v = round_down_to_pot::value; + } } } diff --git a/include/nbl/builtin/hlsl/portable/int64_t.hlsl b/include/nbl/builtin/hlsl/portable/int64_t.hlsl new file mode 100644 index 0000000000..2dffa40a2d --- /dev/null +++ b/include/nbl/builtin/hlsl/portable/int64_t.hlsl @@ -0,0 +1,36 @@ +#ifndef _NBL_BUILTIN_HLSL_PORTABLE_INT64_T_INCLUDED_ +#define _NBL_BUILTIN_HLSL_PORTABLE_INT64_T_INCLUDED_ + +#include +#include + +// define NBL_FORCE_EMULATED_INT_64 to force using emulated int64 types + +namespace nbl +{ +namespace hlsl +{ +#ifdef __HLSL_VERSION +#ifdef NBL_FORCE_EMULATED_INT_64 +template +using portable_uint64_t = emulated_uint64_t; +template +using portable_int64_t = emulated_int64_t; +#else +template +using portable_uint64_t = typename conditional::shaderInt64, uint64_t, emulated_uint64_t>::type; +template +using portable_int64_t = typename conditional::shaderInt64, int64_t, emulated_int64_t>::type; +#endif + +#else +template +using portable_uint64_t = uint64_t; +template +using portable_int64_t = int64_t; +#endif + +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/portable/vector_t.hlsl b/include/nbl/builtin/hlsl/portable/vector_t.hlsl index ace199e20b..16d5b40f81 100644 --- a/include/nbl/builtin/hlsl/portable/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/portable/vector_t.hlsl @@ -3,6 +3,7 @@ #include #include +#include namespace nbl { @@ -36,19 +37,53 @@ template using portable_vector_t4 = portable_vector_t; #ifdef __HLSL_VERSION +// Float template using portable_float64_t2 = portable_vector_t2 >; template using portable_float64_t3 = portable_vector_t3 >; template using portable_float64_t4 = portable_vector_t4 >; + +// Uint +template +using portable_uint64_t2 = portable_vector_t2 >; +template +using portable_uint64_t3 = portable_vector_t3 >; +template +using portable_uint64_t4 = portable_vector_t4 >; + +//Int +template +using portable_int64_t2 = portable_vector_t2 >; +template +using portable_int64_t3 = portable_vector_t3 >; +template +using portable_int64_t4 = portable_vector_t4 >; #else +// Float template using portable_float64_t2 = portable_vector_t2; template using portable_float64_t3 = portable_vector_t3; 
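(Aside, backing up to the mpl additions above: log2_ceil and the power-of-two rounding helpers compose exactly as in this freestanding constexpr sketch for n >= 1; the names mirror the traits but are illustrative only.)

#include <cstdint>

constexpr bool is_pot(uint64_t n) { return n > 0 && !(n & (n - 1)); }

constexpr uint16_t log2_floor(uint64_t n) { return n <= 1 ? 0 : 1 + log2_floor(n >> 1); }

constexpr uint16_t log2_ceil(uint64_t n) { return log2_floor(n) + (is_pot(n) ? 0 : 1); }

constexpr uint64_t round_up_to_pot(uint64_t n) { return uint64_t(1) << log2_ceil(n); }

constexpr uint64_t round_down_to_pot(uint64_t n) { return uint64_t(1) << log2_floor(n); }

static_assert(log2_ceil(5) == 3 && round_up_to_pot(5) == 8, "5 rounds up to 8");
static_assert(log2_ceil(8) == 3 && round_up_to_pot(8) == 8, "exact powers stay put");
static_assert(round_down_to_pot(5) == 4, "5 rounds down to 4");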
template using portable_float64_t4 = portable_vector_t4; + +// Uint +template +using portable_uint64_t2 = portable_vector_t2; +template +using portable_uint64_t3 = portable_vector_t3; +template +using portable_uint64_t4 = portable_vector_t4; + +// Int +template +using portable_int64_t2 = portable_vector_t2; +template +using portable_int64_t3 = portable_vector_t3; +template +using portable_int64_t4 = portable_vector_t4; #endif } diff --git a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl index 7da69c4a55..8afceb7fef 100644 --- a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl +++ b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl @@ -4,13 +4,15 @@ #ifndef _NBL_BUILTIN_HLSL_SPIRV_INTRINSICS_CORE_INCLUDED_ #define _NBL_BUILTIN_HLSL_SPIRV_INTRINSICS_CORE_INCLUDED_ +#include + #ifdef __HLSL_VERSION // TODO: AnastZIuk fix public search paths so we don't choke #include "spirv/unified1/spirv.hpp" #include #include #include -#include +#include namespace nbl { @@ -112,7 +114,12 @@ NBL_CONSTEXPR_STATIC_INLINE bool is_bda_pointer_v = is_bda_pointer::value; //! General Operations - + +//! Miscellaneous Instructions +template +[[vk::ext_instruction(spv::OpUndef)]] +T undef(); + // template [[vk::ext_instruction(spv::OpAccessChain)]] @@ -338,6 +345,11 @@ template [[vk::ext_instruction(spv::OpAny)]] enable_if_t&& is_same_v::scalar_type, bool>, BooleanVector> any(BooleanVector vec); +// If Condition is a vector, ResultType must be a vector with the same number of components. Using (p -> q) = (~p v q) +template && (! concepts::Vector || (concepts::Vector && (extent_v == extent_v)))) +[[vk::ext_instruction(spv::OpSelect)]] +ResultType select(Condition condition, ResultType object1, ResultType object2); + template) [[vk::ext_instruction(spv::OpIAddCarry)]] AddCarryOutput addCarry(T operand1, T operand2); diff --git a/include/nbl/builtin/hlsl/type_traits.hlsl b/include/nbl/builtin/hlsl/type_traits.hlsl index a6f9ad0655..a6b3db6708 100644 --- a/include/nbl/builtin/hlsl/type_traits.hlsl +++ b/include/nbl/builtin/hlsl/type_traits.hlsl @@ -664,6 +664,8 @@ using conditional_t = typename conditional::type; // Template Variables +template +NBL_CONSTEXPR T integral_constant_v = integral_constant::value; template NBL_CONSTEXPR bool is_same_v = is_same::value; template @@ -682,12 +684,20 @@ template NBL_CONSTEXPR uint32_t alignment_of_v = alignment_of::value; template NBL_CONSTEXPR uint64_t extent_v = extent::value; +template +NBL_CONSTEXPR bool is_fundamental_v = is_fundamental::value; // Overlapping definitions template using make_void_t = typename make_void::type; +template +using make_signed_t = typename make_signed::type; + +template +using make_unsigned_t = typename make_unsigned::type; + template struct conditional_value { diff --git a/include/nbl/builtin/hlsl/workgroup/fft.hlsl b/include/nbl/builtin/hlsl/workgroup/fft.hlsl index 3b600cd8ad..c23841dd50 100644 --- a/include/nbl/builtin/hlsl/workgroup/fft.hlsl +++ b/include/nbl/builtin/hlsl/workgroup/fft.hlsl @@ -40,7 +40,7 @@ struct OptimalFFTParameters uint16_t workgroupSizeLog2 : 8; // Used to check if the parameters returned by `optimalFFTParameters` are valid - bool areValid() + bool areValid() NBL_CONST_MEMBER_FUNC { return elementsPerInvocationLog2 > 0 && workgroupSizeLog2 > 0; } @@ -53,9 +53,9 @@ struct OptimalFFTParameters * @param [in] inputArrayLength The length of the array to run an FFT on * @param [in] minSubgroupSize The smallest possible number of threads that can run in a single 
subgroup. 32 by default. */ -inline OptimalFFTParameters optimalFFTParameters(uint32_t maxWorkgroupSize, uint32_t inputArrayLength, uint32_t minSubgroupSize) +NBL_CONSTEXPR_FUNC OptimalFFTParameters optimalFFTParameters(uint32_t maxWorkgroupSize, uint32_t inputArrayLength, uint32_t minSubgroupSize) { - NBL_CONSTEXPR_STATIC OptimalFFTParameters invalidParameters = { 0 , 0 }; + const OptimalFFTParameters invalidParameters = { 0 , 0 }; if (minSubgroupSize < 4 || maxWorkgroupSize < minSubgroupSize || inputArrayLength <= minSubgroupSize) return invalidParameters; @@ -81,15 +81,16 @@ inline OptimalFFTParameters optimalFFTParameters(uint32_t maxWorkgroupSize, uint namespace impl { + template -enable_if_t<(H <= N) && (N < 32), uint32_t> circularBitShiftRightHigher(uint32_t i) +NBL_CONSTEXPR_FUNC enable_if_t<(H <= N) && (N < 32), uint32_t> circularBitShiftRightHigher(uint32_t i) { // Highest H bits are numbered N-1 through N - H // N - H is then the middle bit // Lowest bits numbered from 0 through N - H - 1 - NBL_CONSTEXPR_STATIC_INLINE uint32_t lowMask = (1 << (N - H)) - 1; - NBL_CONSTEXPR_STATIC_INLINE uint32_t midMask = 1 << (N - H); - NBL_CONSTEXPR_STATIC_INLINE uint32_t highMask = ~(lowMask | midMask); + const uint32_t lowMask = (1 << (N - H)) - 1; + const uint32_t midMask = 1 << (N - H); + const uint32_t highMask = ~(lowMask | midMask); uint32_t low = i & lowMask; uint32_t mid = i & midMask; @@ -102,14 +103,14 @@ enable_if_t<(H <= N) && (N < 32), uint32_t> circularBitShiftRightHigher(uint32_t } template -enable_if_t<(H <= N) && (N < 32), uint32_t> circularBitShiftLeftHigher(uint32_t i) +NBL_CONSTEXPR_FUNC enable_if_t<(H <= N) && (N < 32), uint32_t> circularBitShiftLeftHigher(uint32_t i) { // Highest H bits are numbered N-1 through N - H // N - 1 is then the highest bit, and N - 2 through N - H are the middle bits // Lowest bits numbered from 0 through N - H - 1 - NBL_CONSTEXPR_STATIC_INLINE uint32_t lowMask = (1 << (N - H)) - 1; - NBL_CONSTEXPR_STATIC_INLINE uint32_t highMask = 1 << (N - 1); - NBL_CONSTEXPR_STATIC_INLINE uint32_t midMask = ~(lowMask | highMask); + const uint32_t lowMask = (1 << (N - H)) - 1; + const uint32_t highMask = 1 << (N - 1); + const uint32_t midMask = ~(lowMask | highMask); uint32_t low = i & lowMask; uint32_t mid = i & midMask; @@ -120,6 +121,7 @@ enable_if_t<(H <= N) && (N < 32), uint32_t> circularBitShiftLeftHigher(uint32_t return mid | high | low; } + } //namespace impl template @@ -127,26 +129,26 @@ struct FFTIndexingUtils { // This function maps the index `outputIdx` in the output array of a Nabla FFT to the index `freqIdx` in the DFT such that `DFT[freqIdx] = NablaFFT[outputIdx]` // This is because Cooley-Tukey + subgroup operations end up spewing out the outputs in a weird order - static uint32_t getDFTIndex(uint32_t outputIdx) + NBL_CONSTEXPR_STATIC_FUNC uint32_t getDFTIndex(uint32_t outputIdx) { return impl::circularBitShiftRightHigher(hlsl::bitReverseAs(outputIdx, FFTSizeLog2)); } // This function maps the index `freqIdx` in the DFT to the index `idx` in the output array of a Nabla FFT such that `DFT[freqIdx] = NablaFFT[idx]` // It is essentially the inverse of `getDFTIndex` - static uint32_t getNablaIndex(uint32_t freqIdx) + NBL_CONSTEXPR_STATIC_FUNC uint32_t getNablaIndex(uint32_t freqIdx) { return hlsl::bitReverseAs(impl::circularBitShiftLeftHigher(freqIdx), FFTSizeLog2); } // Mirrors an index about the Nyquist frequency in the DFT order - static uint32_t getDFTMirrorIndex(uint32_t freqIdx) + NBL_CONSTEXPR_STATIC_FUNC uint32_t getDFTMirrorIndex(uint32_t 
freqIdx) { return (FFTSize - freqIdx) & (FFTSize - 1); } // Given an index `outputIdx` of an element into the Nabla FFT, get the index into the Nabla FFT of the element corresponding to its negative frequency - static uint32_t getNablaMirrorIndex(uint32_t outputIdx) + NBL_CONSTEXPR_STATIC_FUNC uint32_t getNablaMirrorIndex(uint32_t outputIdx) { return getNablaIndex(getDFTMirrorIndex(getDFTIndex(outputIdx))); } @@ -326,7 +328,7 @@ struct FFT, device template && fft::FFTSharedMemoryAccessor) static void __call(NBL_REF_ARG(Accessor) accessor, NBL_REF_ARG(SharedMemoryAccessor) sharedmemAccessor) { - NBL_CONSTEXPR_STATIC_INLINE uint16_t WorkgroupSize = consteval_params_t::WorkgroupSize; + const uint16_t WorkgroupSize = consteval_params_t::WorkgroupSize; // Compute the indices only once const uint32_t threadID = uint32_t(SubgroupContiguousIndex()); @@ -392,7 +394,7 @@ struct FFT, device_ template && fft::FFTSharedMemoryAccessor) static void __call(NBL_REF_ARG(Accessor) accessor, NBL_REF_ARG(SharedMemoryAccessor) sharedmemAccessor) { - NBL_CONSTEXPR_STATIC_INLINE uint16_t WorkgroupSize = consteval_params_t::WorkgroupSize; + const uint16_t WorkgroupSize = consteval_params_t::WorkgroupSize; // Compute the indices only once const uint32_t threadID = uint32_t(SubgroupContiguousIndex()); @@ -453,8 +455,8 @@ struct FFT && fft::FFTSharedMemoryAccessor) static void __call(NBL_REF_ARG(Accessor) accessor, NBL_REF_ARG(SharedMemoryAccessor) sharedmemAccessor) { - NBL_CONSTEXPR_STATIC_INLINE uint16_t WorkgroupSize = consteval_params_t::WorkgroupSize; - NBL_CONSTEXPR_STATIC_INLINE uint16_t ElementsPerInvocation = consteval_params_t::ElementsPerInvocation; + const uint16_t WorkgroupSize = consteval_params_t::WorkgroupSize; + const uint16_t ElementsPerInvocation = consteval_params_t::ElementsPerInvocation; [unroll] for (uint32_t stride = (ElementsPerInvocation / 2) * WorkgroupSize; stride > WorkgroupSize; stride >>= 1) @@ -501,8 +503,8 @@ struct FFT && fft::FFTSharedMemoryAccessor) static void __call(NBL_REF_ARG(Accessor) accessor, NBL_REF_ARG(SharedMemoryAccessor) sharedmemAccessor) { - NBL_CONSTEXPR_STATIC_INLINE uint16_t WorkgroupSize = consteval_params_t::WorkgroupSize; - NBL_CONSTEXPR_STATIC_INLINE uint16_t ElementsPerInvocation = consteval_params_t::ElementsPerInvocation; + const uint16_t WorkgroupSize = consteval_params_t::WorkgroupSize; + const uint16_t ElementsPerInvocation = consteval_params_t::ElementsPerInvocation; // do K/2 small workgroup FFTs accessor_adaptors::Offset offsetAccessor; diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 9333a0d3b4..b2e914bf73 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -217,10 +217,12 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/macros.h") # emulated LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/float64_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/float64_t_impl.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/int64_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/vector_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/matrix_t.hlsl") # portable LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/float64_t.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/int64_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/vector_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/matrix_t.hlsl") # ieee754 @@ -249,6 +251,7 @@ 
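(Aside: the getDFTMirrorIndex helper made constexpr above maps a frequency f to -f mod FFTSize, pairing each bin with its negative frequency. A tiny self-contained check, with an illustrative power-of-two size:)

#include <cassert>
#include <cstdint>

constexpr uint32_t FFTSize = 16; // any power of two behaves the same

constexpr uint32_t getDFTMirrorIndex(uint32_t freqIdx)
{
    return (FFTSize - freqIdx) & (FFTSize - 1);
}

int main()
{
    assert(getDFTMirrorIndex(0) == 0);   // DC is its own mirror
    assert(getDFTMirrorIndex(8) == 8);   // so is Nyquist (FFTSize / 2)
    assert(getDFTMirrorIndex(3) == 13);  // f and FFTSize - f pair up
    assert(getDFTMirrorIndex(13) == 3);
}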
LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/basic.h") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/intrinsics.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/matrix.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/promote.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/truncate.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/vector.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/impl/intrinsics_impl.hlsl") #glsl compat @@ -369,5 +372,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/tgmath/output_structs.hlsl") #blur LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/prefix_sum_blur/blur.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/prefix_sum_blur/box_sampler.hlsl") +#morton codes +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/morton.hlsl") ADD_CUSTOM_BUILTIN_RESOURCES(nblBuiltinResourceData NBL_RESOURCES_TO_EMBED "${NBL_ROOT_PATH}/include" "nbl/builtin" "nbl::builtin" "${NBL_ROOT_PATH_BINARY}/include" "${NBL_ROOT_PATH_BINARY}/src" "STATIC" "INTERNAL")
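(Aside: for a quick feel of the morton::code API added in this diff, a hypothetical C++-side usage sketch. The template-parameter order (Signed, Bits, D) is an assumption for illustration; check morton.hlsl for the exact declaration.)

#include <cstdint>
#include "nbl/builtin/hlsl/morton.hlsl" // C++-compatible, as registered in CMake above

// Assumed spelling: 2D unsigned Morton code with 16 bits per axis
using morton2 = nbl::hlsl::morton::code<false, 16, 2>;

int main()
{
    const morton2 a = morton2::create(nbl::hlsl::vector<uint16_t, 2>(3, 5));
    const morton2 b = morton2::create(nbl::hlsl::vector<uint16_t, 2>(1, 2));
    // operator+ acts per axis despite the interleaved storage
    const morton2 sum = a + b;
    const auto decoded = static_cast<nbl::hlsl::vector<uint16_t, 2>>(sum); // (4, 7)
    return (decoded.x == 4 && decoded.y == 7) ? 0 : 1;
}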