diff --git a/include/nbl/builtin/hlsl/algorithm.hlsl b/include/nbl/builtin/hlsl/algorithm.hlsl
index 3a7c4963c2..0178673f4e 100644
--- a/include/nbl/builtin/hlsl/algorithm.hlsl
+++ b/include/nbl/builtin/hlsl/algorithm.hlsl
@@ -18,7 +18,7 @@ namespace impl
 // TODO: use structs
 template<typename T>
-NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs)
+NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs)
 {
     T tmp = lhs;
     lhs = rhs;
@@ -26,7 +26,7 @@ namespace impl
 }
 
 template<>
-NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(uint16_t) lhs, NBL_REF_ARG(uint16_t) rhs)
+NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(uint16_t) lhs, NBL_REF_ARG(uint16_t) rhs)
 {
     lhs ^= rhs;
     rhs ^= lhs;
@@ -34,7 +34,7 @@ namespace impl
 }
 
 template<>
-NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(uint32_t) lhs, NBL_REF_ARG(uint32_t) rhs)
+NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(uint32_t) lhs, NBL_REF_ARG(uint32_t) rhs)
 {
     lhs ^= rhs;
     rhs ^= lhs;
@@ -42,7 +42,7 @@ namespace impl
 }
 
 template<>
-NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(uint64_t) lhs, NBL_REF_ARG(uint64_t) rhs)
+NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(uint64_t) lhs, NBL_REF_ARG(uint64_t) rhs)
 {
     lhs ^= rhs;
     rhs ^= lhs;
@@ -50,7 +50,7 @@ namespace impl
 }
 
 template<>
-NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(int16_t) lhs, NBL_REF_ARG(int16_t) rhs)
+NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(int16_t) lhs, NBL_REF_ARG(int16_t) rhs)
 {
     lhs ^= rhs;
     rhs ^= lhs;
@@ -58,7 +58,7 @@ namespace impl
 }
 
 template<>
-NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(int32_t) lhs, NBL_REF_ARG(int32_t) rhs)
+NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(int32_t) lhs, NBL_REF_ARG(int32_t) rhs)
 {
     lhs ^= rhs;
     rhs ^= lhs;
@@ -66,7 +66,7 @@ namespace impl
 }
 
 template<>
-NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(int64_t) lhs, NBL_REF_ARG(int64_t) rhs)
+NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(int64_t) lhs, NBL_REF_ARG(int64_t) rhs)
 {
     lhs ^= rhs;
     rhs ^= lhs;
@@ -74,7 +74,7 @@ namespace impl
 }
 #else
 template<typename T>
-NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs)
+NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs)
 {
     std::swap(lhs, rhs);
 }
@@ -82,7 +82,7 @@
 }
 
 template<typename T>
-NBL_CONSTEXPR_INLINE_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs)
+NBL_CONSTEXPR_FUNC void swap(NBL_REF_ARG(T) lhs, NBL_REF_ARG(T) rhs)
 {
     impl::swap(lhs, rhs);
 }
diff --git a/include/nbl/builtin/hlsl/complex.hlsl b/include/nbl/builtin/hlsl/complex.hlsl
index 6728a9bf3d..292271bb87 100644
--- a/include/nbl/builtin/hlsl/complex.hlsl
+++ b/include/nbl/builtin/hlsl/complex.hlsl
@@ -60,33 +60,34 @@ struct complex_t
     Scalar m_imag;
 
     // ------------------------- Constructors ---------------------------------------
-    static complex_t create(const Scalar real, const Scalar imag)
+    static complex_t create(NBL_CONST_REF_ARG(Scalar) real, NBL_CONST_REF_ARG(Scalar) imag)
     {
-        complex_t retVal = { real, imag };
-        return retVal;
+        complex_t retVal = { real, imag };
+        return retVal;
     }
-
+
     // ------------------------- Member functions -------------------------------
-    Scalar real() {
+    Scalar real() NBL_CONST_MEMBER_FUNC
+    {
         return m_real;
     }
-
-    void real(const Scalar value)
+
+    void real(NBL_CONST_REF_ARG(Scalar) value)
     {
         m_real = value;
     }
-
-    Scalar imag()
+
+    Scalar imag() NBL_CONST_MEMBER_FUNC
     {
         return m_imag;
     }
-
-    void imag(const Scalar value)
+
+    void imag(NBL_CONST_REF_ARG(Scalar) value)
     {
         m_imag = value;
     }
 
     // ------------------------- Arithmetic operators -------------------------------
-    complex_t operator+(const complex_t rhs)
+    complex_t operator+(NBL_CONST_REF_ARG(complex_t) rhs) NBL_CONST_MEMBER_FUNC
     {
         complex_t result;
         result.m_real = m_real + rhs.m_real;
@@ -94,8 +95,8 @@ struct complex_t
         return result;
     }
-
-    complex_t operator-(const complex_t rhs)
+
+    complex_t operator-(NBL_CONST_REF_ARG(complex_t) rhs) NBL_CONST_MEMBER_FUNC
     {
         complex_t result;
@@ -105,7 +106,7 @@ struct complex_t
         return result;
     }
 
-    complex_t operator*(const complex_t rhs)
+    complex_t operator*(NBL_CONST_REF_ARG(complex_t) rhs) NBL_CONST_MEMBER_FUNC
     {
         complex_t result;
@@ -114,9 +115,9 @@ struct complex_t
         return result;
     }
-
+
     // multiply by scalar
-    complex_t operator*(const Scalar scalar)
+    complex_t operator*(NBL_CONST_REF_ARG(Scalar) scalar) NBL_CONST_MEMBER_FUNC
     {
         complex_t result;
         result.m_real = m_real * scalar;
@@ -124,9 +125,9 @@ struct complex_t
         return result;
     }
-
+
     // Divide by scalar
-    complex_t operator/(const Scalar scalar)
+    complex_t operator/(NBL_CONST_REF_ARG(Scalar) scalar) NBL_CONST_MEMBER_FUNC
     {
         complex_t result;
         result.m_real = m_real / scalar;
@@ -134,8 +135,8 @@ struct complex_t
         return result;
     }
-
-    complex_t operator/(const complex_t rhs)
+
+    complex_t operator/(NBL_CONST_REF_ARG(complex_t) rhs) NBL_CONST_MEMBER_FUNC
     {
         complex_t result;
@@ -145,13 +146,13 @@ struct complex_t
         return result;
     }
-
+
     // ----------------- Relational operators -----------------------------
-    bool operator==(const complex_t rhs)
+    bool operator==(NBL_CONST_REF_ARG(complex_t) rhs) NBL_CONST_MEMBER_FUNC
     {
         return m_real == rhs.m_real && m_imag == rhs.m_imag;
     }
-    bool operator!=(const complex_t rhs)
+    bool operator!=(const complex_t rhs) NBL_CONST_MEMBER_FUNC
     {
         return m_real != rhs.m_real || m_imag != rhs.m_imag;
     }
@@ -358,44 +359,44 @@ COMPLEX_COMPOUND_ASSIGN_IDENTITIES(float64_t4)
 // -------------------------------- Non-member functions --------------------------------------
 template<typename Scalar>
-Scalar real(const complex_t<Scalar> c)
+Scalar real(NBL_CONST_REF_ARG(complex_t<Scalar>) c)
 {
     return c.m_real;
 }
 
 template<typename Scalar>
-Scalar imag(const complex_t<Scalar> c)
+Scalar imag(NBL_CONST_REF_ARG(complex_t<Scalar>) c)
 {
     return c.m_imag;
 }
 
 template<typename Scalar>
-Scalar norm(const complex_t<Scalar> c)
+Scalar norm(NBL_CONST_REF_ARG(complex_t<Scalar>) c)
 {
     return c.m_real * c.m_real + c.m_imag * c.m_imag;
 }
 
 template<typename Scalar>
-Scalar abs(const complex_t<Scalar> c)
+Scalar abs(NBL_CONST_REF_ARG(complex_t<Scalar>) c)
 {
     return sqrt(norm(c));
 }
 
 template<typename Scalar>
-Scalar arg(const complex_t<Scalar> c)
+Scalar arg(NBL_CONST_REF_ARG(complex_t<Scalar>) c)
 {
     return atan2(c.m_imag, c.m_real);
 }
 
 template<typename Scalar>
-complex_t<Scalar> conj(const complex_t<Scalar> c)
+complex_t<Scalar> conj(NBL_CONST_REF_ARG(complex_t<Scalar>) c)
 {
-    complex_t<Scalar> retVal = {c.m_real, - c.m_imag};
+    complex_t<Scalar> retVal = { c.m_real, -c.m_imag };
     return retVal;
 }
 
 template<typename Scalar>
-complex_t<Scalar> proj(const complex_t<Scalar> c)
+complex_t<Scalar> proj(NBL_CONST_REF_ARG(complex_t<Scalar>) c)
 {
     Scalar den = norm(c) + Scalar(1.0);
     complex_t<Scalar> retVal = { (Scalar(2.0) * c.m_real) / den , (Scalar(2.0) * c.m_imag) / den};
     return retVal;
@@ -403,7 +404,7 @@ complex_t<Scalar> proj(const complex_t<Scalar> c)
 }
 
 template<typename Scalar>
-complex_t<Scalar> polar(const Scalar r, const Scalar theta)
+complex_t<Scalar> polar(NBL_CONST_REF_ARG(Scalar) r, NBL_CONST_REF_ARG(Scalar) theta)
 {
     complex_t<Scalar> retVal = {r * cos(theta), r * sin(theta)};
     return retVal;
@@ -427,22 +428,6 @@ complex_t<Scalar> rotateRight(NBL_CONST_REF_ARG(complex_t<Scalar>) value)
     return retVal;
 }
 
-template<typename Scalar>
-struct ternary_operator< complex_t<Scalar> >
-{
-    using type_t = complex_t<Scalar>;
-
-    complex_t<Scalar> operator()(bool condition, NBL_CONST_REF_ARG(complex_t<Scalar>) lhs, NBL_CONST_REF_ARG(complex_t<Scalar>) rhs)
-    {
-        const vector<Scalar, 2> lhsVector = vector<Scalar, 2>(lhs.real(), lhs.imag());
-        const vector<Scalar, 2> rhsVector = vector<Scalar, 2>(rhs.real(), rhs.imag());
-        const vector<Scalar, 2> resultVector = condition ? lhsVector : rhsVector;
-        const complex_t<Scalar> result = { resultVector.x, resultVector.y };
-        return result;
-    }
-};
-
-
 }
 }
diff --git a/include/nbl/builtin/hlsl/concepts/core.hlsl b/include/nbl/builtin/hlsl/concepts/core.hlsl
index c1bc0277df..625414611d 100644
--- a/include/nbl/builtin/hlsl/concepts/core.hlsl
+++ b/include/nbl/builtin/hlsl/concepts/core.hlsl
@@ -74,12 +74,22 @@ struct is_emulating_floating_point_scalar
 {
     NBL_CONSTEXPR_STATIC_INLINE bool value = FloatingPointScalar<T>;
 };
+
+template<typename T>
+struct is_emulating_integral_scalar
+{
+    NBL_CONSTEXPR_STATIC_INLINE bool value = IntegralScalar<T>;
+};
 }
 
 //! Floating point types are native floating point types or types that imitate native floating point types (for example emulated_float64_t)
 template<typename T>
 NBL_BOOL_CONCEPT FloatingPointLikeScalar = impl::is_emulating_floating_point_scalar<T>::value;
 
+//! Integral-like types are native integral types or types that imitate native integral types (for example emulated_uint64_t)
+template<typename T>
+NBL_BOOL_CONCEPT IntegralLikeScalar = impl::is_emulating_integral_scalar<T>::value;
+
 }
 }
 }
diff --git a/include/nbl/builtin/hlsl/concepts/vector.hlsl b/include/nbl/builtin/hlsl/concepts/vector.hlsl
index 468838730a..3ea3199951 100644
--- a/include/nbl/builtin/hlsl/concepts/vector.hlsl
+++ b/include/nbl/builtin/hlsl/concepts/vector.hlsl
@@ -40,6 +40,8 @@ NBL_BOOL_CONCEPT FloatingPointLikeVectorial = concepts::Vectorial<T> && concepts
 template<typename T>
 NBL_BOOL_CONCEPT IntVectorial = concepts::Vectorial<T> && (is_integral_v<typename vector_traits<T>::scalar_type>);
 template<typename T>
+NBL_BOOL_CONCEPT IntegralLikeVectorial = concepts::Vectorial<T> && concepts::IntegralLikeScalar<typename vector_traits<T>::scalar_type>;
+template<typename T>
 NBL_BOOL_CONCEPT SignedIntVectorial = concepts::Vectorial<T> && concepts::SignedIntegralScalar<typename vector_traits<T>::scalar_type>;
 }
diff --git a/include/nbl/builtin/hlsl/cpp_compat.hlsl b/include/nbl/builtin/hlsl/cpp_compat.hlsl
index 175a3e76c1..03d47864fb 100644
--- a/include/nbl/builtin/hlsl/cpp_compat.hlsl
+++ b/include/nbl/builtin/hlsl/cpp_compat.hlsl
@@ -5,5 +5,9 @@
 // it includes vector and matrix
 #include
 #include
+#include
+
+// Had to push some stuff here to avoid circular dependencies
+#include
 
 #endif
\ No newline at end of file
diff --git a/include/nbl/builtin/hlsl/cpp_compat/basic.h b/include/nbl/builtin/hlsl/cpp_compat/basic.h
index 3802bd69ea..0985af6eb3 100644
--- a/include/nbl/builtin/hlsl/cpp_compat/basic.h
+++ b/include/nbl/builtin/hlsl/cpp_compat/basic.h
@@ -2,35 +2,7 @@
 #define _NBL_BUILTIN_HLSL_CPP_COMPAT_BASIC_INCLUDED_
 
 #include
-
-namespace nbl
-{
-namespace hlsl
-{
-namespace impl
-{
-template<typename To, typename From>
-struct static_cast_helper
-{
-    static inline To cast(From u)
-    {
-#ifndef __HLSL_VERSION
-        return static_cast<To>(u);
-#else
-        return To(u);
-#endif
-    }
-};
-}
-
-template<typename To, typename From>
-inline To _static_cast(From v)
-{
-    return impl::static_cast_helper<To, From>::cast(v);
-}
-
-}
-}
+#include
 
 #ifndef __HLSL_VERSION
 #include
@@ -39,10 +11,12 @@ inline To _static_cast(From v)
 #define NBL_CONSTEXPR constexpr // TODO: rename to NBL_CONSTEXPR_VAR
 #define NBL_CONSTEXPR_FUNC constexpr
 #define NBL_CONSTEXPR_STATIC constexpr static
+#define NBL_CONSTEXPR_INLINE constexpr inline
 #define NBL_CONSTEXPR_STATIC_INLINE constexpr static inline
-#define NBL_CONSTEXPR_INLINE_FUNC constexpr inline
+#define NBL_CONSTEXPR_STATIC_FUNC constexpr static
 #define NBL_CONSTEXPR_FORCED_INLINE_FUNC NBL_FORCE_INLINE constexpr
 #define NBL_CONST_MEMBER_FUNC const
+#define NBL_IF_CONSTEXPR(...)
if constexpr (__VA_ARGS__) namespace nbl::hlsl { @@ -65,14 +39,17 @@ namespace nbl::hlsl #else + #define ARROW .arrow(). #define NBL_CONSTEXPR const static // TODO: rename to NBL_CONSTEXPR_VAR -#define NBL_CONSTEXPR_FUNC +#define NBL_CONSTEXPR_FUNC inline #define NBL_CONSTEXPR_STATIC const static +#define NBL_CONSTEXPR_INLINE const static #define NBL_CONSTEXPR_STATIC_INLINE const static -#define NBL_CONSTEXPR_INLINE_FUNC inline +#define NBL_CONSTEXPR_STATIC_FUNC static inline #define NBL_CONSTEXPR_FORCED_INLINE_FUNC inline -#define NBL_CONST_MEMBER_FUNC +#define NBL_CONST_MEMBER_FUNC +#define NBL_IF_CONSTEXPR(...) if (__VA_ARGS__) namespace nbl { @@ -100,4 +77,34 @@ struct add_pointer #endif +namespace nbl +{ +namespace hlsl +{ +namespace impl +{ +template +struct static_cast_helper +{ + NBL_CONSTEXPR_STATIC_FUNC To cast(NBL_CONST_REF_ARG(From) u) + { +#ifndef __HLSL_VERSION + return static_cast(u); +#else + return To(u); +#endif + } +}; + +} + +template +NBL_CONSTEXPR_FUNC To _static_cast(NBL_CONST_REF_ARG(From) v) +{ + return impl::static_cast_helper::cast(v); +} + +} +} + #endif diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index 0309b78e0d..e824c45ee1 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -75,6 +75,8 @@ template struct all_helper; template struct any_helper; +template +struct select_helper; template struct bitReverseAs_helper; template @@ -104,6 +106,12 @@ struct nMax_helper; template struct nClamp_helper; template +struct addCarry_helper; +template +struct subBorrow_helper; +template +struct undef_helper; +template struct fma_helper; #ifdef __HLSL_VERSION // HLSL only specializations @@ -118,8 +126,8 @@ struct fma_helper; // the template<> needs to be written ourselves // return type is __VA_ARGS__ to protect against `,` in templated return types #define AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(HELPER_NAME, SPIRV_FUNCTION_NAME, ARG_TYPE_LIST, ARG_TYPE_SET, ...)\ -NBL_PARTIAL_REQ_TOP(is_same_v(BOOST_PP_SEQ_FOR_EACH_I(DECLVAL, _, ARG_TYPE_SET))), __VA_ARGS__ >) \ -struct HELPER_NAME(BOOST_PP_SEQ_FOR_EACH_I(DECLVAL, _, ARG_TYPE_SET))), __VA_ARGS__ >) >\ +NBL_PARTIAL_REQ_TOP(is_same_v(BOOST_PP_SEQ_FOR_EACH_I(DECLVAL, _, ARG_TYPE_SET))), __VA_ARGS__ >) \ +struct HELPER_NAME(BOOST_PP_SEQ_FOR_EACH_I(DECLVAL, _, ARG_TYPE_SET))), __VA_ARGS__ >) >\ {\ using return_t = __VA_ARGS__;\ static inline return_t __call( BOOST_PP_SEQ_FOR_EACH_I(DECL_ARG, _, ARG_TYPE_SET) )\ @@ -141,8 +149,9 @@ template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(length_helper, length, template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(normalize_helper, normalize, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(rsqrt_helper, inverseSqrt, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(fract_helper, fract, (T), (T), T) -template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(all_helper, any, (T), (T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(all_helper, all, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(any_helper, any, (T), (T), T) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(select_helper, select, (B)(T), (B)(T)(T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(sign_helper, fSign, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(sign_helper, sSign, (T), (T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(radians_helper, radians, (T), (T), T) @@ -164,6 +173,10 @@ template 
AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(refract_hel template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(nMax_helper, nMax, (T), (T)(T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(nMin_helper, nMin, (T), (T)(T), T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(nClamp_helper, nClamp, (T), (T)(T), T) +// Can use trivial case and not worry about restricting `T` with a concept since `spirv::AddCarryOutput / SubBorrowOutput` already take care of that +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(addCarry_helper, addCarry, (T), (T)(T), spirv::AddCarryOutput) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(subBorrow_helper, subBorrow, (T), (T)(T), spirv::SubBorrowOutput) +template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(undef_helper, undef, (T), , T) template AUTO_SPECIALIZE_TRIVIAL_CASE_HELPER(fma_helper, fma, (T), (T)(T)(T), T) #define BITCOUNT_HELPER_RETRUN_TYPE conditional_t, vector::Dimension>, int32_t> @@ -602,6 +615,72 @@ struct nClamp_helper } }; +// Once again no need to restrict the two below with concepts for same reason as HLSL version +template +struct addCarry_helper +{ + using return_t = spirv::AddCarryOutput; + constexpr static inline return_t __call(const T operand1, const T operand2) + { + return_t retVal; + retVal.result = operand1 + operand2; + retVal.carry = T(retVal.result < operand1); + return retVal; + } +}; + +template +struct subBorrow_helper +{ + using return_t = spirv::SubBorrowOutput; + constexpr static inline return_t __call(const T operand1, const T operand2) + { + return_t retVal; + retVal.result = static_cast(operand1 - operand2); + retVal.borrow = T(operand1 < operand2); + return retVal; + } +}; + +template +NBL_PARTIAL_REQ_TOP(concepts::BooleanScalar) +struct select_helper) > +{ + NBL_CONSTEXPR_STATIC_FUNC T __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) + { + return condition ? object1 : object2; + } +}; + +template +NBL_PARTIAL_REQ_TOP(concepts::Boolean&& concepts::Vector&& concepts::Vector && (extent_v == extent_v)) +struct select_helper&& concepts::Vector&& concepts::Vector && (extent_v == extent_v)) > +{ + NBL_CONSTEXPR_STATIC_FUNC T __call(NBL_CONST_REF_ARG(B) condition, NBL_CONST_REF_ARG(T) object1, NBL_CONST_REF_ARG(T) object2) + { + using traits = hlsl::vector_traits; + array_get conditionGetter; + array_get objectGetter; + array_set setter; + + T selected; + for (uint32_t i = 0; i < traits::Dimension; ++i) + setter(selected, i, conditionGetter(condition, i) ? 
objectGetter(object1, i) : objectGetter(object2, i)); + + return selected; + } +}; + +template +struct undef_helper +{ + NBL_CONSTEXPR_STATIC_FUNC T __call() + { + T t; + return t; + } +}; + template requires concepts::FloatingPointScalar struct fma_helper diff --git a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl index a5747a5fb7..3cd0cf2f8b 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl @@ -23,6 +23,12 @@ namespace nbl namespace hlsl { +template +NBL_CONSTEXPR_FUNC T undef() +{ + return cpp_compat_intrinsics_impl::undef_helper::__call(); +} + template inline typename cpp_compat_intrinsics_impl::bitCount_helper::return_t bitCount(NBL_CONST_REF_ARG(T) val) { @@ -150,6 +156,12 @@ inline bool any(Vector vec) return cpp_compat_intrinsics_impl::any_helper::__call(vec); } +template +NBL_CONSTEXPR_FUNC ResultType select(Condition condition, ResultType object1, ResultType object2) +{ + return cpp_compat_intrinsics_impl::select_helper::__call(condition, object1, object2); +} + /** * @brief Returns x - floor(x). * @@ -217,6 +229,19 @@ inline T refract(NBL_CONST_REF_ARG(T) I, NBL_CONST_REF_ARG(T) N, NBL_CONST_REF_A return cpp_compat_intrinsics_impl::refract_helper::__call(I, N, eta); } +template +NBL_CONSTEXPR_FUNC spirv::AddCarryOutput addCarry(NBL_CONST_REF_ARG(T) operand1, NBL_CONST_REF_ARG(T) operand2) +{ + return cpp_compat_intrinsics_impl::addCarry_helper::__call(operand1, operand2); +} + +template +NBL_CONSTEXPR_FUNC spirv::SubBorrowOutput subBorrow(NBL_CONST_REF_ARG(T) operand1, NBL_CONST_REF_ARG(T) operand2) +{ + return cpp_compat_intrinsics_impl::subBorrow_helper::__call(operand1, operand2); +} + + #ifdef __HLSL_VERSION #define NAMESPACE spirv #else diff --git a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl index 51ca73f6d3..0afe214de7 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/promote.hlsl @@ -15,7 +15,7 @@ namespace impl template struct Promote { - T operator()(U v) + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(U) v) { return T(v); } @@ -26,7 +26,7 @@ struct Promote template struct Promote, U> { - enable_if_t::value && is_scalar::value, vector > operator()(U v) + NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(NBL_CONST_REF_ARG(U) v) { vector promoted = {Scalar(v)}; return promoted; @@ -36,7 +36,7 @@ struct Promote, U> template struct Promote, U> { - enable_if_t::value && is_scalar::value, vector > operator()(U v) + NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(NBL_CONST_REF_ARG(U) v) { vector promoted = {Scalar(v), Scalar(v)}; return promoted; @@ -46,7 +46,7 @@ struct Promote, U> template struct Promote, U> { - enable_if_t::value && is_scalar::value, vector > operator()(U v) + NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(NBL_CONST_REF_ARG(U) v) { vector promoted = {Scalar(v), Scalar(v), Scalar(v)}; return promoted; @@ -56,7 +56,7 @@ struct Promote, U> template struct Promote, U> { - enable_if_t::value && is_scalar::value, vector > operator()(U v) + NBL_CONSTEXPR_FUNC enable_if_t::value && is_scalar::value, vector > operator()(NBL_CONST_REF_ARG(U) v) { vector promoted = {Scalar(v), Scalar(v), Scalar(v), Scalar(v)}; return promoted; @@ -68,7 +68,7 @@ struct Promote, U> } template -T promote(const U v) // TODO: use NBL_CONST_REF_ARG(U) 
instead of U v (circular ref) +NBL_CONSTEXPR_FUNC T promote(const U v) // TODO: use NBL_CONST_REF_ARG(U) instead of U v (circular ref) { impl::Promote _promote; return _promote(v); diff --git a/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl b/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl new file mode 100644 index 0000000000..a95df183be --- /dev/null +++ b/include/nbl/builtin/hlsl/cpp_compat/truncate.hlsl @@ -0,0 +1,76 @@ +#ifndef _NBL_BUILTIN_HLSL_CPP_COMPAT_TRUNCATE_INCLUDED_ +#define _NBL_BUILTIN_HLSL_CPP_COMPAT_TRUNCATE_INCLUDED_ + +#include "nbl/builtin/hlsl/type_traits.hlsl" +#include "nbl/builtin/hlsl/concepts/core.hlsl" + +namespace nbl +{ +namespace hlsl +{ + +namespace impl +{ + +template +struct Truncate +{ + NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(U) v) + { + return T(v); + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::Scalar) +struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar) > +{ + NBL_CONSTEXPR_FUNC vector operator()(NBL_CONST_REF_ARG(vector) v) + { + vector truncated = { v[0] }; + return truncated; + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::Scalar && N >= 2) +struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar && N >= 2) > +{ + NBL_CONSTEXPR_FUNC vector operator()(NBL_CONST_REF_ARG(vector) v) + { + vector truncated = { v[0], v[1]}; + return truncated; + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::Scalar&& N >= 3) +struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar&& N >= 3) > +{ + NBL_CONSTEXPR_FUNC vector operator()(NBL_CONST_REF_ARG(vector) v) + { + vector truncated = { v[0], v[1], v[2] }; + return truncated; + } +}; + +template NBL_PARTIAL_REQ_TOP(concepts::Scalar&& N >= 4) +struct Truncate, vector NBL_PARTIAL_REQ_BOT(concepts::Scalar&& N >= 4) > +{ + NBL_CONSTEXPR_FUNC vector operator()(NBL_CONST_REF_ARG(vector) v) + { + vector truncated = { v[0], v[1], v[2], v[3] }; + return truncated; + } +}; + +} //namespace impl + +template +NBL_CONSTEXPR_FUNC T truncate(NBL_CONST_REF_ARG(U) v) +{ + impl::Truncate _truncate; + return _truncate(v); +} + +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/emulated/float64_t.hlsl b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl index a0cde90df9..2dfc52c957 100644 --- a/include/nbl/builtin/hlsl/emulated/float64_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/float64_t.hlsl @@ -412,25 +412,25 @@ inline int extractExponent(__VA_ARGS__ x)\ }\ \ template<>\ -NBL_CONSTEXPR_INLINE_FUNC __VA_ARGS__ replaceBiasedExponent(__VA_ARGS__ x, typename unsigned_integer_of_size::type biasedExp)\ +NBL_CONSTEXPR_FUNC __VA_ARGS__ replaceBiasedExponent(__VA_ARGS__ x, typename unsigned_integer_of_size::type biasedExp)\ {\ return __VA_ARGS__(replaceBiasedExponent(x.data, biasedExp));\ }\ \ template <>\ -NBL_CONSTEXPR_INLINE_FUNC __VA_ARGS__ fastMulExp2(__VA_ARGS__ x, int n)\ +NBL_CONSTEXPR_FUNC __VA_ARGS__ fastMulExp2(__VA_ARGS__ x, int n)\ {\ return __VA_ARGS__(replaceBiasedExponent(x.data, extractBiasedExponent(x) + uint32_t(n)));\ }\ \ template <>\ -NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size::type extractMantissa(__VA_ARGS__ x)\ +NBL_CONSTEXPR_FUNC unsigned_integer_of_size::type extractMantissa(__VA_ARGS__ x)\ {\ return extractMantissa(x.data);\ }\ \ template <>\ -NBL_CONSTEXPR_INLINE_FUNC uint64_t extractNormalizeMantissa(__VA_ARGS__ x)\ +NBL_CONSTEXPR_FUNC uint64_t extractNormalizeMantissa(__VA_ARGS__ x)\ {\ return extractNormalizeMantissa(x.data);\ }\ @@ -577,10 +577,10 @@ namespace ieee754 { namespace impl { -template<> NBL_CONSTEXPR_INLINE_FUNC uint64_t 
bitCastToUintType(emulated_float64_t x) { return x.data; } -template<> NBL_CONSTEXPR_INLINE_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } -template<> NBL_CONSTEXPR_INLINE_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } -template<> NBL_CONSTEXPR_INLINE_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } +template<> NBL_CONSTEXPR_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } +template<> NBL_CONSTEXPR_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } +template<> NBL_CONSTEXPR_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } +template<> NBL_CONSTEXPR_FUNC uint64_t bitCastToUintType(emulated_float64_t x) { return x.data; } } IMPLEMENT_IEEE754_FUNC_SPEC_FOR_EMULATED_F64_TYPE(emulated_float64_t); diff --git a/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl b/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl index 44b881345d..df785e3e8f 100644 --- a/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl +++ b/include/nbl/builtin/hlsl/emulated/float64_t_impl.hlsl @@ -41,7 +41,7 @@ namespace hlsl { namespace emulated_float64_t_impl { -NBL_CONSTEXPR_INLINE_FUNC uint64_t2 shiftMantissaLeftBy53(uint64_t mantissa64) +NBL_CONSTEXPR_FUNC uint64_t2 shiftMantissaLeftBy53(uint64_t mantissa64) { uint64_t2 output; output.x = mantissa64 >> (64 - ieee754::traits::mantissaBitCnt); @@ -74,7 +74,7 @@ inline uint64_t castFloat32ToStorageType(float32_t val) } }; -NBL_CONSTEXPR_INLINE_FUNC bool isZero(uint64_t val) +NBL_CONSTEXPR_FUNC bool isZero(uint64_t val) { return (val << 1) == 0ull; } @@ -137,18 +137,18 @@ inline uint64_t reinterpretAsFloat64BitPattern(int64_t val) return sign | reinterpretAsFloat64BitPattern(absVal); }; -NBL_CONSTEXPR_INLINE_FUNC uint64_t flushDenormToZero(uint64_t value) +NBL_CONSTEXPR_FUNC uint64_t flushDenormToZero(uint64_t value) { const uint64_t biasBits = value & ieee754::traits::exponentMask; return biasBits ? 
value : (value & ieee754::traits::signMask); } -NBL_CONSTEXPR_INLINE_FUNC uint64_t assembleFloat64(uint64_t signShifted, uint64_t expShifted, uint64_t mantissa) +NBL_CONSTEXPR_FUNC uint64_t assembleFloat64(uint64_t signShifted, uint64_t expShifted, uint64_t mantissa) { return signShifted | expShifted | mantissa; } -NBL_CONSTEXPR_INLINE_FUNC bool areBothInfinity(uint64_t lhs, uint64_t rhs) +NBL_CONSTEXPR_FUNC bool areBothInfinity(uint64_t lhs, uint64_t rhs) { lhs &= ~ieee754::traits::signMask; rhs &= ~ieee754::traits::signMask; @@ -156,18 +156,18 @@ NBL_CONSTEXPR_INLINE_FUNC bool areBothInfinity(uint64_t lhs, uint64_t rhs) return lhs == rhs && lhs == ieee754::traits::inf; } -NBL_CONSTEXPR_INLINE_FUNC bool areBothZero(uint64_t lhs, uint64_t rhs) +NBL_CONSTEXPR_FUNC bool areBothZero(uint64_t lhs, uint64_t rhs) { return !bool((lhs | rhs) << 1); } -NBL_CONSTEXPR_INLINE_FUNC bool areBothSameSignZero(uint64_t lhs, uint64_t rhs) +NBL_CONSTEXPR_FUNC bool areBothSameSignZero(uint64_t lhs, uint64_t rhs) { return !bool((lhs) << 1) && (lhs == rhs); } template -NBL_CONSTEXPR_INLINE_FUNC bool operatorLessAndGreaterCommonImplementation(uint64_t lhs, uint64_t rhs) +NBL_CONSTEXPR_FUNC bool operatorLessAndGreaterCommonImplementation(uint64_t lhs, uint64_t rhs) { if (!FastMath) { diff --git a/include/nbl/builtin/hlsl/emulated/int64_t.hlsl b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl new file mode 100644 index 0000000000..8a3fd42faf --- /dev/null +++ b/include/nbl/builtin/hlsl/emulated/int64_t.hlsl @@ -0,0 +1,514 @@ +#ifndef _NBL_BUILTIN_HLSL_EMULATED_INT64_T_HLSL_INCLUDED_ +#define _NBL_BUILTIN_HLSL_EMULATED_INT64_T_HLSL_INCLUDED_ + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/functional.hlsl" +#include "nbl/builtin/hlsl/concepts/core.hlsl" +#include "nbl/builtin/hlsl/bit.hlsl" + +// Didn't bother with operator*, operator/, implement if you need them. Multiplication is pretty straightforward, division requires switching on signs +// and whether the topmost bits of the divisor are equal to 0 +// - Francisco + +namespace nbl +{ +namespace hlsl +{ + +template +struct emulated_int64_base +{ + using storage_t = vector; + using this_t = emulated_int64_base; + using this_signed_t = emulated_int64_base; + + storage_t data; + + // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- + + #ifndef __HLSL_VERSION + + emulated_int64_base() = default; + + // GLM requires these to cast vectors because it uses a native `static_cast` + template + constexpr explicit emulated_int64_base(const I& toEmulate); + + constexpr explicit emulated_int64_base(const emulated_int64_base& other) : data(other.data) {} + + #endif + + /** + * @brief Creates an `emulated_int64` from a vector of two `uint32_t`s representing its bitpattern + * + * @param [in] _data Vector of `uint32_t` encoding the `uint64_t/int64_t` being emulated. 
Stored as little endian (the first component holds the lower 32 bits)
+     */
+    NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(storage_t) _data)
+    {
+        this_t retVal;
+        retVal.data = _data;
+        return retVal;
+    }
+
+    /**
+     * @brief Creates an `emulated_int64` from two `uint32_t`s representing its bitpattern
+     *
+     * @param [in] lo Lowest 32 bits of the `uint64_t/int64_t` being emulated
+     * @param [in] hi Highest 32 bits of the `uint64_t/int64_t` being emulated
+     */
+    NBL_CONSTEXPR_STATIC_FUNC this_t create(NBL_CONST_REF_ARG(uint32_t) lo, NBL_CONST_REF_ARG(uint32_t) hi)
+    {
+        return create(storage_t(lo, hi));
+    }
+
+    // ------------------------------------------------------- CONVERSION OPERATORS ---------------------------------------------------------------
+    // GLM requires these for vector casts
+
+    #ifndef __HLSL_VERSION
+
+    template<typename I>
+    constexpr explicit operator I() const noexcept;
+
+    #endif
+
+    // ------------------------------------------------------- INTERNAL GETTERS -------------------------------------------------
+
+    NBL_CONSTEXPR_FUNC uint32_t __getLSB() NBL_CONST_MEMBER_FUNC
+    {
+        return data.x;
+    }
+
+    NBL_CONSTEXPR_FUNC uint32_t __getMSB() NBL_CONST_MEMBER_FUNC
+    {
+        return data.y;
+    }
+
+    // ------------------------------------------------------- BITWISE OPERATORS -------------------------------------------------
+
+    NBL_CONSTEXPR_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC
+    {
+        this_t retVal = create(data & rhs.data);
+        return retVal;
+    }
+
+    NBL_CONSTEXPR_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC
+    {
+        this_t retVal = create(data | rhs.data);
+        return retVal;
+    }
+
+    NBL_CONSTEXPR_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC
+    {
+        this_t retVal = create(data ^ rhs.data);
+        return retVal;
+    }
+
+    NBL_CONSTEXPR_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC
+    {
+        this_t retVal = create(~data);
+        return retVal;
+    }
+
+    // Only valid in CPP
+    #ifndef __HLSL_VERSION
+    constexpr inline this_t operator<<(uint32_t bits) const;
+    constexpr inline this_t operator>>(uint32_t bits) const;
+
+    #endif
+
+    // ------------------------------------------------------- ARITHMETIC OPERATORS -------------------------------------------------
+
+    NBL_CONSTEXPR_FUNC this_signed_t operator-() NBL_CONST_MEMBER_FUNC
+    {
+        // two's complement negation is ~x + 1: the +1 must carry from the low word into the
+        // high word, so the two halves can't simply be negated component-wise
+        const this_t negated = operator~() + create(uint32_t(1), uint32_t(0));
+        return this_signed_t::create(negated.data);
+    }
+
+    NBL_CONSTEXPR_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC
+    {
+        const spirv::AddCarryOutput<uint32_t> lowerAddResult = addCarry(__getLSB(), rhs.__getLSB());
+        return create(lowerAddResult.result, __getMSB() + rhs.__getMSB() + lowerAddResult.carry);
+    }
+
+    NBL_CONSTEXPR_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC
+    {
+        const spirv::SubBorrowOutput<uint32_t> lowerSubResult = subBorrow(__getLSB(), rhs.__getLSB());
+        return create(lowerSubResult.result, __getMSB() - rhs.__getMSB() - lowerSubResult.borrow);
+    }
+
+    // ------------------------------------------------------- COMPARISON OPERATORS -------------------------------------------------
+    NBL_CONSTEXPR_FUNC bool operator==(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC
+    {
+        equal_to<storage_t> equals;
+        return all(equals(data, rhs.data));
+    }
+
+    NBL_CONSTEXPR_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC
+    {
+        not_equal_to<storage_t> notEquals;
+        return any(notEquals(data, rhs.data));
+    }
+
+    NBL_CONSTEXPR_FUNC bool operator<(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC
+    {
+        // Either the topmost bits, when interpreted with correct sign, are less than those of `rhs`,
+        // or they're equal and the lower bits are less
+        // (lower bits are always positive in both unsigned and 2's complement so comparison can happen as-is)
+        const bool MSBEqual = __getMSB() == rhs.__getMSB();
+        const bool MSB = Signed ? (bit_cast<int32_t>(__getMSB()) < bit_cast<int32_t>(rhs.__getMSB())) : (__getMSB() < rhs.__getMSB());
+        const bool LSB = __getLSB() < rhs.__getLSB();
+        return MSBEqual ? LSB : MSB;
+    }
+
+    NBL_CONSTEXPR_FUNC bool operator>(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC
+    {
+        // Same reasoning as above
+        const bool MSBEqual = __getMSB() == rhs.__getMSB();
+        const bool MSB = Signed ? (bit_cast<int32_t>(__getMSB()) > bit_cast<int32_t>(rhs.__getMSB())) : (__getMSB() > rhs.__getMSB());
+        const bool LSB = __getLSB() > rhs.__getLSB();
+        return MSBEqual ? LSB : MSB;
+    }
+
+    NBL_CONSTEXPR_FUNC bool operator<=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC
+    {
+        return !operator>(rhs);
+    }
+
+    NBL_CONSTEXPR_FUNC bool operator>=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC
+    {
+        return !operator<(rhs);
+    }
+};
+
+using emulated_uint64_t = emulated_int64_base<false>;
+using emulated_int64_t = emulated_int64_base<true>;
+
+namespace impl
+{
+
+template<bool Signed1, bool Signed2>
+struct static_cast_helper<emulated_int64_base<Signed1>, emulated_int64_base<Signed2> >
+{
+    using To = emulated_int64_base<Signed1>;
+    using From = emulated_int64_base<Signed2>;
+
+    NBL_CONSTEXPR_STATIC_FUNC To cast(NBL_CONST_REF_ARG(From) other)
+    {
+        To retVal;
+        retVal.data = other.data;
+        return retVal;
+    }
+};
+
+template<typename I, bool Signed> NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar<I> && (sizeof(I) <= sizeof(uint32_t)))
+struct static_cast_helper<I, emulated_int64_base<Signed> NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar<I> && (sizeof(I) <= sizeof(uint32_t))) >
+{
+    using To = I;
+    using From = emulated_int64_base<Signed>;
+
+    // Return only the lowest bits
+    NBL_CONSTEXPR_STATIC_FUNC To cast(NBL_CONST_REF_ARG(From) val)
+    {
+        return _static_cast<To>(val.data.x);
+    }
+};
+
+template<typename I, bool Signed> NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar<I> && (sizeof(I) > sizeof(uint32_t)))
+struct static_cast_helper<I, emulated_int64_base<Signed> NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar<I> && (sizeof(I) > sizeof(uint32_t))) >
+{
+    using To = I;
+    using From = emulated_int64_base<Signed>;
+
+    NBL_CONSTEXPR_STATIC_FUNC To cast(NBL_CONST_REF_ARG(From) val)
+    {
+        return bit_cast<To>(val.data);
+    }
+};
+
+template<typename I, bool Signed> NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar<I> && (sizeof(I) <= sizeof(uint32_t)))
+struct static_cast_helper<emulated_int64_base<Signed>, I NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar<I> && (sizeof(I) <= sizeof(uint32_t))) >
+{
+    using To = emulated_int64_base<Signed>;
+    using From = I;
+
+    // Set only lower bits
+    NBL_CONSTEXPR_STATIC_FUNC To cast(NBL_CONST_REF_ARG(From) i)
+    {
+        return To::create(_static_cast<uint32_t>(i), uint32_t(0));
+    }
+};
+
+template<typename I, bool Signed> NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar<I> && (sizeof(I) > sizeof(uint32_t)))
+struct static_cast_helper<emulated_int64_base<Signed>, I NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar<I> && (sizeof(I) > sizeof(uint32_t))) >
+{
+    using To = emulated_int64_base<Signed>;
+    using From = I;
+
+    NBL_CONSTEXPR_STATIC_FUNC To cast(NBL_CONST_REF_ARG(From) i)
+    {
+        // `bit_cast` blocked by GLM vectors using a union
+        #ifndef __HLSL_VERSION
+        return To::create(_static_cast<uint32_t>(i), _static_cast<uint32_t>(i >> 32));
+        #else
+        To retVal;
+        retVal.data = bit_cast<vector<uint32_t, 2> >(i);
+        return retVal;
+        #endif
+    }
+};
+
+} //namespace impl
+
+// Define constructor and conversion operators
+
+#ifndef __HLSL_VERSION
+
+template<bool Signed>
+template<typename I>
+constexpr emulated_int64_base<Signed>::emulated_int64_base(const I& toEmulate)
+{
+    *this = _static_cast<emulated_int64_base<Signed> >(toEmulate);
+}
+
+template<bool Signed>
+template<typename I>
+constexpr emulated_int64_base<Signed>::operator I() const noexcept
+{
+    
return _static_cast(*this); +} + +#endif + +// ---------------------- Functional operators ------------------------ + +template +struct left_shift_operator > +{ + using type_t = emulated_int64_base; + NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); + + // Can't do generic templated definition, see: + //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 + + // If `_bits > 63` or `_bits < 0` the result is undefined + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) + { + const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites LSB + const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; + const type_t shifted = type_t::create(bigShift ? vector(0, operand.__getLSB() << shift) + : vector(operand.__getLSB() << bits, (operand.__getMSB() << bits) | (operand.__getLSB() >> shift))); + ternary_operator ternary; + return ternary(bool(bits), shifted, operand); + } + + // If `_bits > 63` or `_bits < 0` the result is undefined + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) + { + return operator()(operand, _static_cast(bits)); + } +}; + +template<> +struct arithmetic_right_shift_operator +{ + using type_t = emulated_uint64_t; + NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); + + // Can't do generic templated definition, see: + //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 + + // If `_bits > 63` the result is undefined + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) + { + const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites MSB + const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; + const type_t shifted = type_t::create(bigShift ? vector(operand.__getMSB() >> shift, 0) + : vector((operand.__getMSB() << shift) | (operand.__getLSB() >> bits), operand.__getMSB() >> bits)); + ternary_operator ternary; + return ternary(bool(bits), shifted, operand); + } + + // If `_bits > 63` the result is undefined + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) + { + return operator()(operand, _static_cast(bits)); + } +}; + +template<> +struct arithmetic_right_shift_operator +{ + using type_t = emulated_int64_t; + NBL_CONSTEXPR_STATIC uint32_t ComponentBitWidth = uint32_t(8 * sizeof(uint32_t)); + + // Can't do generic templated definition, see: + //https://github.com/microsoft/DirectXShaderCompiler/issues/7325 + + // If `_bits > 63` or `_bits < 0` the result is undefined + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint32_t bits) + { + const bool bigShift = bits >= ComponentBitWidth; // Shift that completely rewrites MSB + const uint32_t shift = bigShift ? bits - ComponentBitWidth : ComponentBitWidth - bits; + const type_t shifted = type_t::create(bigShift ? vector(uint32_t(int32_t(operand.__getMSB()) >> shift), int32_t(operand.__getMSB()) < 0 ? 
~uint32_t(0) : uint32_t(0)) + : vector((operand.__getMSB() << shift) | (operand.__getLSB() >> bits), uint32_t(int32_t(operand.__getMSB()) >> bits))); + ternary_operator ternary; + return ternary(bool(bits), shifted, operand); + } + + // If `_bits > 63` or `_bits < 0` the result is undefined + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, type_t bits) + { + return operator()(operand, _static_cast(bits)); + } +}; + +#ifndef __HLSL_VERSION + +template +constexpr inline emulated_int64_base emulated_int64_base::operator<<(uint32_t bits) const +{ + left_shift_operator leftShift; + return leftShift(*this, bits); +} + +constexpr inline emulated_uint64_t emulated_uint64_t::operator>>(uint32_t bits) const +{ + arithmetic_right_shift_operator rightShift; + return rightShift(*this, bits); +} + +constexpr inline emulated_int64_t emulated_int64_t::operator>>(uint32_t bits) const +{ + arithmetic_right_shift_operator rightShift; + return rightShift(*this, bits); +} + +#endif + +// ---------------------- STD arithmetic operators ------------------------ +// Specializations of the structs found in functional.hlsl +// These all have to be specialized because of the identity that can't be initialized inside the struct definition + +template +struct plus > +{ + using type_t = emulated_int64_base; + + type_t operator()(NBL_CONST_REF_ARG(type_t) lhs, NBL_CONST_REF_ARG(type_t) rhs) + { + return lhs + rhs; + } + + const static type_t identity; +}; + +template +struct minus > +{ + using type_t = emulated_int64_base; + + type_t operator()(NBL_CONST_REF_ARG(type_t) lhs, NBL_CONST_REF_ARG(type_t) rhs) + { + return lhs - rhs; + } + + const static type_t identity; +}; + +template<> +NBL_CONSTEXPR_INLINE emulated_uint64_t plus::identity = _static_cast(uint64_t(0)); +template<> +NBL_CONSTEXPR_INLINE emulated_int64_t plus::identity = _static_cast(int64_t(0)); +template<> +NBL_CONSTEXPR_INLINE emulated_uint64_t minus::identity = _static_cast(uint64_t(0)); +template<> +NBL_CONSTEXPR_INLINE emulated_int64_t minus::identity = _static_cast(int64_t(0)); + +// --------------------------------- Compound assignment operators ------------------------------------------ +// Specializations of the structs found in functional.hlsl + +template +struct plus_assign > +{ + using type_t = emulated_int64_base; + using base_t = plus; + base_t baseOp; + void operator()(NBL_REF_ARG(type_t) lhs, NBL_CONST_REF_ARG(type_t) rhs) + { + lhs = baseOp(lhs, rhs); + } + + const static type_t identity; +}; + +template +struct minus_assign > +{ + using type_t = emulated_int64_base; + using base_t = minus; + base_t baseOp; + void operator()(NBL_REF_ARG(type_t) lhs, NBL_CONST_REF_ARG(type_t) rhs) + { + lhs = baseOp(lhs, rhs); + } + + const static type_t identity; +}; + +template<> +NBL_CONSTEXPR_INLINE emulated_uint64_t plus_assign::identity = plus::identity; +template<> +NBL_CONSTEXPR_INLINE emulated_int64_t plus_assign::identity = plus::identity; +template<> +NBL_CONSTEXPR_INLINE emulated_uint64_t minus_assign::identity = minus::identity; +template<> +NBL_CONSTEXPR_INLINE emulated_int64_t minus_assign::identity = minus::identity; + +// ------------------------------------------------ TYPE TRAITS SATISFIED ----------------------------------------------------- + +template<> +struct is_signed : bool_constant {}; + +template<> +struct is_unsigned : bool_constant {}; + +// --------------------------------------------------- CONCEPTS SATISFIED ----------------------------------------------------- +namespace concepts +{ +namespace impl +{ 
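// Worked example (editorial sketch, not part of the diff): how the emulated 64-bit API above
// composes. The halves are little endian (`data.x` is the low word), addition and subtraction
// propagate through addCarry()/subBorrow(), and shifts cross the 32-bit word boundary
// explicitly; `lshift` below is just a local functor instance.
//
//   using namespace nbl::hlsl;
//   emulated_uint64_t a = emulated_uint64_t::create(0xFFFFFFFFu, 0x00000000u); // 2^32 - 1
//   emulated_uint64_t b = emulated_uint64_t::create(0x00000001u, 0x00000000u); // 1
//   emulated_uint64_t sum = a + b;    // low word wraps to 0, addCarry() bumps the high word: 2^32
//   emulated_uint64_t diff = sum - b; // subBorrow() propagates the borrow back down: 2^32 - 1
//   const bool less = b < a;          // high words tie, so the low words decide: true
//   left_shift_operator<emulated_uint64_t> lshift;
//   emulated_uint64_t shifted = lshift(a, 4u); // low bits spill into the high word: 0xF'FFFFFFF0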
+template +struct is_emulating_integral_scalar > +{ + NBL_CONSTEXPR_STATIC_INLINE bool value = true; +}; +} +} + +} //namespace nbl +} //namespace hlsl + +// Declare them as signed/unsigned versions of each other + +#ifndef __HLSL_VERSION +#define NBL_ADD_STD std:: +#else +#define NBL_ADD_STD nbl::hlsl:: +#endif + +template<> +struct NBL_ADD_STD make_unsigned : type_identity {}; + +template<> +struct NBL_ADD_STD make_unsigned : type_identity {}; + +template<> +struct NBL_ADD_STD make_signed : type_identity {}; + +template<> +struct NBL_ADD_STD make_signed : type_identity {}; + +#undef NBL_ADD_STD + + + +#endif diff --git a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl index 0053008aa4..3780ce001b 100644 --- a/include/nbl/builtin/hlsl/emulated/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/emulated/vector_t.hlsl @@ -2,6 +2,7 @@ #define _NBL_BUILTIN_HLSL_EMULATED_VECTOR_T_HLSL_INCLUDED_ #include +#include #include #include #include @@ -23,7 +24,7 @@ struct _2_component_vec static_assert(sizeof(T) <= 8); - NBL_CONSTEXPR_INLINE_FUNC void setComponent(uint32_t componentIdx, T val) + NBL_CONSTEXPR_FUNC void setComponent(uint32_t componentIdx, T val) { if (componentIdx == 0) x = val; @@ -31,7 +32,7 @@ struct _2_component_vec y = val; } - NBL_CONSTEXPR_INLINE_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC { if (componentIdx == 0) return x; @@ -39,9 +40,10 @@ struct _2_component_vec return y; // TODO: avoid code duplication, make it constexpr - using TAsUint = typename unsigned_integer_of_size::type; - TAsUint invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull); - return nbl::hlsl::bit_cast(invalidComponentValue); + //using TAsUint = typename unsigned_integer_of_size::type; + //TAsUint invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull); + //return nbl::hlsl::bit_cast(invalidComponentValue); + return nbl::hlsl::undef(); } NBL_CONSTEXPR_STATIC uint32_t Dimension = 2; @@ -55,7 +57,7 @@ struct _3_component_vec T z; - NBL_CONSTEXPR_INLINE_FUNC void setComponent(uint32_t componentIdx, T val) + NBL_CONSTEXPR_FUNC void setComponent(uint32_t componentIdx, T val) { if (componentIdx == 0) x = val; @@ -65,7 +67,7 @@ struct _3_component_vec z = val; } - NBL_CONSTEXPR_INLINE_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC { if (componentIdx == 0) return x; @@ -75,9 +77,10 @@ struct _3_component_vec return z; // TODO: avoid code duplication, make it constexpr - using TAsUint = typename unsigned_integer_of_size::type; - TAsUint invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull >> (64 - sizeof(T) * 8)); - return nbl::hlsl::bit_cast(invalidComponentValue); + //using TAsUint = typename unsigned_integer_of_size::type; + //TAsUint invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull >> (64 - sizeof(T) * 8)); + //return nbl::hlsl::bit_cast(invalidComponentValue); + return nbl::hlsl::undef(); } NBL_CONSTEXPR_STATIC uint32_t Dimension = 3; @@ -91,7 +94,7 @@ struct _4_component_vec T z; T w; - NBL_CONSTEXPR_INLINE_FUNC void setComponent(uint32_t componentIdx, T val) + NBL_CONSTEXPR_FUNC void setComponent(uint32_t componentIdx, T val) { if (componentIdx == 0) x = val; @@ -103,7 +106,7 @@ struct _4_component_vec w = val; } - NBL_CONSTEXPR_INLINE_FUNC T getComponent(uint32_t componentIdx) 
NBL_CONST_MEMBER_FUNC + NBL_CONSTEXPR_FUNC T getComponent(uint32_t componentIdx) NBL_CONST_MEMBER_FUNC { if (componentIdx == 0) return x; @@ -115,133 +118,210 @@ struct _4_component_vec return w; // TODO: avoid code duplication, make it constexpr - using TAsUint = typename unsigned_integer_of_size::type; - uint64_t invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull >> (64 - sizeof(T) * 8)); - return nbl::hlsl::bit_cast(invalidComponentValue); + //using TAsUint = typename unsigned_integer_of_size::type; + //uint64_t invalidComponentValue = nbl::hlsl::_static_cast(0xdeadbeefbadcaffeull >> (64 - sizeof(T) * 8)); + //return nbl::hlsl::bit_cast(invalidComponentValue); + return nbl::hlsl::undef(); } NBL_CONSTEXPR_STATIC uint32_t Dimension = 4; }; -template ::value> -struct emulated_vector : CRTP -{ - using this_t = emulated_vector; - using component_t = ComponentType; - - NBL_CONSTEXPR_STATIC_INLINE this_t create(this_t other) - { - CRTP output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, other.getComponent(i)); - } - NBL_CONSTEXPR_STATIC_INLINE this_t create(vector other) - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, other[i]); - - return output; - } - - NBL_CONSTEXPR_INLINE_FUNC this_t operator+(component_t val) - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, this_t::getComponent(i) + val); - - return output; - } - NBL_CONSTEXPR_INLINE_FUNC this_t operator+(this_t other) - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, this_t::getComponent(i) + other.getComponent(i)); - - return output; - } - NBL_CONSTEXPR_INLINE_FUNC this_t operator+(vector other) - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, this_t::getComponent(i) + other[i]); - - return output; - } - - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(component_t val) - { - this_t output; +template +struct emulated_vector; - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) - val); +// Generic ComponentType vectors still have to be partial specialized based on whether they're fundamental and/or integral - return output; - } - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(this_t other) - { - this_t output; - - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) - other.getComponent(i)); +#define NBL_EMULATED_VECTOR_UNARY_OPERATOR(OP)\ +NBL_CONSTEXPR_FUNC this_t operator##OP() NBL_CONST_MEMBER_FUNC \ +{\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, this_t::getComponent(i).operator##OP());\ + return output;\ +} - return output; - } - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(vector other) - { - this_t output; +#define NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(OP)\ +NBL_CONSTEXPR_FUNC this_t operator##OP (component_t val) NBL_CONST_MEMBER_FUNC \ +{\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, this_t::getComponent(i) OP val);\ + return output;\ +}\ +NBL_CONSTEXPR_FUNC this_t operator##OP (this_t other) NBL_CONST_MEMBER_FUNC \ +{\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, this_t::getComponent(i) OP other.getComponent(i));\ + return output;\ +} - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, 
CRTP::getComponent(i) - other[i]); +#define NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(OP) NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(OP)\ +NBL_CONSTEXPR_FUNC this_t operator##OP(vector other) NBL_CONST_MEMBER_FUNC \ +{\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, this_t::getComponent(i) OP other[i]);\ + return output;\ +} - return output; - } +#define NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(OP) NBL_CONSTEXPR_FUNC vector operator##OP (this_t other) NBL_CONST_MEMBER_FUNC \ +{\ + vector output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output[i] = CRTP::getComponent(i) OP other.getComponent(i);\ + return output;\ +} - NBL_CONSTEXPR_INLINE_FUNC this_t operator*(component_t val) - { - this_t output; +#define NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(OP) NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(OP)\ +NBL_CONSTEXPR_FUNC vector operator##OP (vector other) NBL_CONST_MEMBER_FUNC \ +{\ + vector output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output[i] = CRTP::getComponent(i) OP other[i];\ + return output;\ +} - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) * val); +#define NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM \ +using this_t = emulated_vector;\ +using component_t = ComponentType;\ +NBL_CONSTEXPR_STATIC_FUNC this_t create(this_t other)\ +{\ + CRTP output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, other.getComponent(i));\ +}\ +NBL_CONSTEXPR_FUNC component_t calcComponentSum() NBL_CONST_MEMBER_FUNC \ +{\ + component_t sum = CRTP::getComponent(0);\ + [[unroll]]\ + for (uint32_t i = 1u; i < CRTP::Dimension; ++i)\ + sum = sum + CRTP::getComponent(i);\ + return sum;\ +} - return output; - } - NBL_CONSTEXPR_INLINE_FUNC this_t operator*(this_t other) - { - this_t output; +#define NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_CREATION_AND_COMPONENT_SUM NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM \ +NBL_CONSTEXPR_STATIC_FUNC this_t create(vector other)\ +{\ + this_t output;\ + [[unroll]]\ + for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ + output.setComponent(i, other[i]);\ + return output;\ +} - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) * other.getComponent(i)); +// Fundamental, integral +template NBL_PARTIAL_REQ_TOP(is_fundamental_v && concepts::IntegralLikeScalar) +struct emulated_vector&& concepts::IntegralLikeScalar) > : CRTP +{ + // Creation for fundamental type + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_CREATION_AND_COMPONENT_SUM + // Operators, including integral + NBL_EMULATED_VECTOR_UNARY_OPERATOR(~) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(&) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(|) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(^) + NBL_EMULATED_VECTOR_UNARY_OPERATOR(-) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(+) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(-) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(*) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(/) + // Comparison operators + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(==) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(!=) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(<) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(<=) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(>) + 
NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(>=) +}; - return output; - } - NBL_CONSTEXPR_INLINE_FUNC this_t operator*(vector other) - { - this_t output; +// Fundamental, not integral +template NBL_PARTIAL_REQ_TOP(is_fundamental_v && !concepts::IntegralLikeScalar) +struct emulated_vector && !concepts::IntegralLikeScalar) > : CRTP +{ + // Creation for fundamental type + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_CREATION_AND_COMPONENT_SUM + // Operators + NBL_EMULATED_VECTOR_UNARY_OPERATOR(-) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(+) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(-) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(*) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR(/) + // Comparison operators + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(==) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(!=) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(<) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(<=) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(>) + NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR(>=) +}; - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, CRTP::getComponent(i) * other[i]); +// Not fundamental, integral +template NBL_PARTIAL_REQ_TOP(!is_fundamental_v && concepts::IntegralLikeScalar) +struct emulated_vector && concepts::IntegralLikeScalar) > : CRTP +{ + // Creation + NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM + // Operators, including integral + NBL_EMULATED_VECTOR_UNARY_OPERATOR(~) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(&) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(|) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(^) + NBL_EMULATED_VECTOR_UNARY_OPERATOR(-) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(+) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(-) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(*) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(/) + // Comparison operators + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(==) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(!=) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(<) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(<=) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(>) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(>=) +}; - return output; - } +// Not fundamental, not integral +template NBL_PARTIAL_REQ_TOP(!is_fundamental_v && !concepts::IntegralLikeScalar) +struct emulated_vector && !concepts::IntegralLikeScalar) > : CRTP +{ + // Creation + NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM + // Operators + NBL_EMULATED_VECTOR_UNARY_OPERATOR(-) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(+) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(-) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(*) + NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR(/) + // Comparison operators + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(==) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(!=) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(<) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(<=) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(>) + NBL_EMULATED_VECTOR_COMPARISON_OPERATOR(>=) +}; - NBL_CONSTEXPR_INLINE_FUNC component_t calcComponentSum() - { - component_t sum = 0; - for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - sum = sum + CRTP::getComponent(i); +#undef NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_CREATION_AND_COMPONENT_SUM +#undef NBL_EMULATED_VECTOR_CREATION_AND_COMPONENT_SUM +#undef NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_COMPARISON_OPERATOR +#undef NBL_EMULATED_VECTOR_COMPARISON_OPERATOR +#undef NBL_EMULATED_FUNDAMENTAL_TYPE_VECTOR_ARITHMETIC_OPERATOR +#undef 
NBL_EMULATED_VECTOR_ARITHMETIC_OPERATOR +#undef NBL_EMULATED_VECTOR_UNARY_OPERATOR - return sum; - } -}; +// ----------------------------------------------------- EMULATED FLOAT SPECIALIZATION -------------------------------------------------------------------- #define DEFINE_OPERATORS_FOR_TYPE(...)\ -NBL_CONSTEXPR_INLINE_FUNC this_t operator+(__VA_ARGS__ val)\ +NBL_CONSTEXPR_FUNC this_t operator+(__VA_ARGS__ val) NBL_CONST_MEMBER_FUNC \ {\ this_t output;\ for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ @@ -250,7 +330,7 @@ NBL_CONSTEXPR_INLINE_FUNC this_t operator+(__VA_ARGS__ val)\ return output;\ }\ \ -NBL_CONSTEXPR_INLINE_FUNC this_t operator-(__VA_ARGS__ val)\ +NBL_CONSTEXPR_FUNC this_t operator-(__VA_ARGS__ val) NBL_CONST_MEMBER_FUNC \ {\ this_t output;\ for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ @@ -259,7 +339,7 @@ NBL_CONSTEXPR_INLINE_FUNC this_t operator-(__VA_ARGS__ val)\ return output;\ }\ \ -NBL_CONSTEXPR_INLINE_FUNC this_t operator*(__VA_ARGS__ val)\ +NBL_CONSTEXPR_FUNC this_t operator*(__VA_ARGS__ val) NBL_CONST_MEMBER_FUNC \ {\ this_t output;\ for (uint32_t i = 0u; i < CRTP::Dimension; ++i)\ @@ -269,14 +349,14 @@ NBL_CONSTEXPR_INLINE_FUNC this_t operator*(__VA_ARGS__ val)\ }\ \ -// TODO: some of code duplication could be avoided -template -struct emulated_vector : CRTP + +template +struct emulated_vector, CRTP> : CRTP { - using component_t = ComponentType; - using this_t = emulated_vector; + using component_t = emulated_float64_t; + using this_t = emulated_vector; - NBL_CONSTEXPR_STATIC_INLINE this_t create(this_t other) + NBL_CONSTEXPR_STATIC_FUNC this_t create(this_t other) { this_t output; @@ -287,17 +367,17 @@ struct emulated_vector : CRTP } template - NBL_CONSTEXPR_STATIC_INLINE this_t create(vector other) + NBL_CONSTEXPR_STATIC_FUNC this_t create(vector other) { this_t output; for (uint32_t i = 0u; i < CRTP::Dimension; ++i) - output.setComponent(i, ComponentType::create(other[i])); + output.setComponent(i, component_t::create(other[i])); return output; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator+(this_t other) + NBL_CONSTEXPR_FUNC this_t operator+(this_t other) NBL_CONST_MEMBER_FUNC { this_t output; @@ -306,7 +386,7 @@ struct emulated_vector : CRTP return output; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator-(this_t other) + NBL_CONSTEXPR_FUNC this_t operator-(this_t other) NBL_CONST_MEMBER_FUNC { this_t output; @@ -315,7 +395,7 @@ struct emulated_vector : CRTP return output; } - NBL_CONSTEXPR_INLINE_FUNC this_t operator*(this_t other) + NBL_CONSTEXPR_FUNC this_t operator*(this_t other) NBL_CONST_MEMBER_FUNC { this_t output; @@ -338,9 +418,9 @@ struct emulated_vector : CRTP DEFINE_OPERATORS_FOR_TYPE(int32_t) DEFINE_OPERATORS_FOR_TYPE(int64_t) - NBL_CONSTEXPR_INLINE_FUNC ComponentType calcComponentSum() + NBL_CONSTEXPR_FUNC component_t calcComponentSum() NBL_CONST_MEMBER_FUNC { - ComponentType sum = ComponentType::create(0); + component_t sum = component_t::create(0); for (uint32_t i = 0u; i < CRTP::Dimension; ++i) sum = sum + CRTP::getComponent(i); @@ -425,7 +505,7 @@ namespace impl template struct static_cast_helper, vector, void> { - static inline emulated_vector_t2 cast(vector vec) + NBL_CONSTEXPR_STATIC_FUNC emulated_vector_t2 cast(NBL_CONST_REF_ARG(vector) vec) { emulated_vector_t2 output; output.x = _static_cast(vec.x); @@ -438,7 +518,7 @@ struct static_cast_helper, vector, void> template struct static_cast_helper, vector, void> { - static inline emulated_vector_t3 cast(vector vec) + NBL_CONSTEXPR_STATIC_FUNC emulated_vector_t3 
cast(NBL_CONST_REF_ARG(vector) vec) { emulated_vector_t3 output; output.x = _static_cast(vec.x); @@ -452,7 +532,7 @@ struct static_cast_helper, vector, void> template struct static_cast_helper, vector, void> { - static inline emulated_vector_t4 cast(vector vec) + NBL_CONSTEXPR_STATIC_FUNC emulated_vector_t4 cast(NBL_CONST_REF_ARG(vector) vec) { emulated_vector_t4 output; output.x = _static_cast(vec.x); @@ -470,12 +550,13 @@ struct static_cast_helper, emulated_vector_t; using InputVecType = emulated_vector_t; - static inline OutputVecType cast(InputVecType vec) + NBL_CONSTEXPR_STATIC_FUNC OutputVecType cast(NBL_CONST_REF_ARG(InputVecType) vec) { array_get getter; array_set setter; OutputVecType output; + [[unroll]] for (int i = 0; i < N; ++i) setter(output, i, _static_cast(getter(vec, i))); @@ -483,7 +564,77 @@ struct static_cast_helper, emulated_vector_t\ +struct static_cast_helper, emulated_vector_t##N , void>\ +{\ + using OutputVecType = emulated_vector_t##N ;\ + using InputVecType = emulated_vector_t##N ;\ + NBL_CONSTEXPR_STATIC_FUNC OutputVecType cast(NBL_CONST_REF_ARG(InputVecType) vec)\ + {\ + array_get getter;\ + array_set setter;\ + OutputVecType output;\ + [[unroll]]\ + for (int i = 0; i < N; ++i)\ + setter(output, i, _static_cast(getter(vec, i)));\ + return output;\ + }\ +}; + +NBL_EMULATED_VEC_TO_EMULATED_VEC_STATIC_CAST(2) +NBL_EMULATED_VEC_TO_EMULATED_VEC_STATIC_CAST(3) +NBL_EMULATED_VEC_TO_EMULATED_VEC_STATIC_CAST(4) + +#undef NBL_EMULATED_VEC_TO_EMULATED_VEC_STATIC_CAST + +#define NBL_EMULATED_VEC_PROMOTION(N) template\ +struct Promote, ComponentType>\ +{\ + using VecType = emulated_vector_t##N ;\ + NBL_CONSTEXPR_FUNC VecType operator()(NBL_CONST_REF_ARG(ComponentType) v)\ + {\ + array_set setter;\ + VecType promoted;\ + [[unroll]]\ + for (int i = 0; i < N; ++i)\ + setter(promoted, i, v);\ + return promoted;\ + }\ +}; + +NBL_EMULATED_VEC_PROMOTION(2) +NBL_EMULATED_VEC_PROMOTION(3) +NBL_EMULATED_VEC_PROMOTION(4) + +#undef NBL_EMULATED_VEC_PROMOTION + +#define NBL_EMULATED_VEC_TRUNCATION(N, M) template\ +struct Truncate, emulated_vector_t##M >\ +{\ + using OutputVecType = emulated_vector_t##N ;\ + using InputVecType = emulated_vector_t##M ;\ + NBL_CONSTEXPR_FUNC OutputVecType operator()(NBL_CONST_REF_ARG(InputVecType) vec)\ + {\ + array_get getter;\ + array_set setter;\ + OutputVecType output;\ + [[unroll]]\ + for (int i = 0; i < N; ++i)\ + setter(output, i, getter(vec, i));\ + return output;\ + }\ +}; + +NBL_EMULATED_VEC_TRUNCATION(2, 2) +NBL_EMULATED_VEC_TRUNCATION(2, 3) +NBL_EMULATED_VEC_TRUNCATION(2, 4) +NBL_EMULATED_VEC_TRUNCATION(3, 3) +NBL_EMULATED_VEC_TRUNCATION(3, 4) +NBL_EMULATED_VEC_TRUNCATION(4, 4) + +#undef NBL_EMULATED_VEC_TRUNCATION + +} //namespace impl } } diff --git a/include/nbl/builtin/hlsl/functional.hlsl b/include/nbl/builtin/hlsl/functional.hlsl index 25d822a940..76b527f6bd 100644 --- a/include/nbl/builtin/hlsl/functional.hlsl +++ b/include/nbl/builtin/hlsl/functional.hlsl @@ -7,6 +7,7 @@ #include "nbl/builtin/hlsl/glsl_compat/core.hlsl" #include "nbl/builtin/hlsl/limits.hlsl" +#include "nbl/builtin/hlsl/concepts/vector.hlsl" namespace nbl @@ -79,7 +80,7 @@ struct reference_wrapper : enable_if_t< // TODO: partial specializations for T being a special SPIR-V type for image ops, etc. 
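(Aside: to make the Promote/Truncate specializations above concrete, here is a minimal plain-C++ analogue. The array-backed vec<T, N> is a hypothetical stand-in for Nabla's emulated vectors; it only illustrates the broadcast and keep-first-N-components semantics, not the real types.)

#include <array>
#include <cstddef>

template<typename T, std::size_t N>
using vec = std::array<T, N>; // stand-in for emulated_vector_t<N, T>

// Promote: broadcast one scalar into every component
template<typename T, std::size_t N>
vec<T, N> promote(const T& v)
{
    vec<T, N> out;
    for (std::size_t i = 0; i < N; ++i)
        out[i] = v;
    return out;
}

// Truncate: keep the first N components of a wider M-component vector
template<typename T, std::size_t N, std::size_t M>
vec<T, N> truncate(const vec<T, M>& in)
{
    static_assert(N <= M, "can only truncate to a narrower vector");
    vec<T, N> out;
    for (std::size_t i = 0; i < N; ++i)
        out[i] = in[i];
    return out;
}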
-#define ALIAS_STD(NAME,OP) template struct NAME { \
+#define ALIAS_STD(NAME,OP) template struct NAME { \
 using type_t = T; \
 \
 T operator()(NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs) \
@@ -90,8 +91,7 @@ struct reference_wrapper : enable_if_t<
 #else // CPP
-
-#define ALIAS_STD(NAME,OP) template struct NAME : std::NAME { \
+#define ALIAS_STD(NAME,OP) template struct NAME : std::NAME { \
 using type_t = T;
 #endif
@@ -134,15 +134,90 @@ ALIAS_STD(divides,/)
 NBL_CONSTEXPR_STATIC_INLINE T identity = T(1);
 };
+#ifndef __HLSL_VERSION
+
+template
+struct bit_not : std::bit_not
+{
+    using type_t = T;
+};
+
+#else
+
+template
+struct bit_not
+{
+    using type_t = T;
+
+    T operator()(NBL_CONST_REF_ARG(T) operand)
+    {
+        return ~operand;
+    }
+};
+
+// The default version above only works for fundamental scalars, vectors and matrices, because you can't call `~x` unless `x` is one of those.
+// Conversely, `x.operator~()` is only valid for types that overload that operator, never for the fundamental ones. So we need a specialization.
+template NBL_PARTIAL_REQ_TOP(!(concepts::Scalar || concepts::Vector || concepts::Matrix))
+struct bit_not || concepts::Vector || concepts::Matrix)) >
+{
+    using type_t = T;
+
+    T operator()(NBL_CONST_REF_ARG(T) operand)
+    {
+        return operand.operator~();
+    }
+};
+
+#endif
-ALIAS_STD(greater,>) };
-ALIAS_STD(less,<) };
-ALIAS_STD(greater_equal,>=) };
-ALIAS_STD(less_equal,<=) };
+ALIAS_STD(equal_to, ==) };
+ALIAS_STD(not_equal_to, !=) };
+ALIAS_STD(greater, >) };
+ALIAS_STD(less, <) };
+ALIAS_STD(greater_equal, >=) };
+ALIAS_STD(less_equal, <=) };
 #undef ALIAS_STD
-// ------------------------ Compound assignment operators ----------------------
+// The comparison operators above return bool in std::, but in HLSL they're supposed to yield bool vectors, so we specialize them to return `vector` for vectorial types
+
+// GLM doesn't have operators on vectors
+#ifndef __HLSL_VERSION
+
+#define NBL_COMPARISON_VECTORIAL_SPECIALIZATION(NAME, OP, GLM_OP) template NBL_PARTIAL_REQ_TOP(concepts::Vectorial)\
+struct NAME ) >\
+{\
+    using type_t = T;\
+    vector::Dimension> operator()(NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs)\
+    {\
+        return glm::GLM_OP (lhs, rhs);\
+    }\
+};
+
+#else
+
+#define NBL_COMPARISON_VECTORIAL_SPECIALIZATION(NAME, OP, GLM_OP) template NBL_PARTIAL_REQ_TOP(concepts::Vectorial)\
+struct NAME ) >\
+{\
+    using type_t = T;\
+    vector::Dimension> operator()(NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs)\
+    {\
+        return lhs OP rhs;\
+    }\
+};
+
+#endif
+
+NBL_COMPARISON_VECTORIAL_SPECIALIZATION(equal_to, ==, equal)
+NBL_COMPARISON_VECTORIAL_SPECIALIZATION(not_equal_to, !=, notEqual)
+NBL_COMPARISON_VECTORIAL_SPECIALIZATION(greater, >, greaterThan)
+NBL_COMPARISON_VECTORIAL_SPECIALIZATION(less, <, lessThan)
+NBL_COMPARISON_VECTORIAL_SPECIALIZATION(greater_equal, >=, greaterThanEqual)
+NBL_COMPARISON_VECTORIAL_SPECIALIZATION(less_equal, <=, lessThanEqual)
+
+#undef NBL_COMPARISON_VECTORIAL_SPECIALIZATION
+
+// ------------------------------------------------------------- COMPOUND ASSIGNMENT OPERATORS --------------------------------------------------------------------
 #define COMPOUND_ASSIGN(NAME) template struct NAME##_assign { \
 using type_t = T; \
@@ -163,9 +238,9 @@ COMPOUND_ASSIGN(divides)
 #undef COMPOUND_ASSIGN
-// ----------------- End of compound assignment ops ----------------
+// ---------------------------------------------------------------- MIN, MAX, TERNARY -------------------------------------------------------------------------
-// Min, Max and Ternary Operator don't use ALIAS_STD because they don't exist in STD
+// Min, Max, Ternary, and Shift operators don't use ALIAS_STD because they don't exist in std
 // TODO: implement as mix(rhs
 struct minimum
@@ -195,18 +270,226 @@ struct maximum
 NBL_CONSTEXPR_STATIC_INLINE T identity = numeric_limits::lowest; // TODO: `all_components`
 };
-template
+template
 struct ternary_operator
 {
 using type_t = T;
-    T operator()(bool condition, NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs)
+    NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(bool) condition, NBL_CONST_REF_ARG(T) lhs, NBL_CONST_REF_ARG(T) rhs)
+    {
+        return select(condition, lhs, rhs);
+    }
+};
+
+// ----------------------------------------------------------------- SHIFT OPERATORS --------------------------------------------------------------------
+
+template
+struct left_shift_operator
+{
+    using type_t = T;
+
+    NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits)
+    {
+        return operand << bits;
+    }
+};
+
+template NBL_PARTIAL_REQ_TOP(concepts::IntVector)
+struct left_shift_operator) >
+{
+    using type_t = T;
+    using scalar_t = scalar_type_t;
+
+    NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits)
+    {
+        return operand << bits;
+    }
+
+    NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits)
     {
-        return condition ? lhs : rhs;
+        return operand << bits;
     }
 };
-}
-}
+template NBL_PARTIAL_REQ_TOP(!concepts::IntVector && concepts::IntegralLikeVectorial)
+struct left_shift_operator && concepts::IntegralLikeVectorial) >
+{
+    using type_t = T;
+    using scalar_t = typename vector_traits::scalar_type;
+
+    NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits)
+    {
+        array_get getter;
+        array_set setter;
+        NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v);
+        left_shift_operator leftShift;
+        T shifted;
+        [[unroll]]
+        for (uint16_t i = 0; i < extent; i++)
+        {
+            setter(shifted, i, leftShift(getter(operand, i), getter(bits, i)));
+        }
+        return shifted;
+    }
+
+    NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits)
+    {
+        array_get getter;
+        array_set setter;
+        NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v);
+        left_shift_operator leftShift;
+        T shifted;
+        [[unroll]]
+        for (uint16_t i = 0; i < extent; i++)
+        {
+            setter(shifted, i, leftShift(getter(operand, i), bits));
+        }
+        return shifted;
+    }
+
+    NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(vector::Dimension>) bits)
+    {
+        array_get getter;
+        array_set setter;
+        NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v);
+        left_shift_operator leftShift;
+        T shifted;
+        [[unroll]]
+        for (uint16_t i = 0; i < extent; i++)
+        {
+            setter(shifted, i, leftShift(getter(operand, i), bits[i]));
+        }
+        return shifted;
+    }
+
+    NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(uint16_t) bits)
+    {
+        array_get getter;
+        array_set setter;
+        NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v);
+        left_shift_operator leftShift;
+        T shifted;
+        [[unroll]]
+        for (uint16_t i = 0; i < extent; i++)
+        {
+            setter(shifted, i, leftShift(getter(operand, i), bits));
+        }
+        return shifted;
+    }
+};
+
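(Aside: the IntegralLikeVectorial specialization above cannot rely on a native `<<`, so it fans the shift out per component through array_get/array_set. A freestanding C++ sketch of that loop, with a hypothetical two-component carrier struct standing in for the emulated vector:)

#include <cassert>
#include <cstdint>

struct u32x2 { uint32_t c[2]; }; // hypothetical 2-component carrier

// Scalar analogue of the per-component loop above:
// array_get/array_set become plain indexing.
u32x2 leftShiftPerComponent(u32x2 operand, u32x2 bits)
{
    u32x2 shifted;
    for (int i = 0; i < 2; ++i)
        shifted.c[i] = operand.c[i] << bits.c[i];
    return shifted;
}

int main()
{
    const u32x2 r = leftShiftPerComponent({{1u, 2u}}, {{4u, 8u}});
    assert(r.c[0] == 0x10u && r.c[1] == 0x200u);
}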
+template
+struct arithmetic_right_shift_operator
+{
+    using type_t = T;
+
+    NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits)
+    {
+        return operand >> bits;
+    }
+};
+
+template NBL_PARTIAL_REQ_TOP(concepts::IntVector)
+struct arithmetic_right_shift_operator) >
+{
+    using type_t = T;
+    using scalar_t = scalar_type_t;
+
+    NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits)
+    {
+        return operand >> bits;
+    }
+
+    NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits)
+    {
+        return operand >> bits;
+    }
+};
+
+template NBL_PARTIAL_REQ_TOP(!concepts::IntVector && concepts::IntegralLikeVectorial)
+struct arithmetic_right_shift_operator && concepts::IntegralLikeVectorial) >
+{
+    using type_t = T;
+    using scalar_t = typename vector_traits::scalar_type;
+
+    NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits)
+    {
+        array_get getter;
+        array_set setter;
+        NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v);
+        arithmetic_right_shift_operator rightShift;
+        T shifted;
+        [[unroll]]
+        for (uint16_t i = 0; i < extent; i++)
+        {
+            setter(shifted, i, rightShift(getter(operand, i), getter(bits, i)));
+        }
+        return shifted;
+    }
+
+    NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(scalar_t) bits)
+    {
+        array_get getter;
+        array_set setter;
+        NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v);
+        arithmetic_right_shift_operator rightShift;
+        T shifted;
+        [[unroll]]
+        for (uint16_t i = 0; i < extent; i++)
+        {
+            setter(shifted, i, rightShift(getter(operand, i), bits));
+        }
+        return shifted;
+    }
+
+    NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(vector::Dimension>) bits)
+    {
+        array_get getter;
+        array_set setter;
+        NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v);
+        arithmetic_right_shift_operator rightShift;
+        T shifted;
+        [[unroll]]
+        for (uint16_t i = 0; i < extent; i++)
+        {
+            setter(shifted, i, rightShift(getter(operand, i), bits[i]));
+        }
+        return shifted;
+    }
+
+    NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(uint16_t) bits)
+    {
+        array_get getter;
+        array_set setter;
+        NBL_CONSTEXPR_STATIC uint16_t extent = uint16_t(extent_v);
+        arithmetic_right_shift_operator rightShift;
+        T shifted;
+        [[unroll]]
+        for (uint16_t i = 0; i < extent; i++)
+        {
+            setter(shifted, i, rightShift(getter(operand, i), bits));
+        }
+        return shifted;
+    }
+};
+
+// Left unimplemented for vectorial types by default
+template
+struct logical_right_shift_operator
+{
+    using type_t = T;
+    using unsigned_type_t = make_unsigned_t;
+
+    NBL_CONSTEXPR_FUNC T operator()(NBL_CONST_REF_ARG(T) operand, NBL_CONST_REF_ARG(T) bits)
+    {
+        arithmetic_right_shift_operator arithmeticRightShift;
+        return _static_cast(arithmeticRightShift(_static_cast(operand), _static_cast(bits)));
+    }
+};
+
+
+} //namespace hlsl
+} //namespace nbl
 #endif
\ No newline at end of file
diff --git a/include/nbl/builtin/hlsl/ieee754.hlsl b/include/nbl/builtin/hlsl/ieee754.hlsl
index 4b281c2111..72a9d2fe59 100644
--- a/include/nbl/builtin/hlsl/ieee754.hlsl
+++ b/include/nbl/builtin/hlsl/ieee754.hlsl
@@ -89,7 +89,7 @@ inline int extractExponent(T x)
 }
 
 template
-NBL_CONSTEXPR_INLINE_FUNC T replaceBiasedExponent(T x, typename unsigned_integer_of_size::type biasedExp)
+NBL_CONSTEXPR_FUNC T replaceBiasedExponent(T x, typename unsigned_integer_of_size::type biasedExp)
 {
 using AsFloat = typename float_of_size::type;
 return impl::castBackToFloatType(glsl::bitfieldInsert(ieee754::impl::bitCastToUintType(x), biasedExp, traits::mantissaBitCnt, traits::exponentBitCnt));
@@ -97,20 +97,20 @@
NBL_CONSTEXPR_INLINE_FUNC T replaceBiasedExponent(T x, typename unsigned_integer // performs no overflow tests, returns x*exp2(n) template -NBL_CONSTEXPR_INLINE_FUNC T fastMulExp2(T x, int n) +NBL_CONSTEXPR_FUNC T fastMulExp2(T x, int n) { return replaceBiasedExponent(x, extractBiasedExponent(x) + uint32_t(n)); } template -NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type extractMantissa(T x) +NBL_CONSTEXPR_FUNC typename unsigned_integer_of_size::type extractMantissa(T x) { using AsUint = typename unsigned_integer_of_size::type; return ieee754::impl::bitCastToUintType(x) & traits::type>::mantissaMask; } template -NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type extractNormalizeMantissa(T x) +NBL_CONSTEXPR_FUNC typename unsigned_integer_of_size::type extractNormalizeMantissa(T x) { using AsUint = typename unsigned_integer_of_size::type; using AsFloat = typename float_of_size::type; @@ -118,21 +118,21 @@ NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type ext } template -NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type extractSign(T x) +NBL_CONSTEXPR_FUNC typename unsigned_integer_of_size::type extractSign(T x) { using AsFloat = typename float_of_size::type; return (ieee754::impl::bitCastToUintType(x) & traits::signMask) >> ((sizeof(T) * 8) - 1); } template -NBL_CONSTEXPR_INLINE_FUNC typename unsigned_integer_of_size::type extractSignPreserveBitPattern(T x) +NBL_CONSTEXPR_FUNC typename unsigned_integer_of_size::type extractSignPreserveBitPattern(T x) { using AsFloat = typename float_of_size::type; return ieee754::impl::bitCastToUintType(x) & traits::signMask; } template ) -NBL_CONSTEXPR_INLINE_FUNC FloatingPoint copySign(FloatingPoint to, FloatingPoint from) +NBL_CONSTEXPR_FUNC FloatingPoint copySign(FloatingPoint to, FloatingPoint from) { using AsUint = typename unsigned_integer_of_size::type; @@ -143,7 +143,7 @@ NBL_CONSTEXPR_INLINE_FUNC FloatingPoint copySign(FloatingPoint to, FloatingPoint } template ) -NBL_CONSTEXPR_INLINE_FUNC FloatingPoint flipSign(FloatingPoint val, bool flip = true) +NBL_CONSTEXPR_FUNC FloatingPoint flipSign(FloatingPoint val, bool flip = true) { using AsFloat = typename float_of_size::type; using AsUint = typename unsigned_integer_of_size::type; diff --git a/include/nbl/builtin/hlsl/ieee754/impl.hlsl b/include/nbl/builtin/hlsl/ieee754/impl.hlsl index ad8a3f9228..69fba9795f 100644 --- a/include/nbl/builtin/hlsl/ieee754/impl.hlsl +++ b/include/nbl/builtin/hlsl/ieee754/impl.hlsl @@ -15,25 +15,25 @@ namespace ieee754 namespace impl { template -NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size_t bitCastToUintType(T x) +NBL_CONSTEXPR_FUNC unsigned_integer_of_size_t bitCastToUintType(T x) { using AsUint = unsigned_integer_of_size_t; return bit_cast(x); } // to avoid bit cast from uintN_t to uintN_t -template <> NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size_t<2> bitCastToUintType(uint16_t x) { return x; } -template <> NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size_t<4> bitCastToUintType(uint32_t x) { return x; } -template <> NBL_CONSTEXPR_INLINE_FUNC unsigned_integer_of_size_t<8> bitCastToUintType(uint64_t x) { return x; } +template <> NBL_CONSTEXPR_FUNC unsigned_integer_of_size_t<2> bitCastToUintType(uint16_t x) { return x; } +template <> NBL_CONSTEXPR_FUNC unsigned_integer_of_size_t<4> bitCastToUintType(uint32_t x) { return x; } +template <> NBL_CONSTEXPR_FUNC unsigned_integer_of_size_t<8> bitCastToUintType(uint64_t x) { return x; } template -NBL_CONSTEXPR_INLINE_FUNC T castBackToFloatType(T x) 
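(Aside: as a sanity check on the exponent-manipulation helpers above, here is a freestanding C++ illustration of the same trick, adding n to the biased exponent field to scale by exp2(n). The bit-layout constant is for IEEE-754 binary32 specifically, not Nabla's generic traits.)

#include <cassert>
#include <cstdint>
#include <cstring>

// Scale a finite, normal float by 2^n by bumping the biased exponent field.
// No overflow/subnormal checks, mirroring the "performs no overflow tests" note.
float fastMulExp2(float x, int n)
{
    uint32_t bits;
    std::memcpy(&bits, &x, sizeof(bits));
    bits += static_cast<uint32_t>(n) << 23; // 23 = mantissa bit count of binary32
    float out;
    std::memcpy(&out, &bits, sizeof(out));
    return out;
}

int main()
{
    assert(fastMulExp2(1.5f, 3) == 12.0f); // 1.5 * 2^3
    assert(fastMulExp2(8.0f, -2) == 2.0f); // 8 * 2^-2
}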
+NBL_CONSTEXPR_FUNC T castBackToFloatType(T x)
 {
 using AsFloat = typename float_of_size::type;
 return bit_cast(x);
 }
-template<> NBL_CONSTEXPR_INLINE_FUNC uint16_t castBackToFloatType(uint16_t x) { return x; }
-template<> NBL_CONSTEXPR_INLINE_FUNC uint32_t castBackToFloatType(uint32_t x) { return x; }
-template<> NBL_CONSTEXPR_INLINE_FUNC uint64_t castBackToFloatType(uint64_t x) { return x; }
+template<> NBL_CONSTEXPR_FUNC uint16_t castBackToFloatType(uint16_t x) { return x; }
+template<> NBL_CONSTEXPR_FUNC uint32_t castBackToFloatType(uint32_t x) { return x; }
+template<> NBL_CONSTEXPR_FUNC uint64_t castBackToFloatType(uint64_t x) { return x; }
 }
 }
diff --git a/include/nbl/builtin/hlsl/morton.hlsl b/include/nbl/builtin/hlsl/morton.hlsl
new file mode 100644
index 0000000000..d570e249c8
--- /dev/null
+++ b/include/nbl/builtin/hlsl/morton.hlsl
@@ -0,0 +1,624 @@
+#ifndef _NBL_BUILTIN_HLSL_MORTON_INCLUDED_
+#define _NBL_BUILTIN_HLSL_MORTON_INCLUDED_
+
+#include "nbl/builtin/hlsl/cpp_compat.hlsl"
+#include "nbl/builtin/hlsl/concepts/core.hlsl"
+#include "nbl/builtin/hlsl/bit.hlsl"
+#include "nbl/builtin/hlsl/functional.hlsl"
+#include "nbl/builtin/hlsl/emulated/int64_t.hlsl"
+#include "nbl/builtin/hlsl/mpl.hlsl"
+#include "nbl/builtin/hlsl/portable/vector_t.hlsl"
+
+// TODO: mega macro to get functional plus, minus, plus_assign, minus_assign
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace morton
+{
+
+namespace impl
+{
+
+// Valid dimension for a morton code
+template
+NBL_BOOL_CONCEPT Dimension = 1 < D && D < 5;
+
+// --------------------------------------------------------- MORTON ENCODE/DECODE MASKS ---------------------------------------------------
+
+NBL_CONSTEXPR uint16_t CodingStages = 5;
+
+template
+struct coding_mask;
+
+template
+NBL_CONSTEXPR T coding_mask_v = _static_cast(coding_mask::value);
+
+// It's a complete coin toss whether variable templates work or not, since they're a C++14 feature (not supported in HLSL2021). Most of the ones we use in Nabla work,
+// but this one will only work for some parameters and not for others. Therefore, this was made into a macro to inline where used
+
+#define NBL_MORTON_INTERLEAVE_MASKS(STORAGE_T, DIM, BITS, NAMESPACE_PREFIX) _static_cast >(\
+    truncate >(\
+        vector(NAMESPACE_PREFIX coding_mask_v< DIM, BITS, 0>,\
+            NAMESPACE_PREFIX coding_mask_v< DIM, BITS, 0> << 1,\
+            NAMESPACE_PREFIX coding_mask_v< DIM, BITS, 0> << 2,\
+            NAMESPACE_PREFIX coding_mask_v< DIM, BITS, 0> << 3)))
+
+
+template
+struct sign_mask : integral_constant {};
+
+template
+NBL_CONSTEXPR T sign_mask_v = _static_cast(sign_mask::value);
+
+#define NBL_MORTON_SIGN_MASKS(STORAGE_T, DIM, BITS) _static_cast >(\
+    truncate >(\
+        vector(sign_mask_v< DIM, BITS >,\
+            sign_mask_v< DIM, BITS > << 1,\
+            sign_mask_v< DIM, BITS > << 2,\
+            sign_mask_v< DIM, BITS > << 3)))
+
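(Aside: to make the staged masking concrete before the coding_mask specializations defined just below, here is a freestanding C++ transcription of the D = 2, Bits = 16 instance of the encode ladder, with a hypothetical helper name. Each stage ORs in a shifted copy and then masks, spreading every source bit to an even position.)

#include <cassert>
#include <cstdint>

// Spread the low 16 bits of x so each bit lands at an even position (D = 2).
uint32_t interleaveZeros16(uint32_t x)
{
    x &= 0x0000FFFFu;                  // stage-0 "killoff": only Bits*D bits matter
    x = (x | (x << 8)) & 0x00FF00FFu;  // groups bits by 8 on, 8 off
    x = (x | (x << 4)) & 0x0F0F0F0Fu;  // groups bits by 4 on, 4 off
    x = (x | (x << 2)) & 0x33333333u;  // groups bits by 2 on, 2 off
    x = (x | (x << 1)) & 0x55555555u;  // groups bits by 1 on, 1 off
    return x;
}

int main()
{
    // bits 0,1,3 of the input land at bits 0,2,6 of the output
    assert(interleaveZeros16(0b1011u) == 0b1000101u);
}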
+// 0th stage will be special: to avoid masking twice during encode/decode, and to get a proper mask that only extracts the relevant bits of a morton code, the 0th stage
+// mask also considers the total number of bits we're considering for a code (all other masks operate on a bit-agnostic basis).
+#define NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(DIM, BASE_VALUE) template struct coding_mask\
+{\
+    enum : uint64_t { _Bits = Bits };\
+    NBL_CONSTEXPR_STATIC_INLINE uint64_t KilloffMask = _Bits * DIM < 64 ? (uint64_t(1) << (_Bits * DIM)) - 1 : ~uint64_t(0);\
+    NBL_CONSTEXPR_STATIC_INLINE uint64_t value = uint64_t(BASE_VALUE) & KilloffMask;\
+};
+
+#define NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(DIM, STAGE, BASE_VALUE) template struct coding_mask\
+{\
+    NBL_CONSTEXPR_STATIC_INLINE uint64_t value = uint64_t(BASE_VALUE);\
+};
+
+// Final stage mask also counts exact number of bits, although maybe it's not necessary
+#define NBL_HLSL_MORTON_SPECIALIZE_LAST_CODING_MASKS template struct coding_mask\
+{\
+    enum : uint64_t { _Bits = Bits };\
+    NBL_CONSTEXPR_STATIC_INLINE uint64_t value = (uint64_t(1) << _Bits) - 1;\
+};
+
+NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(2, 0x5555555555555555ull) // Groups bits by 1 on, 1 off
+NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 1, 0x3333333333333333ull) // Groups bits by 2 on, 2 off
+NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 2, 0x0F0F0F0F0F0F0F0Full) // Groups bits by 4 on, 4 off
+NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 3, 0x00FF00FF00FF00FFull) // Groups bits by 8 on, 8 off
+NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(2, 4, 0x0000FFFF0000FFFFull) // Groups bits by 16 on, 16 off
+
+NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(3, 0x9249249249249249ull) // Groups bits by 1 on, 2 off
+NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 1, 0x30C30C30C30C30C3ull) // Groups bits by 2 on, 4 off
+NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 2, 0xF00F00F00F00F00Full) // Groups bits by 4 on, 8 off
+NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 3, 0x00FF0000FF0000FFull) // Groups bits by 8 on, 16 off
+NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(3, 4, 0xFFFF00000000FFFFull) // Groups bits by 16 on, 32 off
+
+NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK(4, 0x1111111111111111ull) // Groups bits by 1 on, 3 off
+NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 1, 0x0303030303030303ull) // Groups bits by 2 on, 6 off
+NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 2, 0x000F000F000F000Full) // Groups bits by 4 on, 12 off
+NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 3, 0x000000FF000000FFull) // Groups bits by 8 on, 24 off
+NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK(4, 4, 0x000000000000FFFFull) // Groups bits by 16 on, 48 off (unused, but here for completeness; likely also keeps the compiler from complaining)
+
+NBL_HLSL_MORTON_SPECIALIZE_LAST_CODING_MASKS
+
+#undef NBL_HLSL_MORTON_SPECIALIZE_LAST_CODING_MASKS
+#undef NBL_HLSL_MORTON_SPECIALIZE_CODING_MASK
+#undef NBL_HLSL_MORTON_SPECIALIZE_FIRST_CODING_MASK
+
+// ----------------------------------------------------------------- MORTON TRANSCODER ---------------------------------------------------
+template && Dim * Bits <= 64 && 8 * sizeof(encode_t) == mpl::max_v, uint64_t(16)>)
+struct Transcoder
+{
+    template 16), vector, vector >
+    NBL_FUNC_REQUIRES(concepts::IntVector && 8 * sizeof(typename vector_traits::scalar_type) >= Bits)
+    /**
+    * @brief Interleaves each coordinate with `Dim - 1` zeros in between each bit, and left-shifts each by its coordinate index
+    *
+    * @param [in] decodedValue Cartesian coordinates to interleave and shift
+    */
+    NBL_CONSTEXPR_STATIC_FUNC portable_vector_t interleaveShift(NBL_CONST_REF_ARG(decode_t) decodedValue)
+    {
+        left_shift_operator > leftShift;
+        portable_vector_t interleaved = _static_cast >(decodedValue) & coding_mask_v;
+
+        #define ENCODE_LOOP_ITERATION(I) NBL_IF_CONSTEXPR(Bits > (uint16_t(1) << I))\
+        {\
+            interleaved = interleaved | leftShift(interleaved, (uint16_t(1) << I) * (Dim - 1));\
+            interleaved = interleaved & coding_mask_v;\
+        }
+        ENCODE_LOOP_ITERATION(4)
+        ENCODE_LOOP_ITERATION(3)
+        ENCODE_LOOP_ITERATION(2)
+        ENCODE_LOOP_ITERATION(1)
+ ENCODE_LOOP_ITERATION(0) + + #undef ENCODE_LOOP_ITERATION + + // After interleaving, shift each coordinate left by their index + return leftShift(interleaved, truncate >(vector(0, 1, 2, 3))); + } + + template 16), vector, vector > + NBL_FUNC_REQUIRES(concepts::IntVector && 8 * sizeof(typename vector_traits::scalar_type) >= Bits) + /** + * @brief Encodes a vector of cartesian coordinates as a Morton code + * + * @param [in] decodedValue Cartesian coordinates to encode + */ + NBL_CONSTEXPR_STATIC_FUNC encode_t encode(NBL_CONST_REF_ARG(decode_t) decodedValue) + { + const portable_vector_t interleaveShifted = interleaveShift(decodedValue); + + array_get, encode_t> getter; + encode_t encoded = getter(interleaveShifted, 0); + + [[unroll]] + for (uint16_t i = 1; i < Dim; i++) + encoded = encoded | getter(interleaveShifted, i); + + return encoded; + } + + template 16), vector, vector > + NBL_FUNC_REQUIRES(concepts::IntVector && 8 * sizeof(typename vector_traits::scalar_type) >= Bits) + /** + * @brief Decodes a Morton code back to a vector of cartesian coordinates + * + * @param [in] encodedValue Representation of a Morton code (binary code, not the morton class defined below) + */ + NBL_CONSTEXPR_STATIC_FUNC decode_t decode(NBL_CONST_REF_ARG(encode_t) encodedValue) + { + arithmetic_right_shift_operator encodedRightShift; + portable_vector_t decoded; + array_set, encode_t> setter; + // Write initial values into decoded + [[unroll]] + for (uint16_t i = 0; i < Dim; i++) + setter(decoded, i, encodedRightShift(encodedValue, i)); + + arithmetic_right_shift_operator > rightShift; + + #define DECODE_LOOP_ITERATION(I) NBL_IF_CONSTEXPR(Bits > (uint16_t(1) << I))\ + {\ + decoded = decoded & coding_mask_v;\ + decoded = decoded | rightShift(decoded, (uint16_t(1) << I) * (Dim - 1));\ + } + + DECODE_LOOP_ITERATION(0) + DECODE_LOOP_ITERATION(1) + DECODE_LOOP_ITERATION(2) + DECODE_LOOP_ITERATION(3) + DECODE_LOOP_ITERATION(4) + + #undef DECODE_LOOP_ITERATION + + // If `Bits` is greater than half the bitwidth of the decode type, then we can avoid `&`ing against the last mask since duplicated MSB get truncated + NBL_IF_CONSTEXPR(Bits > 4 * sizeof(typename vector_traits::scalar_type)) + return _static_cast(decoded); + else + return _static_cast(decoded & coding_mask_v); + } +}; + +// ---------------------------------------------------- COMPARISON OPERATORS --------------------------------------------------------------- +// Here because no partial specialization of methods +// `BitsAlreadySpread` assumes both pre-interleaved and pre-shifted + +template +NBL_BOOL_CONCEPT Comparable = concepts::IntegralLikeScalar && is_signed_v == Signed && ((BitsAlreadySpread && sizeof(I) == sizeof(storage_t)) || (!BitsAlreadySpread && 8 * sizeof(I) == mpl::max_v, uint64_t(16)>)); + +template +struct Equals; + +template +struct Equals +{ + template) + NBL_CONSTEXPR_STATIC_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) + { + const portable_vector_t InterleaveMasks = NBL_MORTON_INTERLEAVE_MASKS(storage_t, D, Bits, ); + const portable_vector_t zeros = _static_cast >(truncate >(vector(0,0,0,0))); + + const portable_vector_t rhsCasted = _static_cast >(rhs); + const portable_vector_t xored = rhsCasted ^ (InterleaveMasks & value); + equal_to > equal; + return equal(xored, zeros); + } +}; + +template +struct Equals +{ + template) + NBL_CONSTEXPR_STATIC_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) + { + using right_sign_t = conditional_t, make_unsigned_t 
>; + const portable_vector_t interleaved = _static_cast >(Transcoder::interleaveShift(rhs)); + return Equals::template __call(value, interleaved); + } +}; + +template +struct BaseComparison; + +// Aux variable that has only the sign bit for the first of D dimensions +template +NBL_CONSTEXPR uint64_t SignMask = uint64_t(1) << (D * (Bits - 1)); + +template +struct BaseComparison +{ + template) + NBL_CONSTEXPR_STATIC_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(portable_vector_t) rhs) + { + const portable_vector_t InterleaveMasks = NBL_MORTON_INTERLEAVE_MASKS(storage_t, D, Bits, ); + const portable_vector_t SignMasks = NBL_MORTON_SIGN_MASKS(storage_t, D, Bits); + ComparisonOp comparison; + NBL_IF_CONSTEXPR(Signed) + { + // Obtain a vector of deinterleaved coordinates and flip their sign bits + portable_vector_t thisCoord = (InterleaveMasks & value) ^ SignMasks; + // rhs already deinterleaved, just have to cast type and flip sign + const portable_vector_t rhsCoord = _static_cast >(rhs) ^ SignMasks; + + return comparison(thisCoord, rhsCoord); + } + else + { + // Obtain a vector of deinterleaved coordinates + portable_vector_t thisCoord = InterleaveMasks & value; + // rhs already deinterleaved, just have to cast type + const portable_vector_t rhsCoord = _static_cast >(rhs); + + return comparison(thisCoord, rhsCoord); + } + + } +}; + +template +struct BaseComparison +{ + template) + NBL_CONSTEXPR_STATIC_FUNC vector __call(NBL_CONST_REF_ARG(storage_t) value, NBL_CONST_REF_ARG(vector) rhs) + { + using right_sign_t = conditional_t, make_unsigned_t >; + const portable_vector_t interleaved = _static_cast >(Transcoder::interleaveShift(rhs)); + return BaseComparison::template __call(value, interleaved); + } +}; + +template +struct LessThan : BaseComparison > > {}; + +template +struct LessEquals : BaseComparison > > {}; + +template +struct GreaterThan : BaseComparison > > {}; + +template +struct GreaterEquals : BaseComparison > > {}; + +} //namespace impl + +// Making this even slightly less ugly is blocked by https://github.com/microsoft/DirectXShaderCompiler/issues/7006 +// In particular, `Masks` should be a `const static` member field instead of appearing in every method using it +template && D * Bits <= 64) +struct code +{ + using this_t = code; + using this_signed_t = code; + NBL_CONSTEXPR_STATIC uint16_t TotalBitWidth = D * Bits; + using storage_t = conditional_t<(TotalBitWidth > 16), conditional_t<(TotalBitWidth > 32), _uint64_t, uint32_t>, uint16_t>; + + storage_t value; + + // ---------------------------------------------------- CONSTRUCTORS --------------------------------------------------------------- + + #ifndef __HLSL_VERSION + + code() = default; + + #endif + + /** + * @brief Creates a Morton code from a set of integral cartesian coordinates + * + * @param [in] cartesian Coordinates to encode. 
Signedness MUST match the signedness of this Morton code class
+    */
+    template
+    NBL_CONSTEXPR_STATIC_FUNC enable_if_t && is_scalar_v && (is_signed_v == Signed) && (8 * sizeof(I) >= Bits), this_t>
+    create(NBL_CONST_REF_ARG(vector) cartesian)
+    {
+        this_t retVal;
+        retVal.value = impl::Transcoder::encode(cartesian);
+        return retVal;
+    }
+
+    // CPP can also have an actual constructor
+    #ifndef __HLSL_VERSION
+
+    /**
+    * @brief Creates a Morton code from a set of cartesian coordinates
+    *
+    * @param [in] cartesian Coordinates to encode
+    */
+    template= Bits)
+    inline explicit code(NBL_CONST_REF_ARG(vector) cartesian)
+    {
+        *this = create(cartesian);
+    }
+
+    /**
+    * @brief Decodes this Morton code back to a set of cartesian coordinates
+    */
+    template= Bits && is_signed_v == Signed)
+    constexpr explicit operator vector() const noexcept;
+
+    #endif
+
+    // ------------------------------------------------------- BITWISE OPERATORS -------------------------------------------------
+
+    NBL_CONSTEXPR_FUNC this_t operator&(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC
+    {
+        this_t retVal;
+        retVal.value = value & rhs.value;
+        return retVal;
+    }
+
+    NBL_CONSTEXPR_FUNC this_t operator|(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC
+    {
+        this_t retVal;
+        retVal.value = value | rhs.value;
+        return retVal;
+    }
+
+    NBL_CONSTEXPR_FUNC this_t operator^(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC
+    {
+        this_t retVal;
+        retVal.value = value ^ rhs.value;
+        return retVal;
+    }
+
+    NBL_CONSTEXPR_FUNC this_t operator~() NBL_CONST_MEMBER_FUNC
+    {
+        this_t retVal;
+        retVal.value = ~value;
+        return retVal;
+    }
+
+    // Only valid in CPP
+    #ifndef __HLSL_VERSION
+
+    constexpr this_t operator<<(uint16_t bits) const;
+
+    constexpr this_t operator>>(uint16_t bits) const;
+
+    #endif
+
+    // ------------------------------------------------------- UNARY ARITHMETIC OPERATORS -------------------------------------------------
+
+    NBL_CONSTEXPR_FUNC this_signed_t operator-() NBL_CONST_MEMBER_FUNC
+    {
+        this_t zero;
+        zero.value = _static_cast(0);
+        #ifndef __HLSL_VERSION
+        return zero - *this;
+        #else
+        return zero - this;
+        #endif
+    }
+
+    // ------------------------------------------------------- BINARY ARITHMETIC OPERATORS -------------------------------------------------
+
+    // put 1 bits in all the bit positions the current axis is not using
+    // then extract just the axis bits for the right hand coordinate
+    // carry-1 will propagate the bits across the already set bits
+    // then clear out the bits not belonging to the current axis
+    // Note: It's possible to clear on `this` and fill on `rhs` but that will
+    // disable optimizations; we expect the compiler to optimize a lot if the
+    // value of `rhs` is known at compile time, e.g.
`static_cast>(glm::ivec3(1,0,0))` + NBL_CONSTEXPR_FUNC this_t operator+(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + const portable_vector_t InterleaveMasks = NBL_MORTON_INTERLEAVE_MASKS(storage_t, D, Bits, impl::); + bit_not > bitnot; + // For each coordinate, leave its bits intact and turn every other bit ON + const portable_vector_t counterMaskedValue = bitnot(InterleaveMasks) | value; + // For each coordinate in rhs, leave its bits intact and turn every other bit OFF + const portable_vector_t maskedRhsValue = InterleaveMasks & rhs.value; + // Add these coordinate-wise, then turn all bits not belonging to the current coordinate OFF + const portable_vector_t interleaveShiftedResult = (counterMaskedValue + maskedRhsValue) & InterleaveMasks; + // Re-encode the result + array_get, storage_t> getter; + this_t retVal; + retVal.value = getter(interleaveShiftedResult, 0); + [[unroll]] + for (uint16_t i = 1; i < D; i++) + retVal.value = retVal.value | getter(interleaveShiftedResult, i); + return retVal; + } + + // This is the dual trick of the one used for addition: set all other bits to 0 so borrows propagate + NBL_CONSTEXPR_FUNC this_t operator-(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + const portable_vector_t InterleaveMasks = NBL_MORTON_INTERLEAVE_MASKS(storage_t, D, Bits, impl::); + // For each coordinate, leave its bits intact and turn every other bit OFF + const portable_vector_t maskedValue = InterleaveMasks & value; + // Do the same for each coordinate in rhs + const portable_vector_t maskedRhsValue = InterleaveMasks & rhs.value; + // Subtract these coordinate-wise, then turn all bits not belonging to the current coordinate OFF + const portable_vector_t interleaveShiftedResult = (maskedValue - maskedRhsValue) & InterleaveMasks; + // Re-encode the result + array_get, storage_t> getter; + this_t retVal; + retVal.value = getter(interleaveShiftedResult, 0); + [[unroll]] + for (uint16_t i = 1; i < D; i++) + retVal.value = retVal.value | getter(interleaveShiftedResult, i); + + return retVal; + } + + // ------------------------------------------------------- COMPARISON OPERATORS ------------------------------------------------- + + NBL_CONSTEXPR_FUNC bool operator==(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + return value == rhs.value; + } + + template) + NBL_CONSTEXPR_FUNC vector equal(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + { + return impl::Equals::template __call(value, rhs); + } + + NBL_CONSTEXPR_FUNC bool operator!=(NBL_CONST_REF_ARG(this_t) rhs) NBL_CONST_MEMBER_FUNC + { + return value != rhs.value; + } + + template) + NBL_CONSTEXPR_FUNC vector notEqual(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + { + return !equal(rhs); + } + + template) + NBL_CONSTEXPR_FUNC vector lessThan(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + { + return impl::LessThan::template __call(value, rhs); + } + + template) + NBL_CONSTEXPR_FUNC vector lessThanEquals(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + { + return impl::LessEquals::template __call(value, rhs); + } + + template) + NBL_CONSTEXPR_FUNC vector greaterThan(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + { + return impl::GreaterThan::template __call(value, rhs); + } + + template) + NBL_CONSTEXPR_FUNC vector greaterThanEquals(NBL_CONST_REF_ARG(vector) rhs) NBL_CONST_MEMBER_FUNC + { + return impl::GreaterEquals::template __call(value, rhs); + } + +}; + +} //namespace morton + +// Specialize the `static_cast_helper` +namespace impl +{ + +// I must be of same 
signedness as the morton code, and be wide enough to hold each component +template NBL_PARTIAL_REQ_TOP(concepts::IntegralScalar && 8 * sizeof(I) >= Bits) +struct static_cast_helper, morton::code, Bits, D, _uint64_t> NBL_PARTIAL_REQ_BOT(concepts::IntegralScalar && 8 * sizeof(I) >= Bits) > +{ + NBL_CONSTEXPR_STATIC_FUNC vector cast(NBL_CONST_REF_ARG(morton::code, Bits, D, _uint64_t>) val) + { + using storage_t = typename morton::code, Bits, D, _uint64_t>::storage_t; + return morton::impl::Transcoder::decode(val.value); + } +}; + +} // namespace impl + +template +struct left_shift_operator > +{ + using type_t = morton::code; + using storage_t = typename type_t::storage_t; + + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + { + left_shift_operator valueLeftShift; + type_t retVal; + // Shift every coordinate by `bits` + retVal.value = valueLeftShift(operand.value, bits * D); + // Previous shift might move bits to positions that storage has available but the morton code does not use + // Un-decoding the resulting morton is still fine and produces expected results, but some operations such as equality expect these unused bits to be 0 so we mask them off + const uint64_t UsedBitsMask = Bits * D < 64 ? (uint64_t(1) << (Bits * D)) - 1 : ~uint64_t(0); + retVal.value = retVal.value & _static_cast(UsedBitsMask); + return retVal; + } +}; + +template +struct arithmetic_right_shift_operator > +{ + using type_t = morton::code; + using storage_t = typename type_t::storage_t; + + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + { + arithmetic_right_shift_operator valueArithmeticRightShift; + type_t retVal; + // Shift every coordinate by `bits` + retVal.value = valueArithmeticRightShift(operand.value, bits * D); + return retVal; + } +}; + +// This one's uglier - have to unpack to get the expected behaviour +template +struct arithmetic_right_shift_operator > +{ + using type_t = morton::code; + using scalar_t = conditional_t<(Bits > 16), int32_t, int16_t>; + + NBL_CONSTEXPR_FUNC type_t operator()(NBL_CONST_REF_ARG(type_t) operand, uint16_t bits) + { + vector cartesian = _static_cast >(operand); + // To avoid branching, we left-shift each coordinate to put the MSB (of the encoded Morton) at the position of the MSB (of the `scalar_t` used for the decoded coordinate), + // then right-shift again to get correct sign on each coordinate + // The number of bits we shift by to put MSB of Morton at MSB of `scalar_t` is the difference between the bitwidth of `scalar_t` and Bits + const scalar_t ShiftFactor = scalar_t(8 * sizeof(scalar_t) - Bits); + cartesian <<= ShiftFactor; + cartesian >>= ShiftFactor + scalar_t(bits); + return type_t::create(cartesian); + } +}; + +#ifndef __HLSL_VERSION + +template&& D* Bits <= 64) +constexpr morton::code morton::code::operator<<(uint16_t bits) const +{ + left_shift_operator> leftShift; + return leftShift(*this, bits); +} + +template&& D* Bits <= 64) +constexpr morton::code morton::code::operator>>(uint16_t bits) const +{ + arithmetic_right_shift_operator> rightShift; + return rightShift(*this, bits); +} + +template && D* Bits <= 64) +template = Bits && is_signed_v == Signed) +constexpr morton::code::operator vector() const noexcept +{ + return _static_cast, morton::code>(*this); +} + +#endif + +#undef NBL_MORTON_INTERLEAVE_MASKS +#undef NBL_MORTON_SIGN_MASKS + +} //namespace hlsl +} //namespace nbl + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/mpl.hlsl 
b/include/nbl/builtin/hlsl/mpl.hlsl index 2015b05b3d..67f6445324 100644 --- a/include/nbl/builtin/hlsl/mpl.hlsl +++ b/include/nbl/builtin/hlsl/mpl.hlsl @@ -43,13 +43,23 @@ struct countl_zero : impl::countl_zero template NBL_CONSTEXPR T countl_zero_v = countl_zero::value; +template +struct is_pot : bool_constant< (N > 0 && !(N & (N - 1))) > {}; +template +NBL_CONSTEXPR bool is_pot_v = is_pot::value; + template struct log2 { NBL_CONSTEXPR_STATIC_INLINE uint16_t value = X ? (1ull<<6)-countl_zero::value-1 : -1ull; }; template -NBL_CONSTEXPR uint64_t log2_v = log2::value; +NBL_CONSTEXPR uint16_t log2_v = log2::value; + +template +struct log2_ceil : integral_constant + uint16_t(!is_pot_v)> {}; +template +NBL_CONSTEXPR uint16_t log2_ceil_v = log2_ceil::value; template struct rotl @@ -79,11 +89,6 @@ struct align_up template NBL_CONSTEXPR uint64_t align_up_v = align_up::value; -template -struct is_pot : bool_constant< (N > 0 && !(N & (N - 1))) > {}; -template -NBL_CONSTEXPR bool is_pot_v = is_pot::value; - template struct max { @@ -99,6 +104,17 @@ struct min }; template NBL_CONSTEXPR T min_v = min::value; + +template +struct round_up_to_pot : integral_constant > {}; +template +NBL_CONSTEXPR uint64_t round_up_to_pot_v = round_up_to_pot::value; + +template +struct round_down_to_pot : integral_constant > {}; +template +NBL_CONSTEXPR uint64_t round_down_to_pot_v = round_down_to_pot::value; + } } } diff --git a/include/nbl/builtin/hlsl/portable/int64_t.hlsl b/include/nbl/builtin/hlsl/portable/int64_t.hlsl new file mode 100644 index 0000000000..2dffa40a2d --- /dev/null +++ b/include/nbl/builtin/hlsl/portable/int64_t.hlsl @@ -0,0 +1,36 @@ +#ifndef _NBL_BUILTIN_HLSL_PORTABLE_INT64_T_INCLUDED_ +#define _NBL_BUILTIN_HLSL_PORTABLE_INT64_T_INCLUDED_ + +#include +#include + +// define NBL_FORCE_EMULATED_INT_64 to force using emulated int64 types + +namespace nbl +{ +namespace hlsl +{ +#ifdef __HLSL_VERSION +#ifdef NBL_FORCE_EMULATED_INT_64 +template +using portable_uint64_t = emulated_uint64_t; +template +using portable_int64_t = emulated_int64_t; +#else +template +using portable_uint64_t = typename conditional::shaderInt64, uint64_t, emulated_uint64_t>::type; +template +using portable_int64_t = typename conditional::shaderInt64, int64_t, emulated_int64_t>::type; +#endif + +#else +template +using portable_uint64_t = uint64_t; +template +using portable_int64_t = int64_t; +#endif + +} +} + +#endif \ No newline at end of file diff --git a/include/nbl/builtin/hlsl/portable/vector_t.hlsl b/include/nbl/builtin/hlsl/portable/vector_t.hlsl index ace199e20b..16d5b40f81 100644 --- a/include/nbl/builtin/hlsl/portable/vector_t.hlsl +++ b/include/nbl/builtin/hlsl/portable/vector_t.hlsl @@ -3,6 +3,7 @@ #include #include +#include namespace nbl { @@ -36,19 +37,53 @@ template using portable_vector_t4 = portable_vector_t; #ifdef __HLSL_VERSION +// Float template using portable_float64_t2 = portable_vector_t2 >; template using portable_float64_t3 = portable_vector_t3 >; template using portable_float64_t4 = portable_vector_t4 >; + +// Uint +template +using portable_uint64_t2 = portable_vector_t2 >; +template +using portable_uint64_t3 = portable_vector_t3 >; +template +using portable_uint64_t4 = portable_vector_t4 >; + +//Int +template +using portable_int64_t2 = portable_vector_t2 >; +template +using portable_int64_t3 = portable_vector_t3 >; +template +using portable_int64_t4 = portable_vector_t4 >; #else +// Float template using portable_float64_t2 = portable_vector_t2; template using portable_float64_t3 = portable_vector_t3; 
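(Aside, backing up to the mpl additions above: log2_ceil and the power-of-two rounding helpers compose exactly as in this freestanding constexpr sketch for n >= 1; the names mirror the traits but are illustrative only.)

#include <cstdint>

constexpr bool is_pot(uint64_t n) { return n > 0 && !(n & (n - 1)); }

constexpr uint16_t log2_floor(uint64_t n) { return n <= 1 ? 0 : 1 + log2_floor(n >> 1); }

constexpr uint16_t log2_ceil(uint64_t n) { return log2_floor(n) + (is_pot(n) ? 0 : 1); }

constexpr uint64_t round_up_to_pot(uint64_t n) { return uint64_t(1) << log2_ceil(n); }

constexpr uint64_t round_down_to_pot(uint64_t n) { return uint64_t(1) << log2_floor(n); }

static_assert(log2_ceil(5) == 3 && round_up_to_pot(5) == 8, "5 rounds up to 8");
static_assert(log2_ceil(8) == 3 && round_up_to_pot(8) == 8, "exact powers stay put");
static_assert(round_down_to_pot(5) == 4, "5 rounds down to 4");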
template using portable_float64_t4 = portable_vector_t4; + +// Uint +template +using portable_uint64_t2 = portable_vector_t2; +template +using portable_uint64_t3 = portable_vector_t3; +template +using portable_uint64_t4 = portable_vector_t4; + +// Int +template +using portable_int64_t2 = portable_vector_t2; +template +using portable_int64_t3 = portable_vector_t3; +template +using portable_int64_t4 = portable_vector_t4; #endif } diff --git a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl index 7da69c4a55..8afceb7fef 100644 --- a/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl +++ b/include/nbl/builtin/hlsl/spirv_intrinsics/core.hlsl @@ -4,13 +4,15 @@ #ifndef _NBL_BUILTIN_HLSL_SPIRV_INTRINSICS_CORE_INCLUDED_ #define _NBL_BUILTIN_HLSL_SPIRV_INTRINSICS_CORE_INCLUDED_ +#include + #ifdef __HLSL_VERSION // TODO: AnastZIuk fix public search paths so we don't choke #include "spirv/unified1/spirv.hpp" #include #include #include -#include +#include namespace nbl { @@ -112,7 +114,12 @@ NBL_CONSTEXPR_STATIC_INLINE bool is_bda_pointer_v = is_bda_pointer::value; //! General Operations - + +//! Miscellaneous Instructions +template +[[vk::ext_instruction(spv::OpUndef)]] +T undef(); + // template [[vk::ext_instruction(spv::OpAccessChain)]] @@ -338,6 +345,11 @@ template [[vk::ext_instruction(spv::OpAny)]] enable_if_t&& is_same_v::scalar_type, bool>, BooleanVector> any(BooleanVector vec); +// If Condition is a vector, ResultType must be a vector with the same number of components. Using (p -> q) = (~p v q) +template && (! concepts::Vector || (concepts::Vector && (extent_v == extent_v)))) +[[vk::ext_instruction(spv::OpSelect)]] +ResultType select(Condition condition, ResultType object1, ResultType object2); + template) [[vk::ext_instruction(spv::OpIAddCarry)]] AddCarryOutput addCarry(T operand1, T operand2); diff --git a/include/nbl/builtin/hlsl/type_traits.hlsl b/include/nbl/builtin/hlsl/type_traits.hlsl index a6f9ad0655..a6b3db6708 100644 --- a/include/nbl/builtin/hlsl/type_traits.hlsl +++ b/include/nbl/builtin/hlsl/type_traits.hlsl @@ -664,6 +664,8 @@ using conditional_t = typename conditional::type; // Template Variables +template +NBL_CONSTEXPR T integral_constant_v = integral_constant::value; template NBL_CONSTEXPR bool is_same_v = is_same::value; template @@ -682,12 +684,20 @@ template NBL_CONSTEXPR uint32_t alignment_of_v = alignment_of::value; template NBL_CONSTEXPR uint64_t extent_v = extent::value; +template +NBL_CONSTEXPR bool is_fundamental_v = is_fundamental::value; // Overlapping definitions template using make_void_t = typename make_void::type; +template +using make_signed_t = typename make_signed::type; + +template +using make_unsigned_t = typename make_unsigned::type; + template struct conditional_value { diff --git a/include/nbl/builtin/hlsl/workgroup/fft.hlsl b/include/nbl/builtin/hlsl/workgroup/fft.hlsl index 3b600cd8ad..c23841dd50 100644 --- a/include/nbl/builtin/hlsl/workgroup/fft.hlsl +++ b/include/nbl/builtin/hlsl/workgroup/fft.hlsl @@ -40,7 +40,7 @@ struct OptimalFFTParameters uint16_t workgroupSizeLog2 : 8; // Used to check if the parameters returned by `optimalFFTParameters` are valid - bool areValid() + bool areValid() NBL_CONST_MEMBER_FUNC { return elementsPerInvocationLog2 > 0 && workgroupSizeLog2 > 0; } @@ -53,9 +53,9 @@ struct OptimalFFTParameters * @param [in] inputArrayLength The length of the array to run an FFT on * @param [in] minSubgroupSize The smallest possible number of threads that can run in a single 
subgroup. 32 by default. */ -inline OptimalFFTParameters optimalFFTParameters(uint32_t maxWorkgroupSize, uint32_t inputArrayLength, uint32_t minSubgroupSize) +NBL_CONSTEXPR_FUNC OptimalFFTParameters optimalFFTParameters(uint32_t maxWorkgroupSize, uint32_t inputArrayLength, uint32_t minSubgroupSize) { - NBL_CONSTEXPR_STATIC OptimalFFTParameters invalidParameters = { 0 , 0 }; + const OptimalFFTParameters invalidParameters = { 0 , 0 }; if (minSubgroupSize < 4 || maxWorkgroupSize < minSubgroupSize || inputArrayLength <= minSubgroupSize) return invalidParameters; @@ -81,15 +81,16 @@ inline OptimalFFTParameters optimalFFTParameters(uint32_t maxWorkgroupSize, uint namespace impl { + template -enable_if_t<(H <= N) && (N < 32), uint32_t> circularBitShiftRightHigher(uint32_t i) +NBL_CONSTEXPR_FUNC enable_if_t<(H <= N) && (N < 32), uint32_t> circularBitShiftRightHigher(uint32_t i) { // Highest H bits are numbered N-1 through N - H // N - H is then the middle bit // Lowest bits numbered from 0 through N - H - 1 - NBL_CONSTEXPR_STATIC_INLINE uint32_t lowMask = (1 << (N - H)) - 1; - NBL_CONSTEXPR_STATIC_INLINE uint32_t midMask = 1 << (N - H); - NBL_CONSTEXPR_STATIC_INLINE uint32_t highMask = ~(lowMask | midMask); + const uint32_t lowMask = (1 << (N - H)) - 1; + const uint32_t midMask = 1 << (N - H); + const uint32_t highMask = ~(lowMask | midMask); uint32_t low = i & lowMask; uint32_t mid = i & midMask; @@ -102,14 +103,14 @@ enable_if_t<(H <= N) && (N < 32), uint32_t> circularBitShiftRightHigher(uint32_t } template -enable_if_t<(H <= N) && (N < 32), uint32_t> circularBitShiftLeftHigher(uint32_t i) +NBL_CONSTEXPR_FUNC enable_if_t<(H <= N) && (N < 32), uint32_t> circularBitShiftLeftHigher(uint32_t i) { // Highest H bits are numbered N-1 through N - H // N - 1 is then the highest bit, and N - 2 through N - H are the middle bits // Lowest bits numbered from 0 through N - H - 1 - NBL_CONSTEXPR_STATIC_INLINE uint32_t lowMask = (1 << (N - H)) - 1; - NBL_CONSTEXPR_STATIC_INLINE uint32_t highMask = 1 << (N - 1); - NBL_CONSTEXPR_STATIC_INLINE uint32_t midMask = ~(lowMask | highMask); + const uint32_t lowMask = (1 << (N - H)) - 1; + const uint32_t highMask = 1 << (N - 1); + const uint32_t midMask = ~(lowMask | highMask); uint32_t low = i & lowMask; uint32_t mid = i & midMask; @@ -120,6 +121,7 @@ enable_if_t<(H <= N) && (N < 32), uint32_t> circularBitShiftLeftHigher(uint32_t return mid | high | low; } + } //namespace impl template @@ -127,26 +129,26 @@ struct FFTIndexingUtils { // This function maps the index `outputIdx` in the output array of a Nabla FFT to the index `freqIdx` in the DFT such that `DFT[freqIdx] = NablaFFT[outputIdx]` // This is because Cooley-Tukey + subgroup operations end up spewing out the outputs in a weird order - static uint32_t getDFTIndex(uint32_t outputIdx) + NBL_CONSTEXPR_STATIC_FUNC uint32_t getDFTIndex(uint32_t outputIdx) { return impl::circularBitShiftRightHigher(hlsl::bitReverseAs(outputIdx, FFTSizeLog2)); } // This function maps the index `freqIdx` in the DFT to the index `idx` in the output array of a Nabla FFT such that `DFT[freqIdx] = NablaFFT[idx]` // It is essentially the inverse of `getDFTIndex` - static uint32_t getNablaIndex(uint32_t freqIdx) + NBL_CONSTEXPR_STATIC_FUNC uint32_t getNablaIndex(uint32_t freqIdx) { return hlsl::bitReverseAs(impl::circularBitShiftLeftHigher(freqIdx), FFTSizeLog2); } // Mirrors an index about the Nyquist frequency in the DFT order - static uint32_t getDFTMirrorIndex(uint32_t freqIdx) + NBL_CONSTEXPR_STATIC_FUNC uint32_t getDFTMirrorIndex(uint32_t 
freqIdx) { return (FFTSize - freqIdx) & (FFTSize - 1); } // Given an index `outputIdx` of an element into the Nabla FFT, get the index into the Nabla FFT of the element corresponding to its negative frequency - static uint32_t getNablaMirrorIndex(uint32_t outputIdx) + NBL_CONSTEXPR_STATIC_FUNC uint32_t getNablaMirrorIndex(uint32_t outputIdx) { return getNablaIndex(getDFTMirrorIndex(getDFTIndex(outputIdx))); } @@ -326,7 +328,7 @@ struct FFT, device template && fft::FFTSharedMemoryAccessor) static void __call(NBL_REF_ARG(Accessor) accessor, NBL_REF_ARG(SharedMemoryAccessor) sharedmemAccessor) { - NBL_CONSTEXPR_STATIC_INLINE uint16_t WorkgroupSize = consteval_params_t::WorkgroupSize; + const uint16_t WorkgroupSize = consteval_params_t::WorkgroupSize; // Compute the indices only once const uint32_t threadID = uint32_t(SubgroupContiguousIndex()); @@ -392,7 +394,7 @@ struct FFT, device_ template && fft::FFTSharedMemoryAccessor) static void __call(NBL_REF_ARG(Accessor) accessor, NBL_REF_ARG(SharedMemoryAccessor) sharedmemAccessor) { - NBL_CONSTEXPR_STATIC_INLINE uint16_t WorkgroupSize = consteval_params_t::WorkgroupSize; + const uint16_t WorkgroupSize = consteval_params_t::WorkgroupSize; // Compute the indices only once const uint32_t threadID = uint32_t(SubgroupContiguousIndex()); @@ -453,8 +455,8 @@ struct FFT && fft::FFTSharedMemoryAccessor) static void __call(NBL_REF_ARG(Accessor) accessor, NBL_REF_ARG(SharedMemoryAccessor) sharedmemAccessor) { - NBL_CONSTEXPR_STATIC_INLINE uint16_t WorkgroupSize = consteval_params_t::WorkgroupSize; - NBL_CONSTEXPR_STATIC_INLINE uint16_t ElementsPerInvocation = consteval_params_t::ElementsPerInvocation; + const uint16_t WorkgroupSize = consteval_params_t::WorkgroupSize; + const uint16_t ElementsPerInvocation = consteval_params_t::ElementsPerInvocation; [unroll] for (uint32_t stride = (ElementsPerInvocation / 2) * WorkgroupSize; stride > WorkgroupSize; stride >>= 1) @@ -501,8 +503,8 @@ struct FFT && fft::FFTSharedMemoryAccessor) static void __call(NBL_REF_ARG(Accessor) accessor, NBL_REF_ARG(SharedMemoryAccessor) sharedmemAccessor) { - NBL_CONSTEXPR_STATIC_INLINE uint16_t WorkgroupSize = consteval_params_t::WorkgroupSize; - NBL_CONSTEXPR_STATIC_INLINE uint16_t ElementsPerInvocation = consteval_params_t::ElementsPerInvocation; + const uint16_t WorkgroupSize = consteval_params_t::WorkgroupSize; + const uint16_t ElementsPerInvocation = consteval_params_t::ElementsPerInvocation; // do K/2 small workgroup FFTs accessor_adaptors::Offset offsetAccessor; diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 9333a0d3b4..b2e914bf73 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -217,10 +217,12 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/macros.h") # emulated LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/float64_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/float64_t_impl.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/int64_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/vector_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/emulated/matrix_t.hlsl") # portable LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/float64_t.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/int64_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/vector_t.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/portable/matrix_t.hlsl") # ieee754 @@ -249,6 +251,7 @@ 
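(Aside: the getDFTMirrorIndex helper made constexpr above maps a frequency f to -f mod FFTSize, pairing each bin with its negative frequency. A tiny self-contained check, with an illustrative power-of-two size:)

#include <cassert>
#include <cstdint>

constexpr uint32_t FFTSize = 16; // any power of two behaves the same

constexpr uint32_t getDFTMirrorIndex(uint32_t freqIdx)
{
    return (FFTSize - freqIdx) & (FFTSize - 1);
}

int main()
{
    assert(getDFTMirrorIndex(0) == 0);   // DC is its own mirror
    assert(getDFTMirrorIndex(8) == 8);   // so is Nyquist (FFTSize / 2)
    assert(getDFTMirrorIndex(3) == 13);  // f and FFTSize - f pair up
    assert(getDFTMirrorIndex(13) == 3);
}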
LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/basic.h") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/intrinsics.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/matrix.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/promote.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/truncate.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/vector.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/cpp_compat/impl/intrinsics_impl.hlsl") #glsl compat @@ -369,5 +372,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/tgmath/output_structs.hlsl") #blur LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/prefix_sum_blur/blur.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/prefix_sum_blur/box_sampler.hlsl") +#morton codes +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/morton.hlsl") ADD_CUSTOM_BUILTIN_RESOURCES(nblBuiltinResourceData NBL_RESOURCES_TO_EMBED "${NBL_ROOT_PATH}/include" "nbl/builtin" "nbl::builtin" "${NBL_ROOT_PATH_BINARY}/include" "${NBL_ROOT_PATH_BINARY}/src" "STATIC" "INTERNAL")
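(Aside: for a quick feel of the morton::code API added in this diff, a hypothetical C++-side usage sketch. The template-parameter order (Signed, Bits, D) is an assumption for illustration; check morton.hlsl for the exact declaration.)

#include <cstdint>
#include "nbl/builtin/hlsl/morton.hlsl" // C++-compatible, as registered in CMake above

// Assumed spelling: 2D unsigned Morton code with 16 bits per axis
using morton2 = nbl::hlsl::morton::code<false, 16, 2>;

int main()
{
    const morton2 a = morton2::create(nbl::hlsl::vector<uint16_t, 2>(3, 5));
    const morton2 b = morton2::create(nbl::hlsl::vector<uint16_t, 2>(1, 2));
    // operator+ acts per axis despite the interleaved storage
    const morton2 sum = a + b;
    const auto decoded = static_cast<nbl::hlsl::vector<uint16_t, 2>>(sum); // (4, 7)
    return (decoded.x == 4 && decoded.y == 7) ? 0 : 1;
}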