diff --git a/render_delegate/render_buffer.cpp b/render_delegate/render_buffer.cpp
index d7089b6f8f..95f68e6647 100644
--- a/render_delegate/render_buffer.cpp
+++ b/render_delegate/render_buffer.cpp
@@ -13,6 +13,7 @@
 // limitations under the License.
 #include "render_buffer.h"
 
+#include <algorithm>
 #include <mutex>
 #include <unordered_map>
 
@@ -23,6 +24,174 @@
 // TOOD(pal): use a more efficient locking mechanism than the std::mutex.
 PXR_NAMESPACE_OPEN_SCOPE
 
+namespace {
+
+// Mapping the HdFormat base type to a C++ type.
+// The function querying the component size is not constexpr.
+template <HdFormat F>
+struct HdFormatType {
+    using type = void;
+};
+
+template <>
+struct HdFormatType<HdFormatUNorm8> {
+    using type = uint8_t;
+};
+
+template <>
+struct HdFormatType<HdFormatSNorm8> {
+    using type = int8_t;
+};
+
+template <>
+struct HdFormatType<HdFormatFloat16> {
+    using type = GfHalf;
+};
+
+template <>
+struct HdFormatType<HdFormatFloat32> {
+    using type = float;
+};
+
+template <>
+struct HdFormatType<HdFormatInt32> {
+    using type = int32_t;
+};
+
+// We are storing the function pointers in an unordered map and using a very simple, well packed key to look them up.
+// We need to investigate if the overhead of the unordered_map lookup, the function call and pushing the arguments
+// to the stack is significant, compared to inlining all the functions.
+struct ConversionKey {
+    const uint16_t from;
+    const uint16_t to;
+    ConversionKey(int _from, int _to) : from(static_cast<uint16_t>(_from)), to(static_cast<uint16_t>(_to)) {}
+    struct HashFunctor {
+        size_t operator()(const ConversionKey& key) const
+        {
+            // The max value for the key is 20.
+            // TODO(pal): Use HdFormatCount to better pack the keys.
+            return key.to | (key.from << 8);
+        }
+    };
+};
+
+inline bool operator==(const ConversionKey& a, const ConversionKey& b) { return a.from == b.from && a.to == b.to; }
+
+inline bool supportedComponentFormat(HdFormat format)
+{
+    const auto componentFormat = HdGetComponentFormat(format);
+    return componentFormat == HdFormatUNorm8 || componentFormat == HdFormatSNorm8 ||
+        componentFormat == HdFormatFloat16 || componentFormat == HdFormatFloat32 || componentFormat == HdFormatInt32;
+}
+
+template <typename TO, typename FROM>
+inline TO convertType(FROM from)
+{
+    return static_cast<TO>(from);
+}
+
+// TODO(pal): Dithering?
+template <>
+inline uint8_t convertType(float from)
+{
+    return std::max(0, std::min(static_cast<int>(from * 255.0f), 255));
+}
+
+template <>
+inline uint8_t convertType(GfHalf from)
+{
+    return std::max(0, std::min(static_cast<int>(from * 255.0f), 255));
+}
+
+template <>
+inline int8_t convertType(float from)
+{
+    return std::max(-127, std::min(static_cast<int>(from * 127.0f), 127));
+}
+
+template <>
+inline int8_t convertType(GfHalf from)
+{
+    return std::max(-127, std::min(static_cast<int>(from * 127.0f), 127));
+}
+
+// xo, xe, yo and ye are already clamped against width and height, and we checked the corner cases when the bucket is empty.
+template <HdFormat TO, HdFormat FROM>
+inline void writeBucket(
+    void* buffer, size_t componentCount, unsigned int width, unsigned int height, const void* bucketData,
+    size_t bucketComponentCount, unsigned int xo, unsigned int xe, unsigned int yo, unsigned int ye,
+    unsigned int bucketWidth)
+{
+    auto* to =
+        static_cast<typename HdFormatType<TO>::type*>(buffer) + (xo + (height - yo - 1) * width) * componentCount;
+    const auto* from = static_cast<const typename HdFormatType<FROM>::type*>(bucketData);
+
+    const auto toStep = width * componentCount;
+    const auto fromStep = bucketWidth * bucketComponentCount;
+
+    const auto copyOp = [](const typename HdFormatType<FROM>::type& in) -> typename HdFormatType<TO>::type {
+        return convertType<typename HdFormatType<TO>::type, typename HdFormatType<FROM>::type>(in);
+    };
+    const auto dataWidth = xe - xo;
+    // We use std::transform instead of std::copy, so we can add special logic for float32/float16. If the lambda is
+    // just a straight copy, the behavior should be the same since we can't use memcpy.
+    if (componentCount == bucketComponentCount) {
+        const auto copyWidth = dataWidth * componentCount;
+        for (auto y = yo; y < ye; y += 1) {
+            std::transform(from, from + copyWidth, to, copyOp);
+            to -= toStep;
+            from += fromStep;
+        }
+    } else { // We need to call std::transform per pixel with the amount of components to copy.
+        const auto componentsToCopy = std::min(componentCount, bucketComponentCount);
+        for (auto y = yo; y < ye; y += 1) {
+            for (auto x = decltype(dataWidth){0}; x < dataWidth; x += 1) {
+                std::transform(
+                    from + x * bucketComponentCount, from + x * bucketComponentCount + componentsToCopy,
+                    to + x * componentCount, copyOp);
+            }
+            to -= toStep;
+            from += fromStep;
+        }
+    }
+}
+
+using WriteBucketFunction = void (*)(
+    void*, size_t, unsigned int, unsigned int, const void*, size_t, unsigned int, unsigned int, unsigned int,
+    unsigned int, unsigned int);
+
+using WriteBucketFunctionMap = std::unordered_map<ConversionKey, WriteBucketFunction, ConversionKey::HashFunctor>;
+
+WriteBucketFunctionMap writeBucketFunctions{
+    // Write to UNorm8 format.
+    {{HdFormatUNorm8, HdFormatSNorm8}, writeBucket<HdFormatUNorm8, HdFormatSNorm8>},
+    {{HdFormatUNorm8, HdFormatFloat16}, writeBucket<HdFormatUNorm8, HdFormatFloat16>},
+    {{HdFormatUNorm8, HdFormatFloat32}, writeBucket<HdFormatUNorm8, HdFormatFloat32>},
+    {{HdFormatUNorm8, HdFormatInt32}, writeBucket<HdFormatUNorm8, HdFormatInt32>},
+    // Write to SNorm8 format.
+    {{HdFormatSNorm8, HdFormatUNorm8}, writeBucket<HdFormatSNorm8, HdFormatUNorm8>},
+    {{HdFormatSNorm8, HdFormatFloat16}, writeBucket<HdFormatSNorm8, HdFormatFloat16>},
+    {{HdFormatSNorm8, HdFormatFloat32}, writeBucket<HdFormatSNorm8, HdFormatFloat32>},
+    {{HdFormatSNorm8, HdFormatInt32}, writeBucket<HdFormatSNorm8, HdFormatInt32>},
+    // Write to Float16 format.
+    {{HdFormatFloat16, HdFormatSNorm8}, writeBucket<HdFormatFloat16, HdFormatSNorm8>},
+    {{HdFormatFloat16, HdFormatUNorm8}, writeBucket<HdFormatFloat16, HdFormatUNorm8>},
+    {{HdFormatFloat16, HdFormatFloat32}, writeBucket<HdFormatFloat16, HdFormatFloat32>},
+    {{HdFormatFloat16, HdFormatInt32}, writeBucket<HdFormatFloat16, HdFormatInt32>},
+    // Write to Float32 format.
+    {{HdFormatFloat32, HdFormatSNorm8}, writeBucket<HdFormatFloat32, HdFormatSNorm8>},
+    {{HdFormatFloat32, HdFormatUNorm8}, writeBucket<HdFormatFloat32, HdFormatUNorm8>},
+    {{HdFormatFloat32, HdFormatFloat16}, writeBucket<HdFormatFloat32, HdFormatFloat16>},
+    {{HdFormatFloat32, HdFormatInt32}, writeBucket<HdFormatFloat32, HdFormatInt32>},
+    // Write to Int32 format.
+    {{HdFormatInt32, HdFormatSNorm8}, writeBucket<HdFormatInt32, HdFormatSNorm8>},
+    {{HdFormatInt32, HdFormatUNorm8}, writeBucket<HdFormatInt32, HdFormatUNorm8>},
+    {{HdFormatInt32, HdFormatFloat16}, writeBucket<HdFormatInt32, HdFormatFloat16>},
+    {{HdFormatInt32, HdFormatFloat32}, writeBucket<HdFormatInt32, HdFormatFloat32>},
+};
+
+} // namespace
+
 HdArnoldRenderBuffer::HdArnoldRenderBuffer(const SdfPath& id) : HdRenderBuffer(id) {}
 
 bool HdArnoldRenderBuffer::Allocate(const GfVec3i& dimensions, HdFormat format, bool multiSampled)
@@ -31,6 +200,9 @@ bool HdArnoldRenderBuffer::Allocate(const GfVec3i& dimensions, HdFormat format,
     // So deallocate won't lock.
     decltype(_buffer) tmp{};
     _buffer.swap(tmp);
+    if (!supportedComponentFormat(format)) {
+        return false;
+    }
     TF_UNUSED(multiSampled);
     _format = format;
     _width = dimensions[0];
@@ -67,7 +239,14 @@ void HdArnoldRenderBuffer::WriteBucket(
     unsigned int bucketXO, unsigned int bucketYO, unsigned int bucketWidth, unsigned int bucketHeight, HdFormat format,
     const void* bucketData)
 {
+    if (!supportedComponentFormat(format)) {
+        return;
+    }
     std::lock_guard<std::mutex> _guard(_mutex);
+    // Checking for empty buffers.
+    if (_buffer.empty()) {
+        return;
+    }
     const auto xo = AiClamp(bucketXO, 0u, _width);
     const auto xe = AiClamp(bucketXO + bucketWidth, 0u, _width);
     // Empty bucket.
@@ -121,13 +300,14 @@ void HdArnoldRenderBuffer::WriteBucket(
         } else {
             // Component counts do not match, we need to copy as much data as possible and leave the rest to their
            // default values, we expect someone to set that up before this call.
-            const auto copiedDataSize = std::min(inComponentCount, componentCount) * HdDataSizeOfFormat(componentFormat);
+            const auto copiedDataSize =
+                std::min(inComponentCount, componentCount) * HdDataSizeOfFormat(componentFormat);
             // The pixelSize is different for the incoming data.
             const auto inPixelSize = HdDataSizeOfFormat(format);
             // The size of the line for the bucket, this could be more than the data copied.
             const auto inLineDataSize = bucketWidth * inPixelSize;
             for (auto y = yo; y < ye; y += 1) {
-                for (auto x = xo; x < xe; x += 1) {
+                for (auto x = decltype(dataWidth){0}; x < dataWidth; x += 1) {
                     memcpy(data + x * pixelSize, inData + x * inPixelSize, copiedDataSize);
                 }
                 data -= fullLineDataSize;
@@ -135,7 +315,12 @@ void HdArnoldRenderBuffer::WriteBucket(
             }
         }
     } else { // Need to do conversion.
-        return;
+        const auto it = writeBucketFunctions.find({componentFormat, inComponentFormat});
+        if (it != writeBucketFunctions.end()) {
+            it->second(
+                _buffer.data(), componentCount, _width, _height, bucketData, inComponentCount, xo, xe, yo, ye,
+                bucketWidth);
+        }
     }
 }
 
diff --git a/render_delegate/render_buffer.h b/render_delegate/render_buffer.h
index 80641d21d6..91d23bdd58 100644
--- a/render_delegate/render_buffer.h
+++ b/render_delegate/render_buffer.h
@@ -102,7 +102,7 @@ class HdArnoldRenderBuffer : public HdRenderBuffer {
     unsigned int _width = 0;  ///< Buffer width.
     unsigned int _height = 0; ///< Buffer height.
     HdFormat _format = HdFormat::HdFormatUNorm8Vec4; ///< Internal format of the buffer.
-    bool _converged;         ///< Store if the render buffer has converged.
+    bool _converged = false; ///< Store if the render buffer has converged.
 };
 
 using HdArnoldRenderBufferStorage = std::unordered_map<TfToken, HdArnoldRenderBuffer*, TfToken::HashFunctor>;
diff --git a/render_delegate/render_delegate.cpp b/render_delegate/render_delegate.cpp
index da2def7a26..1060eb41ea 100755
--- a/render_delegate/render_delegate.cpp
+++ b/render_delegate/render_delegate.cpp
@@ -663,24 +663,22 @@ AtNode* HdArnoldRenderDelegate::GetFallbackVolumeShader() const { return _fallba
 HdAovDescriptor HdArnoldRenderDelegate::GetDefaultAovDescriptor(TfToken const& name) const
 {
     if (name == HdAovTokens->color) {
-#ifdef USD_HAS_UPDATED_COMPOSITOR
+#if 1
         return HdAovDescriptor(HdFormatFloat32Vec4, false, VtValue(GfVec4f(0.0f)));
 #else
-        return HdAovDescriptor(HdFormatUNorm8Vec4, false, VtValue(GfVec4f(0.0f)));
+        return HdAovDescriptor(HdFormatUNorm8Vec4, false, VtValue(GfVec4f(0.0f, 0.0f, 0.0f, 0.0f)));
 #endif
     } else if (name == HdAovTokens->depth) {
         return HdAovDescriptor(HdFormatFloat32, false, VtValue(1.0f));
     } else if (name == HdAovTokens->primId) {
-        return HdAovDescriptor(HdFormatInt32, false, VtValue(-1));
-    } else if (name == HdAovTokens->instanceId ||
-               name == HdAovTokens->elementId ||
-               name == HdAovTokens->pointId) {
+        return HdAovDescriptor(HdFormatInt32, false, VtValue(-1));
+    } else if (name == HdAovTokens->instanceId || name == HdAovTokens->elementId || name == HdAovTokens->pointId) {
         // We are only supporting the prim id buffer for now.
-        return HdAovDescriptor();
-    } else if (name == HdAovTokens->normal ||
-               name == HdAovTokens->Neye ||
-               name == "linearDepth" || // This was changed to cameraDepth after 0.19.11.
-               name == "cameraDepth") {
+        return HdAovDescriptor(HdFormatInt32, false, VtValue(-1));
+    } else if (
+        name == HdAovTokens->normal || name == HdAovTokens->Neye ||
+        name == "linearDepth" || // This was changed to cameraDepth after 0.19.11.
+        name == "cameraDepth") {
         // More built-in aovs.
         return HdAovDescriptor();
     } else if (TfStringStartsWith(name.GetString(), HdAovTokens->primvars)) {
diff --git a/render_delegate/render_pass.cpp b/render_delegate/render_pass.cpp
index 984b9e6989..0a628857ac 100755
--- a/render_delegate/render_pass.cpp
+++ b/render_delegate/render_pass.cpp
@@ -58,12 +58,6 @@ HdArnoldRenderPass::HdArnoldRenderPass(
     _depth(SdfPath::EmptyPath()),
     _primId(SdfPath::EmptyPath())
 {
-    {
-        AtString reason;
-#if AI_VERSION_ARCH_NUM > 5
-        _gpuSupportEnabled = AiDeviceTypeIsSupported(AI_DEVICE_TYPE_GPU, reason);
-#endif
-    }
     auto* universe = _delegate->GetUniverse();
     _camera = AiNode(universe, str::persp_camera);
     AiNodeSetPtr(AiUniverseGetOptions(universe), str::camera, _camera);
@@ -129,6 +123,15 @@ void HdArnoldRenderPass::_Execute(const HdRenderPassStateSharedPtr& renderPassSt
     // TODO(pal): Remove bindings to P and RGBA. Those are used for other buffers. Or add support for writing to
     // these in the driver.
     HdRenderPassAovBindingVector aovBindings = renderPassState->GetAovBindings();
+    // These buffers are not supported by the driver, but Hydra still needs them allocated and set up, so we remove the bindings.
+    aovBindings.erase(
+        std::remove_if(
+            aovBindings.begin(), aovBindings.end(),
+            [](const HdRenderPassAovBinding& binding) -> bool {
+                return binding.aovName == HdAovTokens->elementId || binding.aovName == HdAovTokens->instanceId ||
+                       binding.aovName == HdAovTokens->pointId;
+            }),
+        aovBindings.end());
     if (aovBindings.empty()) {
         // TODO (pal): Implement.
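The _Execute hunk above relies on the erase-remove idiom, which is easy to misread. Below is a minimal, self-contained sketch of the same filtering step; the Binding struct and all names in it are invented for illustration and are not the Hydra types used in the patch.

#include <algorithm>
#include <cstdio>
#include <string>
#include <vector>

// Stand-in for the AOV binding; only the name matters for the filtering step.
struct Binding {
    std::string aovName;
};

int main()
{
    std::vector<Binding> bindings{{"color"}, {"elementId"}, {"instanceId"}, {"depth"}, {"pointId"}};
    // std::remove_if moves the kept elements to the front and returns the new
    // logical end; erase then trims the leftover tail in a single pass.
    bindings.erase(
        std::remove_if(
            bindings.begin(), bindings.end(),
            [](const Binding& binding) -> bool {
                return binding.aovName == "elementId" || binding.aovName == "instanceId" ||
                       binding.aovName == "pointId";
            }),
        bindings.end());
    for (const auto& binding : bindings) {
        std::printf("%s\n", binding.aovName.c_str()); // Prints "color" and "depth".
    }
    return 0;
}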
diff --git a/render_delegate/render_pass.h b/render_delegate/render_pass.h index d0d378b658..2a9fc05046 100755 --- a/render_delegate/render_pass.h +++ b/render_delegate/render_pass.h @@ -109,8 +109,7 @@ class HdArnoldRenderPass : public HdRenderPass { int _width = 0; ///< Width of the render buffer. int _height = 0; ///< Height of the render buffer. - bool _isConverged = false; ///< State of the render convergence. - bool _gpuSupportEnabled = false; ///< If the GPU backend is supported. + bool _isConverged = false; ///< State of the render convergence. }; PXR_NAMESPACE_CLOSE_SCOPE
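To make the conversion dispatch in render_buffer.cpp easier to follow, here is a rough, self-contained sketch of the same pattern: a packed (destination, source) format key looked up in an unordered_map of conversion function pointers. Every name below is made up for the example; this mirrors the technique, not the actual arnold-usd or Hydra API.

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <unordered_map>

enum Format : int { FormatUNorm8 = 0, FormatFloat32 = 1 };

// A key packing the destination and source formats into a single hashable value.
struct Key {
    std::uint16_t to;
    std::uint16_t from;
    bool operator==(const Key& other) const { return to == other.to && from == other.from; }
    struct Hash {
        std::size_t operator()(const Key& key) const { return key.to | (key.from << 8); }
    };
};

// Convert `count` float components to UNorm8, clamping to the [0, 255] range.
void floatToUNorm8(const void* in, void* out, std::size_t count)
{
    const auto* from = static_cast<const float*>(in);
    auto* to = static_cast<std::uint8_t*>(out);
    std::transform(from, from + count, to, [](float value) -> std::uint8_t {
        return static_cast<std::uint8_t>(std::max(0, std::min(static_cast<int>(value * 255.0f), 255)));
    });
}

using ConvertFn = void (*)(const void*, void*, std::size_t);

// One entry per supported (destination, source) pair; the real table has twenty.
const std::unordered_map<Key, ConvertFn, Key::Hash> converters{
    {{FormatUNorm8, FormatFloat32}, floatToUNorm8},
};

int main()
{
    const float bucket[] = {0.0f, 0.5f, 2.0f, -1.0f}; // Out-of-range values get clamped.
    std::uint8_t buffer[4] = {};
    const auto it = converters.find({FormatUNorm8, FormatFloat32});
    if (it != converters.end()) {
        it->second(bucket, buffer, 4);
    }
    std::printf("%d %d %d %d\n", buffer[0], buffer[1], buffer[2], buffer[3]); // Prints "0 127 255 0".
    return 0;
}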