diff --git a/render_delegate/render_buffer.cpp b/render_delegate/render_buffer.cpp
index d7089b6f8f..95f68e6647 100644
--- a/render_delegate/render_buffer.cpp
+++ b/render_delegate/render_buffer.cpp
@@ -13,6 +13,7 @@
 // limitations under the License.
 #include "render_buffer.h"
 
+#include <algorithm>
 #include <mutex>
 #include <unordered_map>
 
@@ -23,6 +24,174 @@
 // TOOD(pal): use a more efficient locking mechanism than the std::mutex.
 PXR_NAMESPACE_OPEN_SCOPE
 
+namespace {
+
+// Mapping the HdFormat base type to a C++ type.
+// The function querying the component size is not constexpr.
+template <HdFormat F>
+struct HdFormatType {
+    using type = void;
+};
+
+template <>
+struct HdFormatType<HdFormatUNorm8> {
+    using type = uint8_t;
+};
+
+template <>
+struct HdFormatType<HdFormatSNorm8> {
+    using type = int8_t;
+};
+
+template <>
+struct HdFormatType<HdFormatFloat16> {
+    using type = GfHalf;
+};
+
+template <>
+struct HdFormatType<HdFormatFloat32> {
+    using type = float;
+};
+
+template <>
+struct HdFormatType<HdFormatInt32> {
+    using type = int32_t;
+};
+
+// We are storing the function pointers in an unordered map and using a very simple, well packed key to look them up.
+// We need to investigate if the overhead of the unordered_map lookup, the function call and pushing the arguments
+// to the stack is significant, compared to inlining all the functions.
+struct ConversionKey {
+    const uint16_t from;
+    const uint16_t to;
+    ConversionKey(int _from, int _to) : from(static_cast<uint16_t>(_from)), to(static_cast<uint16_t>(_to)) {}
+    struct HashFunctor {
+        size_t operator()(const ConversionKey& key) const
+        {
+            // The max value for the key is 20.
+            // TODO(pal): Use HdFormatCount to better pack the keys.
+            return key.to | (key.from << 8);
+        }
+    };
+};
+
+inline bool operator==(const ConversionKey& a, const ConversionKey& b) { return a.from == b.from && a.to == b.to; }
+
+inline bool supportedComponentFormat(HdFormat format)
+{
+    const auto componentFormat = HdGetComponentFormat(format);
+    return componentFormat == HdFormatUNorm8 || componentFormat == HdFormatSNorm8 ||
+        componentFormat == HdFormatFloat16 || componentFormat == HdFormatFloat32 || componentFormat == HdFormatInt32;
+}
+
+template <typename TO, typename FROM>
+inline TO convertType(FROM from)
+{
+    return static_cast<TO>(from);
+}
+
+// TODO(pal): Dithering?
+template <>
+inline uint8_t convertType(float from)
+{
+    return std::max(0, std::min(static_cast<int>(from * 255.0f), 255));
+}
+
+template <>
+inline uint8_t convertType(GfHalf from)
+{
+    return std::max(0, std::min(static_cast<int>(from * 255.0f), 255));
+}
+
+template <>
+inline int8_t convertType(float from)
+{
+    return std::max(-127, std::min(static_cast<int>(from * 127.0f), 127));
+}
+
+template <>
+inline int8_t convertType(GfHalf from)
+{
+    return std::max(-127, std::min(static_cast<int>(from * 127.0f), 127));
+}
+
+// xo, xe, yo and ye are already clamped against width and height, and we checked the corner cases when the bucket is empty.
+template <HdFormat TO, HdFormat FROM>
+inline void writeBucket(
+    void* buffer, size_t componentCount, unsigned int width, unsigned int height, const void* bucketData,
+    size_t bucketComponentCount, unsigned int xo, unsigned int xe, unsigned int yo, unsigned int ye,
+    unsigned int bucketWidth)
+{
+    auto* to =
+        static_cast<typename HdFormatType<TO>::type*>(buffer) + (xo + (height - yo - 1) * width) * componentCount;
+    const auto* from = static_cast<const typename HdFormatType<FROM>::type*>(bucketData);
+
+    const auto toStep = width * componentCount;
+    const auto fromStep = bucketWidth * bucketComponentCount;
+
+    const auto copyOp = [](const typename HdFormatType<FROM>::type& in) -> typename HdFormatType<TO>::type {
+        return convertType<typename HdFormatType<TO>::type, typename HdFormatType<FROM>::type>(in);
+    };
+    const auto dataWidth = xe - xo;
+    // We use std::transform instead of std::copy, so we can add special logic for float32/float16. If the lambda is
+    // just a straight copy, the behavior should be the same since we can't use memcpy.
+    if (componentCount == bucketComponentCount) {
+        const auto copyWidth = dataWidth * componentCount;
+        for (auto y = yo; y < ye; y += 1) {
+            std::transform(from, from + copyWidth, to, copyOp);
+            to -= toStep;
+            from += fromStep;
+        }
+    } else { // We need to call std::transform per pixel with the amount of components to copy.
+        const auto componentsToCopy = std::min(componentCount, bucketComponentCount);
+        for (auto y = yo; y < ye; y += 1) {
+            for (auto x = decltype(dataWidth){0}; x < dataWidth; x += 1) {
+                std::transform(
+                    from + x * bucketComponentCount, from + x * bucketComponentCount + componentsToCopy,
+                    to + x * componentCount, copyOp);
+            }
+            to -= toStep;
+            from += fromStep;
+        }
+    }
+}
+
+using WriteBucketFunction = void (*)(
+    void*, size_t, unsigned int, unsigned int, const void*, size_t, unsigned int, unsigned int, unsigned int,
+    unsigned int, unsigned int);
+
+using WriteBucketFunctionMap = std::unordered_map<ConversionKey, WriteBucketFunction, ConversionKey::HashFunctor>;
+
+WriteBucketFunctionMap writeBucketFunctions{
+    // Write to UNorm8 format.
+    {{HdFormatUNorm8, HdFormatSNorm8}, writeBucket<HdFormatUNorm8, HdFormatSNorm8>},
+    {{HdFormatUNorm8, HdFormatFloat16}, writeBucket<HdFormatUNorm8, HdFormatFloat16>},
+    {{HdFormatUNorm8, HdFormatFloat32}, writeBucket<HdFormatUNorm8, HdFormatFloat32>},
+    {{HdFormatUNorm8, HdFormatInt32}, writeBucket<HdFormatUNorm8, HdFormatInt32>},
+    // Write to SNorm8 format.
+    {{HdFormatSNorm8, HdFormatUNorm8}, writeBucket<HdFormatSNorm8, HdFormatUNorm8>},
+    {{HdFormatSNorm8, HdFormatFloat16}, writeBucket<HdFormatSNorm8, HdFormatFloat16>},
+    {{HdFormatSNorm8, HdFormatFloat32}, writeBucket<HdFormatSNorm8, HdFormatFloat32>},
+    {{HdFormatSNorm8, HdFormatInt32}, writeBucket<HdFormatSNorm8, HdFormatInt32>},
+    // Write to Float16 format.
+    {{HdFormatFloat16, HdFormatSNorm8}, writeBucket<HdFormatFloat16, HdFormatSNorm8>},
+    {{HdFormatFloat16, HdFormatUNorm8}, writeBucket<HdFormatFloat16, HdFormatUNorm8>},
+    {{HdFormatFloat16, HdFormatFloat32}, writeBucket<HdFormatFloat16, HdFormatFloat32>},
+    {{HdFormatFloat16, HdFormatInt32}, writeBucket<HdFormatFloat16, HdFormatInt32>},
+    // Write to Float32 format.
+    {{HdFormatFloat32, HdFormatSNorm8}, writeBucket<HdFormatFloat32, HdFormatSNorm8>},
+    {{HdFormatFloat32, HdFormatUNorm8}, writeBucket<HdFormatFloat32, HdFormatUNorm8>},
+    {{HdFormatFloat32, HdFormatFloat16}, writeBucket<HdFormatFloat32, HdFormatFloat16>},
+    {{HdFormatFloat32, HdFormatInt32}, writeBucket<HdFormatFloat32, HdFormatInt32>},
+    // Write to Int32 format.
+    {{HdFormatInt32, HdFormatSNorm8}, writeBucket<HdFormatInt32, HdFormatSNorm8>},
+    {{HdFormatInt32, HdFormatUNorm8}, writeBucket<HdFormatInt32, HdFormatUNorm8>},
+    {{HdFormatInt32, HdFormatFloat16}, writeBucket<HdFormatInt32, HdFormatFloat16>},
+    {{HdFormatInt32, HdFormatFloat32}, writeBucket<HdFormatInt32, HdFormatFloat32>},
+};
+
+} // namespace
+
 HdArnoldRenderBuffer::HdArnoldRenderBuffer(const SdfPath& id) : HdRenderBuffer(id) {}
 
 bool HdArnoldRenderBuffer::Allocate(const GfVec3i& dimensions, HdFormat format, bool multiSampled)
@@ -31,6 +200,9 @@ bool HdArnoldRenderBuffer::Allocate(const GfVec3i& dimensions, HdFormat format,
     // So deallocate won't lock.
     decltype(_buffer) tmp{};
     _buffer.swap(tmp);
+    if (!supportedComponentFormat(format)) {
+        return false;
+    }
     TF_UNUSED(multiSampled);
     _format = format;
     _width = dimensions[0];
@@ -67,7 +239,14 @@ void HdArnoldRenderBuffer::WriteBucket(
     unsigned int bucketXO, unsigned int bucketYO, unsigned int bucketWidth, unsigned int bucketHeight, HdFormat format,
     const void* bucketData)
 {
+    if (!supportedComponentFormat(format)) {
+        return;
+    }
     std::lock_guard<std::mutex> _guard(_mutex);
+    // Checking for empty buffers.
+    if (_buffer.empty()) {
+        return;
+    }
     const auto xo = AiClamp(bucketXO, 0u, _width);
     const auto xe = AiClamp(bucketXO + bucketWidth, 0u, _width);
     // Empty bucket.
@@ -121,13 +300,14 @@ void HdArnoldRenderBuffer::WriteBucket(
         } else {
             // Component counts do not match, we need to copy as much data as possible and leave the rest to their
            // default values, we expect someone to set that up before this call.
-            const auto copiedDataSize = std::min(inComponentCount, componentCount) * HdDataSizeOfFormat(componentFormat);
+            const auto copiedDataSize =
+                std::min(inComponentCount, componentCount) * HdDataSizeOfFormat(componentFormat);
             // The pixelSize is different for the incoming data.
             const auto inPixelSize = HdDataSizeOfFormat(format);
             // The size of the line for the bucket, this could be more than the data copied.
             const auto inLineDataSize = bucketWidth * inPixelSize;
             for (auto y = yo; y < ye; y += 1) {
-                for (auto x = xo; x < xe; x += 1) {
+                for (auto x = decltype(dataWidth){0}; x < dataWidth; x += 1) {
                     memcpy(data + x * pixelSize, inData + x * inPixelSize, copiedDataSize);
                 }
                 data -= fullLineDataSize;
@@ -135,7 +315,12 @@ void HdArnoldRenderBuffer::WriteBucket(
             }
         }
     } else { // Need to do conversion.
-        return;
+        const auto it = writeBucketFunctions.find({componentFormat, inComponentFormat});
+        if (it != writeBucketFunctions.end()) {
+            it->second(
+                _buffer.data(), componentCount, _width, _height, bucketData, inComponentCount, xo, xe, yo, ye,
+                bucketWidth);
+        }
     }
 }
 
diff --git a/render_delegate/render_buffer.h b/render_delegate/render_buffer.h
index 80641d21d6..91d23bdd58 100644
--- a/render_delegate/render_buffer.h
+++ b/render_delegate/render_buffer.h
@@ -102,7 +102,7 @@ class HdArnoldRenderBuffer : public HdRenderBuffer {
     unsigned int _width = 0;  ///< Buffer width.
     unsigned int _height = 0; ///< Buffer height.
     HdFormat _format = HdFormat::HdFormatUNorm8Vec4; ///< Internal format of the buffer.
-    bool _converged;         ///< Store if the render buffer has converged.
+    bool _converged = false; ///< Store if the render buffer has converged.
 };
 
 using HdArnoldRenderBufferStorage = std::unordered_map<TfToken, HdArnoldRenderBuffer*, TfToken::HashFunctor>;
diff --git a/render_delegate/render_delegate.cpp b/render_delegate/render_delegate.cpp
index da2def7a26..1060eb41ea 100755
--- a/render_delegate/render_delegate.cpp
+++ b/render_delegate/render_delegate.cpp
@@ -663,24 +663,22 @@ AtNode* HdArnoldRenderDelegate::GetFallbackVolumeShader() const { return _fallba
 HdAovDescriptor HdArnoldRenderDelegate::GetDefaultAovDescriptor(TfToken const& name) const
 {
     if (name == HdAovTokens->color) {
-#ifdef USD_HAS_UPDATED_COMPOSITOR
+#if 1
         return HdAovDescriptor(HdFormatFloat32Vec4, false, VtValue(GfVec4f(0.0f)));
 #else
-        return HdAovDescriptor(HdFormatUNorm8Vec4, false, VtValue(GfVec4f(0.0f)));
+        return HdAovDescriptor(HdFormatUNorm8Vec4, false, VtValue(GfVec4f(0.0f, 0.0f, 0.0f, 0.0f)));
 #endif
     } else if (name == HdAovTokens->depth) {
         return HdAovDescriptor(HdFormatFloat32, false, VtValue(1.0f));
     } else if (name == HdAovTokens->primId) {
-        return HdAovDescriptor(HdFormatInt32, false, VtValue(-1));
-    } else if (name == HdAovTokens->instanceId ||
-               name == HdAovTokens->elementId ||
-               name == HdAovTokens->pointId) {
+        return HdAovDescriptor(HdFormatInt32, false, VtValue(-1));
+    } else if (name == HdAovTokens->instanceId || name == HdAovTokens->elementId || name == HdAovTokens->pointId) {
         // We are only supporting the prim id buffer for now.
-        return HdAovDescriptor();
-    } else if (name == HdAovTokens->normal ||
-               name == HdAovTokens->Neye ||
-               name == "linearDepth" || // This was changed to cameraDepth after 0.19.11.
-               name == "cameraDepth") {
+        return HdAovDescriptor(HdFormatInt32, false, VtValue(-1));
+    } else if (
+        name == HdAovTokens->normal || name == HdAovTokens->Neye ||
+        name == "linearDepth" || // This was changed to cameraDepth after 0.19.11.
+        name == "cameraDepth") {
         // More built-in aovs.
         return HdAovDescriptor();
     } else if (TfStringStartsWith(name.GetString(), HdAovTokens->primvars)) {
diff --git a/render_delegate/render_pass.cpp b/render_delegate/render_pass.cpp
index 984b9e6989..0a628857ac 100755
--- a/render_delegate/render_pass.cpp
+++ b/render_delegate/render_pass.cpp
@@ -58,12 +58,6 @@ HdArnoldRenderPass::HdArnoldRenderPass(
     _depth(SdfPath::EmptyPath()),
     _primId(SdfPath::EmptyPath())
 {
-    {
-        AtString reason;
-#if AI_VERSION_ARCH_NUM > 5
-        _gpuSupportEnabled = AiDeviceTypeIsSupported(AI_DEVICE_TYPE_GPU, reason);
-#endif
-    }
     auto* universe = _delegate->GetUniverse();
     _camera = AiNode(universe, str::persp_camera);
     AiNodeSetPtr(AiUniverseGetOptions(universe), str::camera, _camera);
@@ -129,6 +123,15 @@ void HdArnoldRenderPass::_Execute(const HdRenderPassStateSharedPtr& renderPassSt
     // TODO(pal): Remove bindings to P and RGBA. Those are used for other buffers. Or add support for writing to
     // these in the driver.
     HdRenderPassAovBindingVector aovBindings = renderPassState->GetAovBindings();
+    // These buffers are not supported by the driver, but Hydra still needs them allocated and set up, so we remove the bindings.
+    aovBindings.erase(
+        std::remove_if(
+            aovBindings.begin(), aovBindings.end(),
+            [](const HdRenderPassAovBinding& binding) -> bool {
+                return binding.aovName == HdAovTokens->elementId || binding.aovName == HdAovTokens->instanceId ||
+                       binding.aovName == HdAovTokens->pointId;
+            }),
+        aovBindings.end());
     if (aovBindings.empty()) {
         // TODO (pal): Implement.
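The _Execute hunk above relies on the erase-remove idiom, which is easy to misread. Below is a minimal, self-contained sketch of the same filtering step; the Binding struct and all names in it are invented for illustration and are not the Hydra types used in the patch.

#include <algorithm>
#include <cstdio>
#include <string>
#include <vector>

// Stand-in for the AOV binding; only the name matters for the filtering step.
struct Binding {
    std::string aovName;
};

int main()
{
    std::vector<Binding> bindings{{"color"}, {"elementId"}, {"instanceId"}, {"depth"}, {"pointId"}};
    // std::remove_if moves the kept elements to the front and returns the new
    // logical end; erase then trims the leftover tail in a single pass.
    bindings.erase(
        std::remove_if(
            bindings.begin(), bindings.end(),
            [](const Binding& binding) -> bool {
                return binding.aovName == "elementId" || binding.aovName == "instanceId" ||
                       binding.aovName == "pointId";
            }),
        bindings.end());
    for (const auto& binding : bindings) {
        std::printf("%s\n", binding.aovName.c_str()); // Prints "color" and "depth".
    }
    return 0;
}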
diff --git a/render_delegate/render_pass.h b/render_delegate/render_pass.h index d0d378b658..2a9fc05046 100755 --- a/render_delegate/render_pass.h +++ b/render_delegate/render_pass.h @@ -109,8 +109,7 @@ class HdArnoldRenderPass : public HdRenderPass { int _width = 0; ///< Width of the render buffer. int _height = 0; ///< Height of the render buffer. - bool _isConverged = false; ///< State of the render convergence. - bool _gpuSupportEnabled = false; ///< If the GPU backend is supported. + bool _isConverged = false; ///< State of the render convergence. }; PXR_NAMESPACE_CLOSE_SCOPE
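To make the conversion dispatch in render_buffer.cpp easier to follow, here is a rough, self-contained sketch of the same pattern: a packed (destination, source) format key looked up in an unordered_map of conversion function pointers. Every name below is made up for the example; this mirrors the technique, not the actual arnold-usd or Hydra API.

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <unordered_map>

enum Format : int { FormatUNorm8 = 0, FormatFloat32 = 1 };

// A key packing the destination and source formats into a single hashable value.
struct Key {
    std::uint16_t to;
    std::uint16_t from;
    bool operator==(const Key& other) const { return to == other.to && from == other.from; }
    struct Hash {
        std::size_t operator()(const Key& key) const { return key.to | (key.from << 8); }
    };
};

// Convert `count` float components to UNorm8, clamping to the [0, 255] range.
void floatToUNorm8(const void* in, void* out, std::size_t count)
{
    const auto* from = static_cast<const float*>(in);
    auto* to = static_cast<std::uint8_t*>(out);
    std::transform(from, from + count, to, [](float value) -> std::uint8_t {
        return static_cast<std::uint8_t>(std::max(0, std::min(static_cast<int>(value * 255.0f), 255)));
    });
}

using ConvertFn = void (*)(const void*, void*, std::size_t);

// One entry per supported (destination, source) pair; the real table has twenty.
const std::unordered_map<Key, ConvertFn, Key::Hash> converters{
    {{FormatUNorm8, FormatFloat32}, floatToUNorm8},
};

int main()
{
    const float bucket[] = {0.0f, 0.5f, 2.0f, -1.0f}; // Out-of-range values get clamped.
    std::uint8_t buffer[4] = {};
    const auto it = converters.find({FormatUNorm8, FormatFloat32});
    if (it != converters.end()) {
        it->second(bucket, buffer, 4);
    }
    std::printf("%d %d %d %d\n", buffer[0], buffer[1], buffer[2], buffer[3]); // Prints "0 127 255 0".
    return 0;
}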