Converting between different types in the render buffer.
sirpalee committed Mar 24, 2020
1 parent 0370f85 commit c0ebd62
Showing 5 changed files with 208 additions and 23 deletions.
191 changes: 188 additions & 3 deletions render_delegate/render_buffer.cpp
@@ -13,6 +13,7 @@
// limitations under the License.
#include "render_buffer.h"

#include <pxr/base/gf/half.h>
#include <pxr/base/gf/vec3i.h>

#include <ai.h>
@@ -23,6 +24,174 @@
// TODO(pal): use a more efficient locking mechanism than the std::mutex.
PXR_NAMESPACE_OPEN_SCOPE

namespace {

// Mapping an HdFormat component type to a C++ type.
// The runtime function querying the component size is not constexpr, hence this compile-time trait.
template <int TYPE>
struct HdFormatType {
using type = void;
};

template <>
struct HdFormatType<HdFormatUNorm8> {
using type = uint8_t;
};

template <>
struct HdFormatType<HdFormatSNorm8> {
using type = int8_t;
};

template <>
struct HdFormatType<HdFormatFloat16> {
using type = GfHalf;
};

template <>
struct HdFormatType<HdFormatFloat32> {
using type = float;
};

template <>
struct HdFormatType<HdFormatInt32> {
using type = int32_t;
};
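
As a quick aside (an illustrative sketch, not part of the commit, assuming <type_traits> is included), the trait can be sanity-checked at compile time:

// Illustrative only: the trait resolves each component format to its matching C++ type.
static_assert(std::is_same<HdFormatType<HdFormatUNorm8>::type, uint8_t>::value, "unexpected UNorm8 type");
static_assert(std::is_same<HdFormatType<HdFormatFloat16>::type, GfHalf>::value, "unexpected Float16 type");
static_assert(std::is_same<HdFormatType<HdFormatFloat32>::type, float>::value, "unexpected Float32 type");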

// We store the function pointers in an unordered map and use a very simple, well-packed key to look them up.
// We still need to investigate whether the overhead of the unordered_map lookup, the function call and pushing the
// arguments to the stack is significant compared to inlining all the functions.
struct ConversionKey {
const uint16_t from;
const uint16_t to;
ConversionKey(int _from, int _to) : from(static_cast<uint16_t>(_from)), to(static_cast<uint16_t>(_to)) {}
struct HashFunctor {
size_t operator()(const ConversionKey& key) const
{
// The max value for the key is 20.
// TODO(pal): Use HdFormatCount to better pack the keys.
return key.to | (key.from << 8);
}
};
};

inline bool operator==(const ConversionKey& a, const ConversionKey& b) { return a.from == b.from && a.to == b.to; }
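
For illustration (a sketch, not part of the commit), the key packs both formats into the low bits of a single value, so the hash is just a shift and an or; the same {destination, source} ordering is used when the map below is populated and queried:

// Illustrative only: the key matching writeBucket<HdFormatFloat32, HdFormatUNorm8>
// (i.e. writing UNorm8 bucket data into a Float32 buffer).
const ConversionKey key{HdFormatFloat32, HdFormatUNorm8};
const size_t packed = ConversionKey::HashFunctor{}(key); // HdFormatUNorm8 | (HdFormatFloat32 << 8)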

inline bool supportedComponentFormat(HdFormat format)
{
const auto componentFormat = HdGetComponentFormat(format);
return componentFormat == HdFormatUNorm8 || componentFormat == HdFormatSNorm8 ||
componentFormat == HdFormatFloat16 || componentFormat == HdFormatFloat32 || componentFormat == HdFormatInt32;
}
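
For example (illustrative, not part of the commit, assuming <cassert>), a multi-channel format is reduced to its component format before the check:

// Illustrative only: HdFormatFloat32Vec4 has the component format HdFormatFloat32, which is supported.
assert(supportedComponentFormat(HdFormatFloat32Vec4));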

template <typename TO, typename FROM>
inline TO convertType(FROM from)
{
return static_cast<TO>(from);
}

// TODO(pal): Dithering?
template <>
inline uint8_t convertType(float from)
{
return std::max(0, std::min(static_cast<int>(from * 255.0f), 255));
}

template <>
inline uint8_t convertType(GfHalf from)
{
return std::max(0, std::min(static_cast<int>(from * 255.0f), 255));
}

template <>
inline int8_t convertType(float from)
{
return std::max(-127, std::min(static_cast<int>(from * 127.0f), 127));
}

template <>
inline int8_t convertType(GfHalf from)
{
return std::max(-127, std::min(static_cast<int>(from * 127.0f), 127));
}
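
A few worked values (illustrative, not part of the commit, assuming <cassert>) show the clamping in the float-to-8-bit specializations:

// Illustrative only: out-of-range inputs clamp instead of wrapping.
assert(convertType<uint8_t>(0.5f) == 127);  // 0.5f * 255.0f truncates to 127
assert(convertType<uint8_t>(1.5f) == 255);  // over-range clamps to 255
assert(convertType<int8_t>(-2.0f) == -127); // signed values clamp to [-127, 127]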

// xo, xe, yo and ye are already clamped against the width and height, and the empty-bucket corner cases are checked before this call.
template <int TO, int FROM>
inline void writeBucket(
void* buffer, size_t componentCount, unsigned int width, unsigned int height, const void* bucketData,
size_t bucketComponentCount, unsigned int xo, unsigned int xe, unsigned int yo, unsigned int ye,
unsigned int bucketWidth)
{
auto* to =
static_cast<typename HdFormatType<TO>::type*>(buffer) + (xo + (height - yo - 1) * width) * componentCount;
const auto* from = static_cast<const typename HdFormatType<FROM>::type*>(bucketData);

const auto toStep = width * componentCount;
const auto fromStep = bucketWidth * bucketComponentCount;

const auto copyOp = [](const typename HdFormatType<FROM>::type& in) -> typename HdFormatType<TO>::type {
return convertType<typename HdFormatType<TO>::type, typename HdFormatType<FROM>::type>(in);
};
const auto dataWidth = xe - xo;
// We use std::transform instead of std::copy so we can add special logic for float32/float16 conversions. If the
// lambda is just a straight copy the behavior matches std::copy; memcpy is not an option since the types can differ.
if (componentCount == bucketComponentCount) {
const auto copyWidth = dataWidth * componentCount;
for (auto y = yo; y < ye; y += 1) {
std::transform(from, from + copyWidth, to, copyOp);
to -= toStep;
from += fromStep;
}
} else { // We need to call std::transform per pixel with the number of components to copy.
const auto componentsToCopy = std::min(componentCount, bucketComponentCount);
for (auto y = yo; y < ye; y += 1) {
for (auto x = decltype(dataWidth){0}; x < dataWidth; x += 1) {
std::transform(
from + x * bucketComponentCount, from + x * bucketComponentCount + componentsToCopy,
to + x * componentCount, copyOp);
}
to -= toStep;
from += fromStep;
}
}
}
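
A small worked example of the destination addressing (illustrative, not part of the commit; the values are hypothetical): the write pointer starts on the vertically flipped row of the bucket origin and walks upward one full row per scanline, while the bucket data is read top-down.

// Illustrative only: a 4x4, single-component destination with a bucket starting at (1, 1).
const unsigned int width = 4, height = 4, componentCount = 1, xo = 1, yo = 1;
const size_t start = (xo + (height - yo - 1) * width) * componentCount; // == 9
const size_t rowAbove = start - width * componentCount;                 // == 5, one scanline up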

using WriteBucketFunction = void (*)(
void*, size_t, unsigned int, unsigned int, const void*, size_t, unsigned int, unsigned int, unsigned int,
unsigned int, unsigned int);

using WriteBucketFunctionMap = std::unordered_map<ConversionKey, WriteBucketFunction, ConversionKey::HashFunctor>;

WriteBucketFunctionMap writeBucketFunctions{
// Write to UNorm8 format.
{{HdFormatUNorm8, HdFormatSNorm8}, writeBucket<HdFormatUNorm8, HdFormatSNorm8>},
{{HdFormatUNorm8, HdFormatFloat16}, writeBucket<HdFormatUNorm8, HdFormatFloat16>},
{{HdFormatUNorm8, HdFormatFloat32}, writeBucket<HdFormatUNorm8, HdFormatFloat32>},
{{HdFormatUNorm8, HdFormatInt32}, writeBucket<HdFormatUNorm8, HdFormatInt32>},
// Write to SNorm8 format.
{{HdFormatSNorm8, HdFormatUNorm8}, writeBucket<HdFormatSNorm8, HdFormatUNorm8>},
{{HdFormatSNorm8, HdFormatFloat16}, writeBucket<HdFormatSNorm8, HdFormatFloat16>},
{{HdFormatSNorm8, HdFormatFloat32}, writeBucket<HdFormatSNorm8, HdFormatFloat32>},
{{HdFormatSNorm8, HdFormatInt32}, writeBucket<HdFormatSNorm8, HdFormatInt32>},
// Write to Float16 format.
{{HdFormatFloat16, HdFormatSNorm8}, writeBucket<HdFormatFloat16, HdFormatSNorm8>},
{{HdFormatFloat16, HdFormatUNorm8}, writeBucket<HdFormatFloat16, HdFormatUNorm8>},
{{HdFormatFloat16, HdFormatFloat32}, writeBucket<HdFormatFloat16, HdFormatFloat32>},
{{HdFormatFloat16, HdFormatInt32}, writeBucket<HdFormatFloat16, HdFormatInt32>},
// Write to Float32 format.
{{HdFormatFloat32, HdFormatSNorm8}, writeBucket<HdFormatFloat32, HdFormatSNorm8>},
{{HdFormatFloat32, HdFormatUNorm8}, writeBucket<HdFormatFloat32, HdFormatUNorm8>},
{{HdFormatFloat32, HdFormatFloat16}, writeBucket<HdFormatFloat32, HdFormatFloat16>},
{{HdFormatFloat32, HdFormatInt32}, writeBucket<HdFormatFloat32, HdFormatInt32>},
// Write to Int32 format.
{{HdFormatInt32, HdFormatSNorm8}, writeBucket<HdFormatInt32, HdFormatSNorm8>},
{{HdFormatInt32, HdFormatUNorm8}, writeBucket<HdFormatInt32, HdFormatUNorm8>},
{{HdFormatInt32, HdFormatFloat16}, writeBucket<HdFormatInt32, HdFormatFloat16>},
{{HdFormatInt32, HdFormatFloat32}, writeBucket<HdFormatInt32, HdFormatFloat32>},
};

} // namespace

HdArnoldRenderBuffer::HdArnoldRenderBuffer(const SdfPath& id) : HdRenderBuffer(id) {}

bool HdArnoldRenderBuffer::Allocate(const GfVec3i& dimensions, HdFormat format, bool multiSampled)
Expand All @@ -31,6 +200,9 @@ bool HdArnoldRenderBuffer::Allocate(const GfVec3i& dimensions, HdFormat format,
// So deallocate won't lock.
decltype(_buffer) tmp{};
_buffer.swap(tmp);
if (!supportedComponentFormat(format)) {
return false;
}
TF_UNUSED(multiSampled);
_format = format;
_width = dimensions[0];
@@ -67,7 +239,14 @@ void HdArnoldRenderBuffer::WriteBucket(
unsigned int bucketXO, unsigned int bucketYO, unsigned int bucketWidth, unsigned int bucketHeight, HdFormat format,
const void* bucketData)
{
if (!supportedComponentFormat(format)) {
return;
}
std::lock_guard<std::mutex> _guard(_mutex);
// Checking for empty buffers.
if (_buffer.empty()) {
return;
}
const auto xo = AiClamp(bucketXO, 0u, _width);
const auto xe = AiClamp(bucketXO + bucketWidth, 0u, _width);
// Empty bucket.
@@ -121,21 +300,27 @@ void HdArnoldRenderBuffer::WriteBucket(
} else {
// Component counts do not match; we copy as much data as possible and leave the rest at their
// default values, which we expect to be set up before this call.
const auto copiedDataSize = std::min(inComponentCount, componentCount) * HdDataSizeOfFormat(componentFormat);
const auto copiedDataSize =
std::min(inComponentCount, componentCount) * HdDataSizeOfFormat(componentFormat);
// The pixelSize is different for the incoming data.
const auto inPixelSize = HdDataSizeOfFormat(format);
// The size of the line for the bucket, this could be more than the data copied.
const auto inLineDataSize = bucketWidth * inPixelSize;
for (auto y = yo; y < ye; y += 1) {
for (auto x = xo; x < xe; x += 1) {
for (auto x = decltype(dataWidth){0}; x < dataWidth; x += 1) {
memcpy(data + x * pixelSize, inData + x * inPixelSize, copiedDataSize);
}
data -= fullLineDataSize;
inData += inLineDataSize;
}
}
} else { // Need to do conversion.
return;
const auto it = writeBucketFunctions.find({componentFormat, inComponentFormat});
if (it != writeBucketFunctions.end()) {
it->second(
_buffer.data(), componentCount, _width, _height, bucketData, inComponentCount, xo, xe, yo, ye,
bucketWidth);
}
}
}

2 changes: 1 addition & 1 deletion render_delegate/render_buffer.h
@@ -102,7 +102,7 @@ class HdArnoldRenderBuffer : public HdRenderBuffer {
unsigned int _width = 0; ///< Buffer width.
unsigned int _height = 0; ///< Buffer height.
HdFormat _format = HdFormat::HdFormatUNorm8Vec4; ///< Internal format of the buffer.
bool _converged; ///< Store if the render buffer has converged.
bool _converged = false; ///< Store if the render buffer has converged.
};

using HdArnoldRenderBufferStorage = std::unordered_map<TfToken, HdArnoldRenderBuffer*, TfToken::HashFunctor>;
20 changes: 9 additions & 11 deletions render_delegate/render_delegate.cpp
@@ -663,24 +663,22 @@ AtNode* HdArnoldRenderDelegate::GetFallbackVolumeShader() const { return _fallba
HdAovDescriptor HdArnoldRenderDelegate::GetDefaultAovDescriptor(TfToken const& name) const
{
if (name == HdAovTokens->color) {
#ifdef USD_HAS_UPDATED_COMPOSITOR
#if 1
return HdAovDescriptor(HdFormatFloat32Vec4, false, VtValue(GfVec4f(0.0f)));
#else
return HdAovDescriptor(HdFormatUNorm8Vec4, false, VtValue(GfVec4f(0.0f)));
return HdAovDescriptor(HdFormatUNorm8Vec4, false, VtValue(GfVec4f(0.0f, 0.0f, 0.0f, 0.0f)));
#endif
} else if (name == HdAovTokens->depth) {
return HdAovDescriptor(HdFormatFloat32, false, VtValue(1.0f));
} else if (name == HdAovTokens->primId) {
return HdAovDescriptor(HdFormatInt32, false, VtValue(-1));
} else if (name == HdAovTokens->instanceId ||
name == HdAovTokens->elementId ||
name == HdAovTokens->pointId) {
return HdAovDescriptor(HdFormatInt32, false, VtValue(-1));
} else if (name == HdAovTokens->instanceId || name == HdAovTokens->elementId || name == HdAovTokens->pointId) {
// We are only supporting the prim id buffer for now.
return HdAovDescriptor();
} else if (name == HdAovTokens->normal ||
name == HdAovTokens->Neye ||
name == "linearDepth" || // This was changed to cameraDepth after 0.19.11.
name == "cameraDepth") {
return HdAovDescriptor(HdFormatInt32, false, VtValue(-1));
} else if (
name == HdAovTokens->normal || name == HdAovTokens->Neye ||
name == "linearDepth" || // This was changed to cameraDepth after 0.19.11.
name == "cameraDepth") {
// More built-in aovs.
return HdAovDescriptor();
} else if (TfStringStartsWith(name.GetString(), HdAovTokens->primvars)) {
15 changes: 9 additions & 6 deletions render_delegate/render_pass.cpp
@@ -58,12 +58,6 @@ HdArnoldRenderPass::HdArnoldRenderPass(
_depth(SdfPath::EmptyPath()),
_primId(SdfPath::EmptyPath())
{
{
AtString reason;
#if AI_VERSION_ARCH_NUM > 5
_gpuSupportEnabled = AiDeviceTypeIsSupported(AI_DEVICE_TYPE_GPU, reason);
#endif
}
auto* universe = _delegate->GetUniverse();
_camera = AiNode(universe, str::persp_camera);
AiNodeSetPtr(AiUniverseGetOptions(universe), str::camera, _camera);
@@ -129,6 +123,15 @@ void HdArnoldRenderPass::_Execute(const HdRenderPassStateSharedPtr& renderPassSt
// TODO(pal): Remove bindings to P and RGBA. Those are used for other buffers. Or add support for writing to
// these in the driver.
HdRenderPassAovBindingVector aovBindings = renderPassState->GetAovBindings();
// Hydra still allocates render buffers for these AOVs, but the render delegate does not support them,
// so remove their bindings before setting up the outputs.
aovBindings.erase(
std::remove_if(
aovBindings.begin(), aovBindings.end(),
[](const HdRenderPassAovBinding& binding) -> bool {
return binding.aovName == HdAovTokens->elementId || binding.aovName == HdAovTokens->instanceId ||
binding.aovName == HdAovTokens->pointId;
}),
aovBindings.end());

if (aovBindings.empty()) {
// TODO (pal): Implement.
3 changes: 1 addition & 2 deletions render_delegate/render_pass.h
@@ -109,8 +109,7 @@ class HdArnoldRenderPass : public HdRenderPass {
int _width = 0; ///< Width of the render buffer.
int _height = 0; ///< Height of the render buffer.

bool _isConverged = false; ///< State of the render convergence.
bool _gpuSupportEnabled = false; ///< If the GPU backend is supported.
bool _isConverged = false; ///< State of the render convergence.
};

PXR_NAMESPACE_CLOSE_SCOPE
