Skip to content

Use alignmend memory for DrJit error functions #98

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 14 additions & 8 deletions momentum/character_solver/simd_normal_error_function.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -266,15 +266,17 @@ __vectorcall DRJIT_INLINE void jacobian_jointParams_to_modelParams(
const Eigen::Index iJointParam,
const ParameterTransform& parameterTransform_,
Eigen::Ref<Eigen::MatrixX<float>> jacobian) {
checkAlignment<kSimdAlignment>(jacobian);

// explicitly multiply with the parameter transform to generate parameter space gradients
for (auto index = parameterTransform_.transform.outerIndexPtr()[iJointParam];
index < parameterTransform_.transform.outerIndexPtr()[iJointParam + 1];
++index) {
const auto modelParamIdx = parameterTransform_.transform.innerIndexPtr()[index];
float* jacPtr = jacobian.col(modelParamIdx).data();
drjit::store(
drjit::store_aligned(
jacPtr,
drjit::load<FloatP>(jacPtr) +
drjit::load_aligned<FloatP>(jacPtr) +
parameterTransform_.transform.valuePtr()[index] * jacobian_jointParams);
}
}
Expand All @@ -297,8 +299,10 @@ double SimdNormalErrorFunction::getJacobian(
// storage for joint errors
std::vector<double> jointErrors(constraints_->numJoints);

// need to make sure we're actually at a 32 byte data offset at the first offset for SIMD access
checkAlignment<kSimdAlignment>(jacobian);
// Need to make sure we're actually at a kSimdAlignment byte data offset at the first offset for
// SIMD access
const size_t addressOffset = computeOffset<kSimdAlignment>(jacobian);
checkAlignment<kSimdAlignment>(jacobian, addressOffset);

// loop over all joints, as these are our base units
auto dispensoOptions = dispenso::ParForOptions();
Expand All @@ -315,7 +319,7 @@ double SimdNormalErrorFunction::getJacobian(
for (uint32_t index = 0; index < constraintCount; index += kSimdPacketSize) {
const auto constraintOffsetIndex =
jointId * SimdNormalConstraints::kMaxConstraints + index;
const auto jacobianOffsetCur = jacobianOffset_[jointId] + index;
const auto jacobianOffsetCur = jacobianOffset_[jointId] + index + addressOffset;

const Vector3fP offset{
drjit::load<FloatP>(&constraints_->offsetX[constraintOffsetIndex]),
Expand All @@ -340,7 +344,8 @@ double SimdNormalErrorFunction::getJacobian(

jointError += weight * drjit::sqr(dist);

drjit::store(residual.segment(jacobianOffsetCur, kSimdPacketSize).data(), dist * wgt);
drjit::store_aligned(
residual.segment(jacobianOffsetCur, kSimdPacketSize).data(), dist * wgt);

// loop over all joints the constraint is attached to and calculate gradient
size_t jointIndex = jointId;
Expand Down Expand Up @@ -455,7 +460,8 @@ double SimdNormalErrorFunctionAVX::getJacobian(
std::vector<double> ets_error;

// need to make sure we're actually at a 32 byte data offset at the first offset for AVX access
checkAlignment<kAvxAlignment>(jacobian);
const size_t addressOffset = computeOffset<kAvxAlignment>(jacobian);
checkAlignment<kAvxAlignment>(jacobian, addressOffset);

// loop over all joints, as these are our base units
auto dispensoOptions = dispenso::ParForOptions();
Expand All @@ -467,7 +473,7 @@ double SimdNormalErrorFunctionAVX::getJacobian(
constraints_->numJoints,
[&](double& error_local, const size_t jointId) {
// get initial offset
const auto offset = jacobianOffset_[jointId];
const auto offset = jacobianOffset_[jointId] + addressOffset;

// pre-load some joint specific values
const auto& transformation = state.jointState[jointId].transformation;
Expand Down
25 changes: 14 additions & 11 deletions momentum/character_solver/simd_plane_error_function.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -163,9 +163,6 @@ double SimdPlaneErrorFunction::getGradient(
// Storage for joint errors
std::vector<double> jointErrors(constraints_->numJoints);

// Need to make sure we're actually at a 32 byte data offset at the first offset for SIMD access
checkAlignment<kSimdAlignment>(gradient);

// Loop over all joints, as these are our base units
auto dispensoOptions = dispenso::ParForOptions();
dispensoOptions.maxThreads = maxThreads_;
Expand Down Expand Up @@ -286,15 +283,17 @@ __vectorcall DRJIT_INLINE void jacobian_jointParams_to_modelParams(
const Eigen::Index iJointParam,
const ParameterTransform& parameterTransform_,
Eigen::Ref<Eigen::MatrixX<float>> jacobian) {
checkAlignment<kSimdAlignment>(jacobian);

// explicitly multiply with the parameter transform to generate parameter space gradients
for (auto index = parameterTransform_.transform.outerIndexPtr()[iJointParam];
index < parameterTransform_.transform.outerIndexPtr()[iJointParam + 1];
++index) {
const auto modelParamIdx = parameterTransform_.transform.innerIndexPtr()[index];
float* jacPtr = jacobian.col(modelParamIdx).data();
drjit::store(
drjit::store_aligned(
jacPtr,
drjit::load<FloatP>(jacPtr) +
drjit::load_aligned<FloatP>(jacPtr) +
parameterTransform_.transform.valuePtr()[index] * jacobian_jointParams);
}
}
Expand All @@ -317,8 +316,10 @@ double SimdPlaneErrorFunction::getJacobian(
// Storage for joint errors
std::vector<double> jointErrors(constraints_->numJoints);

// Need to make sure we're actually at a 32 byte data offset at the first offset for SIMD access
checkAlignment<kSimdAlignment>(jacobian);
// Need to make sure we're actually at a kSimdAlignment byte data offset at the first offset for
// SIMD access
const size_t addressOffset = computeOffset<kSimdAlignment>(jacobian);
checkAlignment<kSimdAlignment>(jacobian, addressOffset);

// Loop over all joints, as these are our base units
auto dispensoOptions = dispenso::ParForOptions();
Expand All @@ -336,7 +337,7 @@ double SimdPlaneErrorFunction::getJacobian(
for (uint32_t index = 0; index < constraintCount; index += kSimdPacketSize) {
const auto constraintOffsetIndex =
jointId * SimdPlaneConstraints::kMaxConstraints + index;
const auto jacobianOffsetCur = jacobianOffset_[jointId] + index;
const auto jacobianOffsetCur = jacobianOffset_[jointId] + index + addressOffset;

// Transform offset by joint transform: pos = transform * offset
const Vector3fP offset{
Expand All @@ -363,7 +364,8 @@ double SimdPlaneErrorFunction::getJacobian(
const FloatP wgt = drjit::sqrt(kPlaneWeight * weight_ * constraintWeight);

// Calculate residual: res = wgt * dist
drjit::store(residual.segment(jacobianOffsetCur, kSimdPacketSize).data(), dist * wgt);
drjit::store_aligned(
residual.segment(jacobianOffsetCur, kSimdPacketSize).data(), dist * wgt);

// Loop over all joints the constraint is attached to and calculate gradient
size_t jointIndex = jointId;
Expand Down Expand Up @@ -762,7 +764,8 @@ double SimdPlaneErrorFunctionAVX::getJacobian(
std::vector<double> ets_error;

// need to make sure we're actually at a 32 byte data offset at the first offset for AVX access
checkAlignment<kAvxAlignment>(jacobian);
const size_t addressOffset = computeOffset<kAvxAlignment>(jacobian);
checkAlignment<kAvxAlignment>(jacobian, addressOffset);

// calculate actually used number of rows
const size_t maxRows = gsl::narrow_cast<size_t>(jacobian.rows());
Expand All @@ -787,7 +790,7 @@ double SimdPlaneErrorFunctionAVX::getJacobian(
constraints_->numJoints,
[&](double& error_local, const size_t jointId) {
// get initial offset
const auto offset = jacobianOffset_[jointId];
const auto offset = jacobianOffset_[jointId] + addressOffset;

// pre-load some joint specific values
const auto& transformation = state.jointState[jointId].transformation;
Expand Down
26 changes: 13 additions & 13 deletions momentum/character_solver/simd_position_error_function.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,10 +160,6 @@ double SimdPositionErrorFunction::getGradient(
// Storage for joint errors
std::vector<double> jointErrors(constraints_->numJoints);

// Need to make sure we're actually at a kSimdAlignment byte data offset at the first offset for
// SIMD access
checkAlignment<kSimdAlignment>(gradient);

// Loop over all joints, as these are our base units
auto dispensoOptions = dispenso::ParForOptions();
dispensoOptions.maxThreads = maxThreads_;
Expand Down Expand Up @@ -282,15 +278,17 @@ __vectorcall DRJIT_INLINE void jacobian_jointParams_to_modelParams(
const Eigen::Index iJointParam,
const ParameterTransform& parameterTransform_,
Eigen::Ref<Eigen::MatrixX<float>> jacobian) {
checkAlignment<kSimdAlignment>(jacobian);

// explicitly multiply with the parameter transform to generate parameter space gradients
for (auto index = parameterTransform_.transform.outerIndexPtr()[iJointParam];
index < parameterTransform_.transform.outerIndexPtr()[iJointParam + 1];
++index) {
const auto modelParamIdx = parameterTransform_.transform.innerIndexPtr()[index];
float* jacPtr = jacobian.col(modelParamIdx).data();
drjit::store(
drjit::store_aligned(
jacPtr,
drjit::load<FloatP>(jacPtr) +
drjit::load_aligned<FloatP>(jacPtr) +
parameterTransform_.transform.valuePtr()[index] * jacobian_jointParams);
}
}
Expand All @@ -314,7 +312,8 @@ double SimdPositionErrorFunction::getJacobian(

// Need to make sure we're actually at a kSimdAlignment byte data offset at the first offset for
// SIMD access
checkAlignment<kSimdAlignment>(jacobian);
const size_t addressOffset = computeOffset<kSimdAlignment>(jacobian);
checkAlignment<kSimdAlignment>(jacobian, addressOffset);

// Loop over all joints, as these are our base units
auto dispensoOptions = dispenso::ParForOptions();
Expand All @@ -332,7 +331,7 @@ double SimdPositionErrorFunction::getJacobian(
index += kSimdPacketSize, jindex += kSimdPacketSize * kConstraintDim) {
const auto constraintOffsetIndex =
jointId * SimdPositionConstraints::kMaxConstraints + index;
const auto jacobianOffsetCur = jacobianOffset_[jointId] + jindex;
const auto jacobianOffsetCur = jacobianOffset_[jointId] + jindex + addressOffset;

// Transform offset by joint transform: pos = transform * offset
const Vector3fP offset{
Expand Down Expand Up @@ -362,13 +361,13 @@ double SimdPositionErrorFunction::getJacobian(

// Calculate residual: res = wgt * diff
const Vector3fP res = wgt * diff;
drjit::store(
drjit::store_aligned(
residual.segment(jacobianOffsetCur + kSimdPacketSize * 0, kSimdPacketSize).data(),
res.x());
drjit::store(
drjit::store_aligned(
residual.segment(jacobianOffsetCur + kSimdPacketSize * 1, kSimdPacketSize).data(),
res.y());
drjit::store(
drjit::store_aligned(
residual.segment(jacobianOffsetCur + kSimdPacketSize * 2, kSimdPacketSize).data(),
res.z());

Expand Down Expand Up @@ -802,7 +801,8 @@ double SimdPositionErrorFunctionAVX::getJacobian(
std::vector<double> ets_error;

// need to make sure we're actually at a 32 byte data offset at the first offset for AVX access
checkAlignment<kAvxAlignment>(jacobian);
const size_t addressOffset = computeOffset<kAvxAlignment>(jacobian);
checkAlignment<kAvxAlignment>(jacobian, addressOffset);

// loop over all joints, as these are our base units
auto dispensoOptions = dispenso::ParForOptions();
Expand All @@ -814,7 +814,7 @@ double SimdPositionErrorFunctionAVX::getJacobian(
constraints_->numJoints,
[&](double& error_local, const size_t jointId) {
// get initial offset
const auto offset = jacobianOffset_[jointId];
const auto offset = jacobianOffset_[jointId] + addressOffset;

// pre-load some joint specific values
const auto& transformation = state.jointState[jointId].transformation;
Expand Down
19 changes: 17 additions & 2 deletions momentum/simd/simd.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,26 @@ using Vector3fP = Vector3P<float>;
using Vector2dP = Vector2P<double>;
using Vector3dP = Vector3P<double>;

/// Computes the offset required for the matrix data to meet the alignment requirement.
template <size_t Alignment>
[[nodiscard]] size_t computeOffset(const Eigen::Ref<Eigen::MatrixXf>& mat) {
constexpr size_t sizeOfScalar = sizeof(typename Eigen::MatrixXf::Scalar);
const size_t addressOffset =
Alignment / sizeOfScalar - (((size_t)mat.data() % Alignment) / sizeOfScalar);

// If the current alignment already meets the requirement, no offset is needed.
if (addressOffset == Alignment / sizeOfScalar) {
return 0;
}

return addressOffset;
}

/// Checks if the data of the matrix is aligned correctly.
template <size_t Alignment>
void checkAlignment(const Eigen::Ref<Eigen::MatrixXf>& mat) {
void checkAlignment(const Eigen::Ref<Eigen::MatrixXf>& mat, size_t offset = 0) {
MT_THROW_IF(
(uintptr_t(mat.data())) % Alignment != 0,
(uintptr_t(mat.data() + offset)) % Alignment != 0,
"Matrix ({}x{}, ptr: {}) is not aligned ({}) correctly.",
mat.rows(),
mat.cols(),
Expand Down