Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid calling useResource on resources in argument buffers #2402

Merged
merged 6 commits into from
Feb 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions MoltenVK/MoltenVK/API/mvk_datatypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,15 @@ typedef enum {
kMVKShaderStageMax = kMVKShaderStageCount // Public API legacy value
} MVKShaderStage;

typedef enum {
kMVKBarrierStageVertex = 0,
kMVKBarrierStageFragment,
kMVKBarrierStageCompute,
kMVKBarrierStageCopy,
kMVKBarrierStageNone,
kMVKBarrierStageCount = kMVKBarrierStageNone
} MVKBarrierStage;

/** Returns the Metal MTLColorWriteMask corresponding to the specified Vulkan VkColorComponentFlags. */
MTLColorWriteMask mvkMTLColorWriteMaskFromVkChannelFlags(VkColorComponentFlags vkWriteFlags);

Expand Down
1 change: 1 addition & 0 deletions MoltenVK/MoltenVK/API/mvk_private_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,7 @@ typedef struct {
VkBool32 needsCubeGradWorkaround; /**< If true, sampling from cube textures with explicit gradients is broken and needs a workaround. */
VkBool32 nativeTextureAtomics; /**< If true, atomic operations on textures are supported natively. */
VkBool32 needsArgumentBufferEncoders; /**< If true, Metal argument buffer encoders are needed to populate argument buffer content. */
VkBool32 residencySets; /**< If true, the device supports creating residency sets. */
} MVKPhysicalDeviceMetalFeatures;


Expand Down
38 changes: 38 additions & 0 deletions MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,35 @@
return VK_SUCCESS;
}

static uint64_t mvkPipelineStageFlagsToBarrierStages(VkPipelineStageFlags2 flags) {
uint64_t result = 0;

if (mvkIsAnyFlagEnabled(flags, VK_PIPELINE_STAGE_2_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_2_TESSELLATION_CONTROL_SHADER_BIT |
VK_PIPELINE_STAGE_2_TESSELLATION_EVALUATION_SHADER_BIT | VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT | VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT |
VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT | VK_PIPELINE_STAGE_2_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_2_INDEX_INPUT_BIT |
VK_PIPELINE_STAGE_2_VERTEX_ATTRIBUTE_INPUT_BIT | VK_PIPELINE_STAGE_2_PRE_RASTERIZATION_SHADERS_BIT | VK_PIPELINE_STAGE_2_TRANSFORM_FEEDBACK_BIT_EXT |
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT | VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT))
result |= 1 << kMVKBarrierStageVertex;

if (mvkIsAnyFlagEnabled(flags, VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT | VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT |
VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT |
VK_PIPELINE_STAGE_2_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR | VK_PIPELINE_STAGE_2_FRAGMENT_DENSITY_PROCESS_BIT_EXT |
VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT | VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT))
result |= 1 << kMVKBarrierStageFragment;

if (mvkIsAnyFlagEnabled(flags, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT | VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT |
VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT))
result |= 1 << kMVKBarrierStageCompute;

if (mvkIsAnyFlagEnabled(flags, VK_PIPELINE_STAGE_2_BLIT_BIT | VK_PIPELINE_STAGE_2_COPY_BIT | VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT |
VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT | VK_PIPELINE_STAGE_2_TRANSFER_BIT | VK_PIPELINE_STAGE_2_RESOLVE_BIT |
VK_PIPELINE_STAGE_2_CLEAR_BIT | VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_COPY_BIT_KHR | VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT |
VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT))
result |= 1 << kMVKBarrierStageCopy;

return result;
}

template <size_t N>
VkResult MVKCmdPipelineBarrier<N>::setContent(MVKCommandBuffer* cmdBuff,
VkPipelineStageFlags srcStageMask,
Expand Down Expand Up @@ -157,6 +186,15 @@
}
#endif

if (!cmdEncoder->_mtlRenderEncoder && cmdEncoder->isUsingMetalArgumentBuffers() && cmdEncoder->getDevice()->hasResidencySet()) {
cmdEncoder->endCurrentMetalEncoding();

for (auto& b : _barriers) {
uint64_t sourceStageMask = mvkPipelineStageFlagsToBarrierStages(b.srcStageMask), destStageMask = mvkPipelineStageFlagsToBarrierStages(b.dstStageMask);
cmdEncoder->setBarrier(sourceStageMask, destStageMask);
}
}

// Apple GPUs do not support renderpass barriers, and do not support rendering/writing
// to an attachment and then reading from that attachment within a single renderpass.
// So, in the case where we are inside a Metal renderpass, we need to split those activities
Expand Down
21 changes: 19 additions & 2 deletions MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm
Original file line number Diff line number Diff line change
Expand Up @@ -617,6 +617,8 @@ static inline MTLSize mvkClampMTLSize(MTLSize size, MTLOrigin origin, MTLSize ma
id<MTLRenderCommandEncoder> mtlRendEnc = [cmdEncoder->_mtlCmdBuffer renderCommandEncoderWithDescriptor: mtlRPD];
cmdEncoder->_cmdBuffer->setMetalObjectLabel(mtlRendEnc, mvkMTLRenderCommandEncoderLabel(commandUse));

cmdEncoder->barrierWait(kMVKBarrierStageCopy, mtlRendEnc, MTLRenderStageFragment);

float zIncr;
if (blitKey.srcMTLTextureType == MTLTextureType3D) {
// In this case, I need to interpolate along the third dimension manually.
Expand Down Expand Up @@ -678,7 +680,10 @@ static inline MTLSize mvkClampMTLSize(MTLSize size, MTLOrigin origin, MTLSize ma

NSUInteger instanceCount = isLayeredBlit ? mtlRPD.renderTargetArrayLengthMVK : 1;
[mtlRendEnc drawPrimitives: MTLPrimitiveTypeTriangleStrip vertexStart: 0 vertexCount: kMVKBlitVertexCount instanceCount: instanceCount];
[mtlRendEnc popDebugGroup];

cmdEncoder->barrierUpdate(kMVKBarrierStageCopy, mtlRendEnc, MTLRenderStageFragment);

[mtlRendEnc popDebugGroup];
[mtlRendEnc endEncoding];
}
}
Expand Down Expand Up @@ -897,6 +902,10 @@ static inline MTLSize mvkClampMTLSize(MTLSize size, MTLOrigin origin, MTLSize ma

[mtlRendEnc pushDebugGroup: @"vkCmdResolveImage"];
[mtlRendEnc popDebugGroup];

cmdEncoder->barrierWait(kMVKBarrierStageCopy, mtlRendEnc, MTLRenderStageFragment);
cmdEncoder->barrierUpdate(kMVKBarrierStageCopy, mtlRendEnc, MTLRenderStageFragment);

[mtlRendEnc endEncoding];
}
}
Expand Down Expand Up @@ -1700,6 +1709,10 @@ static inline MTLSize mvkClampMTLSize(MTLSize size, MTLOrigin origin, MTLSize ma

id<MTLRenderCommandEncoder> mtlRendEnc = [cmdEncoder->_mtlCmdBuffer renderCommandEncoderWithDescriptor: mtlRPDesc];
cmdEncoder->_cmdBuffer->setMetalObjectLabel(mtlRendEnc, mtlRendEncName);

cmdEncoder->barrierWait(kMVKBarrierStageCopy, mtlRendEnc, MTLRenderStageFragment);
cmdEncoder->barrierUpdate(kMVKBarrierStageCopy, mtlRendEnc, MTLRenderStageFragment);

[mtlRendEnc endEncoding];
} else {
for (uint32_t layer = layerStart; layer < layerEnd; layer++) {
Expand All @@ -1715,7 +1728,11 @@ static inline MTLSize mvkClampMTLSize(MTLSize size, MTLOrigin origin, MTLSize ma

id<MTLRenderCommandEncoder> mtlRendEnc = [cmdEncoder->_mtlCmdBuffer renderCommandEncoderWithDescriptor: mtlRPDesc];
cmdEncoder->_cmdBuffer->setMetalObjectLabel(mtlRendEnc, mtlRendEncName);
[mtlRendEnc endEncoding];

cmdEncoder->barrierWait(kMVKBarrierStageCopy, mtlRendEnc, MTLRenderStageFragment);
cmdEncoder->barrierUpdate(kMVKBarrierStageCopy, mtlRendEnc, MTLRenderStageFragment);

[mtlRendEnc endEncoding];
}
}
}
Expand Down
30 changes: 30 additions & 0 deletions MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,19 @@ typedef uint64_t MVKMTLCommandBufferID;
#pragma mark -
#pragma mark MVKCommandEncodingContext

struct BarrierFenceSlots {
uint32_t updateDirtyBits = ~0;
int update[kMVKBarrierStageCount] = {};
int wait[kMVKBarrierStageCount][kMVKBarrierStageCount] = {};
};

/** Context for tracking information across multiple encodings. */
typedef struct MVKCommandEncodingContext {
NSUInteger mtlVisibilityResultOffset = 0;
const MVKMTLBufferAllocation* visibilityResultBuffer = nullptr;
BarrierFenceSlots fenceSlots;

void syncFences(MVKDevice *device, id<MTLCommandBuffer> mtlCommandBuffer);
MVKRenderPass* getRenderPass() { return _renderPass; }
MVKFramebuffer* getFramebuffer() { return _framebuffer; }
void setRenderingContext(MVKRenderPass* renderPass, MVKFramebuffer* framebuffer);
Expand Down Expand Up @@ -410,6 +418,27 @@ class MVKCommandEncoder : public MVKBaseDeviceObject {
/** Returns the command encoding pool. */
MVKCommandEncodingPool* getCommandEncodingPool();

#pragma mark Barriers

/** Encode waits in the current command encoder for the stage that corresponds to given use. */
void encodeBarrierWaits(MVKCommandUse use);

/** Update fences for the currently executing pipeline stage. */
void encodeBarrierUpdates();

/** Insert a new execution barrier */
void setBarrier(uint64_t sourceStageMask, uint64_t destStageMask);

/** Encode waits for a specific stage in given encoder. */
void barrierWait(MVKBarrierStage stage, id<MTLRenderCommandEncoder> mtlEncoder, MTLRenderStages beforeStages);
void barrierWait(MVKBarrierStage stage, id<MTLBlitCommandEncoder> mtlEncoder);
void barrierWait(MVKBarrierStage stage, id<MTLComputeCommandEncoder> mtlEncoder);

/** Encode update for a specific stage in given encoder. */
void barrierUpdate(MVKBarrierStage stage, id<MTLRenderCommandEncoder> mtlEncoder, MTLRenderStages afterStages);
void barrierUpdate(MVKBarrierStage stage, id<MTLBlitCommandEncoder> mtlEncoder);
void barrierUpdate(MVKBarrierStage stage, id<MTLComputeCommandEncoder> mtlEncoder);

Comment on lines +421 to +441
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just making a note that we should explore the possibility of reimplementing VkEvents on top of these...

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#pragma mark Queries

/** Begins an occlusion query. */
Expand Down Expand Up @@ -492,6 +521,7 @@ class MVKCommandEncoder : public MVKBaseDeviceObject {
NSString* getMTLRenderCommandEncoderName(MVKCommandUse cmdUse);
template<typename T> void retainIfImmediatelyEncoding(T& mtlEnc);
template<typename T> void endMetalEncoding(T& mtlEnc);
id<MTLFence> getBarrierStageFence(MVKBarrierStage stage);

typedef struct GPUCounterQuery {
MVKGPUCounterQueryPool* queryPool = nullptr;
Expand Down
Loading