15 changes: 13 additions & 2 deletions src/Target.cpp
@@ -1304,17 +1304,28 @@ int Target::get_arm_v8_lower_bound() const {
}

bool Target::supports_type(const Type &t) const {
if (has_feature(Vulkan)) {
if (t.is_float() && t.bits() == 64) {
return has_feature(Target::VulkanFloat64);
} else if (t.is_float() && t.bits() == 16) {
return has_feature(Target::VulkanFloat16);
} else if (t.is_int_or_uint() && t.bits() == 64) {
return has_feature(Target::VulkanInt64);
} else if (t.is_int_or_uint() && t.bits() == 16) {
return has_feature(Target::VulkanInt16);
} else if (t.is_int_or_uint() && t.bits() == 8) {
return has_feature(Target::VulkanInt8);
}
}
if (t.bits() == 64) {
if (t.is_float()) {
return (!has_feature(Metal) &&
!has_feature(D3D12Compute) &&
(!has_feature(Target::OpenCL) || has_feature(Target::CLDoubles)) &&
(!has_feature(Vulkan) || has_feature(Target::VulkanFloat64)) &&
!has_feature(WebGPU));
} else {
return (!has_feature(Metal) &&
!has_feature(D3D12Compute) &&
(!has_feature(Vulkan) || has_feature(Target::VulkanInt64)) &&
!has_feature(WebGPU));
}
}
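
A hedged usage sketch of the new per-type gating, from the Halide front end. The feature names come from the hunk above; `get_host_target()` is only an illustrative starting point, not part of this change:

```cpp
#include "Halide.h"
#include <cstdio>
using namespace Halide;

int main() {
    // With plain Vulkan, 8/16/64-bit types are reported as unsupported until
    // the matching capability feature is also set on the Target.
    Target t = get_host_target().with_feature(Target::Vulkan);

    bool int8_plain = t.supports_type(UInt(8));                                      // false
    bool int8_gated = t.with_feature(Target::VulkanInt8).supports_type(UInt(8));     // true
    bool f64_gated = t.with_feature(Target::VulkanFloat64).supports_type(Float(64)); // true

    printf("%d %d %d\n", int8_plain, int8_gated, f64_gated);
    return 0;
}
```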
2 changes: 1 addition & 1 deletion src/runtime/internal/memory_resources.h
@@ -151,7 +151,7 @@ ALWAYS_INLINE size_t aligned_size(size_t offset, size_t size, size_t alignment)
ALWAYS_INLINE size_t conform_size(size_t offset, size_t size, size_t alignment, size_t nearest_multiple) {
size_t adjusted_size = aligned_size(offset, size, alignment);
adjusted_size = (alignment > adjusted_size) ? alignment : adjusted_size;
if (nearest_multiple > 0) {
if ((nearest_multiple > 0) && ((adjusted_size % nearest_multiple) != 0)) {
size_t rounded_size = (((adjusted_size + nearest_multiple - 1) / nearest_multiple) * nearest_multiple);
return rounded_size;
} else {
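
As a worked example of the rounding this hunk changes, here is a minimal standalone sketch. The real aligned_size() pads for offset alignment and is not shown in this diff, so it is stubbed out here under the assumption that the offset is already aligned:

```cpp
#include <cstddef>
#include <cstdio>

// Simplifying stand-in for the real helper: assumes the offset is already aligned.
static size_t aligned_size(size_t /*offset*/, size_t size, size_t /*alignment*/) {
    return size;
}

static size_t conform_size(size_t offset, size_t size, size_t alignment, size_t nearest_multiple) {
    size_t adjusted_size = aligned_size(offset, size, alignment);
    adjusted_size = (alignment > adjusted_size) ? alignment : adjusted_size;
    // New early-out in this PR: only round up when a nearest_multiple is
    // requested and the size is not already a multiple of it.
    if ((nearest_multiple > 0) && ((adjusted_size % nearest_multiple) != 0)) {
        return ((adjusted_size + nearest_multiple - 1) / nearest_multiple) * nearest_multiple;
    }
    return adjusted_size;
}

int main() {
    printf("%zu\n", conform_size(0, 100, 16, 48)); // rounded up to 144
    printf("%zu\n", conform_size(0, 96, 16, 48));  // already a multiple of 48: stays 96
    printf("%zu\n", conform_size(0, 8, 16, 0));    // grows to the 16-byte alignment: 16
    return 0;
}
```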
54 changes: 48 additions & 6 deletions src/runtime/internal/region_allocator.h
@@ -74,7 +74,7 @@ class RegionAllocator {
BlockRegion *coalesce_block_regions(void *user_context, BlockRegion *region);

// Returns true if the given region can be split to accommodate the given size
bool can_split(const BlockRegion *region, const MemoryRequest &request) const;
bool can_split(void *user_context, const BlockRegion *region, const MemoryRequest &request) const;

// Splits the given block region into a smaller region to accommodate the given size, followed by empty space for the remaining
BlockRegion *split_block_region(void *user_context, BlockRegion *region, const MemoryRequest &request);
@@ -195,7 +195,7 @@ MemoryRegion *RegionAllocator::reserve(void *user_context, const MemoryRequest &
return nullptr;
}

if (can_split(block_region, region_request)) {
if (can_split(user_context, block_region, region_request)) {
#ifdef DEBUG_RUNTIME_INTERNAL
debug(user_context) << "RegionAllocator: Splitting region of size ( " << (int32_t)(block_region->memory.size) << ") "
<< "to accomodate requested size (" << (int32_t)(region_request.size) << " bytes)";
@@ -443,8 +443,29 @@ BlockRegion *RegionAllocator::coalesce_block_regions(void *user_context, BlockRe
return block_region;
}

bool RegionAllocator::can_split(const BlockRegion *block_region, const MemoryRequest &split_request) const {
return (block_region && (block_region->memory.size > split_request.size) && (block_region->usage_count == 0));
bool RegionAllocator::can_split(void *user_context, const BlockRegion *block_region, const MemoryRequest &split_request) const {

// See if we can actually split the block region and create empty space big enough
if (block_region && (block_region->memory.size > split_request.size) && (block_region->usage_count == 0)) {

// We can only split if there's still room left after conforming the allocation request since the
// conform method may actually grow the requested size to accommodate alignment constraints
MemoryRequest test_request = split_request;
test_request.size = block_region->memory.size - test_request.size;
test_request.offset = block_region->memory.offset + test_request.size;
int error_code = conform(user_context, &test_request);
if (error_code) {
#ifdef DEBUG_RUNTIME_INTERNAL
debug(nullptr) << "RegionAllocator: Failed to conform test request for splitting block region!\n";
#endif
return false;
}

if ((block_region->memory.size - test_request.size) > 0) {
return true;
}
}
return false;
}

BlockRegion *RegionAllocator::split_block_region(void *user_context, BlockRegion *block_region, const MemoryRequest &request) {
@@ -470,8 +491,9 @@ BlockRegion *RegionAllocator::split_block_region(void *user_context, BlockRegion

#ifdef DEBUG_RUNTIME_INTERNAL
debug(user_context) << "RegionAllocator: Splitting "
<< "current region (offset=" << (int32_t)block_region->memory.offset << " size=" << (int32_t)(block_region->memory.size) << " bytes) "
<< "to create empty region (offset=" << (int32_t)split_request.offset << " size=" << (int32_t)(split_request.size) << " bytes)";
<< "current region (offset=" << (int32_t)block_region->memory.offset << " size=" << (int32_t)(block_region->memory.size) << " bytes) into ...\n\t"
<< "existing region (offset=" << (int32_t)block_region->memory.offset << " size=" << (int32_t)(block_region->memory.size - split_request.size) << " bytes)\n\t"
<< "empty region (offset=" << (int32_t)split_request.offset << " size=" << (int32_t)(split_request.size) << " bytes)\n";
#endif
BlockRegion *next_region = block_region->next_ptr;
BlockRegion *empty_region = create_block_region(user_context, split_request);
@@ -484,6 +506,12 @@ BlockRegion *RegionAllocator::split_block_region(void *user_context, BlockRegion
empty_region->prev_ptr = block_region;
block_region->next_ptr = empty_region;
block_region->memory.size -= empty_region->memory.size;

#ifdef DEBUG_RUNTIME_INTERNAL
debug(user_context) << "RegionAllocator: Split block region into ...\n\t"
<< "existing region (ptr=" << (void *)block_region << " prev_ptr=" << block_region->prev_ptr << " next_ptr=" << block_region->next_ptr << " offset=" << (int32_t)block_region->memory.offset << " size=" << (int32_t)(block_region->memory.size) << " bytes)\n\t"
<< "empty region (ptr=" << (void *)empty_region << " prev_ptr=" << empty_region->prev_ptr << " next_ptr=" << empty_region->next_ptr << " offset=" << (int32_t)empty_region->memory.offset << " size=" << (int32_t)(empty_region->memory.size) << " bytes)\n";
#endif
return empty_region;
}
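
A toy model of the list splice performed above, using simplified stand-in types rather than the runtime's BlockRegion: the empty region is linked in directly after the region being split, which keeps only the bytes in front of it:

```cpp
#include <cstddef>
#include <cstdio>

struct Region {
    size_t offset = 0, size = 0;
    Region *prev_ptr = nullptr, *next_ptr = nullptr;
};

// Carve `empty_size` bytes off the tail of `r` and splice the new node in after it.
Region *split(Region *r, size_t empty_size) {
    Region *empty = new Region{r->offset + (r->size - empty_size), empty_size};
    empty->next_ptr = r->next_ptr;
    if (r->next_ptr != nullptr) {
        r->next_ptr->prev_ptr = empty;
    }
    empty->prev_ptr = r;
    r->next_ptr = empty;
    r->size -= empty_size;  // the existing region keeps the front portion
    return empty;
}

int main() {
    Region block = {0, 4096};
    Region *empty = split(&block, 1024);
    printf("existing: offset=%zu size=%zu\n", block.offset, block.size);    // 0, 3072
    printf("empty:    offset=%zu size=%zu\n", empty->offset, empty->size);  // 3072, 1024
    delete empty;
    return 0;
}
```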

@@ -605,8 +633,22 @@ int RegionAllocator::alloc_block_region(void *user_context, BlockRegion *block_r
#endif
halide_abort_if_false(user_context, allocators.region.allocate != nullptr);
halide_abort_if_false(user_context, block_region->status == AllocationStatus::Available);

int error_code = 0;
MemoryRegion *memory_region = &(block_region->memory);
if (memory_region->size <= 0) {
#ifdef DEBUG_RUNTIME_INTERNAL
debug(user_context) << " skipping zero size region ("
<< "block_ptr=" << (void *)block_region->block_ptr << " "
<< "block_region=" << (void *)block_region << " "
<< "memory_offset=" << (uint32_t)(block_region->memory.offset) << " "
<< "memory_size=" << (uint32_t)(block_region->memory.size) << " "
<< "block_reserved=" << (uint32_t)block->reserved << " "
<< ")\n";
#endif
return error_code;
}

if (memory_region->handle == nullptr) {
error_code = allocators.region.allocate(user_context, memory_region);
memory_region->is_owner = true;
92 changes: 63 additions & 29 deletions src/runtime/vulkan.cpp
@@ -1199,13 +1199,6 @@ WEAK int halide_vulkan_run(void *user_context,
}
}
}

// 2b. Create the pipeline layout
error_code = vk_create_pipeline_layout(user_context, ctx.allocator, shader_module->shader_count, shader_module->descriptor_set_layouts, &(shader_module->pipeline_layout));
if (error_code != halide_error_code_success) {
error(user_context) << "Vulkan: Failed to create pipeline layout!\n";
return error_code;
}
}

VulkanDispatchData dispatch_data = {};
@@ -1219,16 +1212,8 @@

VulkanShaderBinding *entry_point_binding = (shader_module->shader_bindings + entry_point_index);

// 2c. Setup the compute pipeline (eg override any specializations for shared mem or workgroup size)
error_code = vk_setup_compute_pipeline(user_context, ctx.allocator, entry_point_binding, &dispatch_data, shader_module->shader_module, shader_module->pipeline_layout, &(entry_point_binding->compute_pipeline));
if (error_code != halide_error_code_success) {
error(user_context) << "Vulkan: Failed to setup compute pipeline!\n";
return error_code;
}

// 2d. Create a descriptor set
if (entry_point_binding->descriptor_set == VK_NULL_HANDLE) {

// 2c. If Push Descriptor Set isn't supported, then allocate a descriptor set
if ((vkCmdPushDescriptorSetKHR == nullptr) && (entry_point_binding->descriptor_set == VK_NULL_HANDLE)) {
// Construct a descriptor pool
//
// NOTE: while this could be re-used across multiple pipelines, we only know the storage requirements of this kernel's
@@ -1250,7 +1235,7 @@
}
}

// 3a. Create a buffer for the scalar parameters
// 2d. Create a buffer for the scalar parameters
if ((entry_point_binding->args_region == nullptr) && entry_point_binding->uniform_buffer_count) {
size_t scalar_buffer_size = vk_estimate_scalar_uniform_buffer_size(user_context, arg_sizes, args, arg_is_buffer);
if (scalar_buffer_size > 0) {
@@ -1262,7 +1247,7 @@
}
}

// 3b. Update uniform buffer with scalar parameters
// 2e. Update uniform buffer with scalar parameters
VkBuffer *args_buffer = nullptr;
if ((entry_point_binding->args_region != nullptr) && entry_point_binding->uniform_buffer_count) {
error_code = vk_update_scalar_uniform_buffer(user_context, ctx.allocator, entry_point_binding->args_region, arg_sizes, args, arg_is_buffer);
@@ -1278,10 +1263,28 @@
}
}

// 3c. Update buffer bindings for descriptor set
error_code = vk_update_descriptor_set(user_context, ctx.allocator, args_buffer, entry_point_binding->uniform_buffer_count, entry_point_binding->storage_buffer_count, arg_sizes, args, arg_is_buffer, entry_point_binding->descriptor_set);
// 2f. If Push Descriptor Set isn't supported, then update the buffer bindings for the allocated descriptor set
if (vkCmdPushDescriptorSetKHR == nullptr) {
error_code = vk_update_descriptor_set(user_context, ctx.allocator, args_buffer, entry_point_binding->uniform_buffer_count, entry_point_binding->storage_buffer_count, arg_sizes, args, arg_is_buffer, entry_point_binding->descriptor_set);
if (error_code != halide_error_code_success) {
error(user_context) << "Vulkan: Failed to update descriptor set!\n";
return error_code;
}
}

// 2b. Create the pipeline layout
if (shader_module->pipeline_layout == VK_NULL_HANDLE) {
error_code = vk_create_pipeline_layout(user_context, ctx.allocator, shader_module->shader_count, shader_module->descriptor_set_layouts, &(shader_module->pipeline_layout));
if (error_code != halide_error_code_success) {
error(user_context) << "Vulkan: Failed to create pipeline layout!\n";
return error_code;
}
}

// 3. Setup the compute pipeline (eg override any specializations for shared mem or workgroup size)
error_code = vk_setup_compute_pipeline(user_context, ctx.allocator, entry_point_binding, &dispatch_data, shader_module->shader_module, shader_module->pipeline_layout, &(entry_point_binding->compute_pipeline));
if (error_code != halide_error_code_success) {
error(user_context) << "Vulkan: Failed to update descriptor set!\n";
error(user_context) << "Vulkan: Failed to setup compute pipeline!\n";
return error_code;
}

Expand All @@ -1293,18 +1296,49 @@ WEAK int halide_vulkan_run(void *user_context,
}

// 5. Fill the command buffer
error_code = vk_fill_command_buffer_with_dispatch_call(user_context,
ctx.device, cmds.command_buffer,
entry_point_binding->compute_pipeline,
shader_module->pipeline_layout,
entry_point_binding->descriptor_set,
entry_point_index,
blocksX, blocksY, blocksZ);
error_code = vk_begin_command_buffer(user_context, cmds.command_buffer);
if (error_code != halide_error_code_success) {
error(user_context) << "Vulkan: Failed to start command buffer for dispatch call!\n";
return error_code;
}
error_code = vk_bind_pipeline(user_context, cmds.command_buffer, entry_point_binding->compute_pipeline);
if (error_code != halide_error_code_success) {
error(user_context) << "Vulkan: Failed to bind compute pipeline to command buffer for dispatch call!\n";
return error_code;
}

if (vkCmdPushDescriptorSetKHR != nullptr) {
error_code = vk_push_descriptor_set(user_context, ctx.allocator, cmds.command_buffer, entry_point_binding->compute_pipeline, shader_module->pipeline_layout, entry_point_binding->descriptor_set, args_buffer, entry_point_binding->uniform_buffer_count, entry_point_binding->storage_buffer_count, arg_sizes, args, arg_is_buffer);
if (error_code != halide_error_code_success) {
error(user_context) << "Vulkan: Failed to update descriptor set!\n";
return error_code;
}
} else {
error_code = vk_bind_descriptor_sets(user_context, cmds.command_buffer, shader_module->pipeline_layout, entry_point_binding->descriptor_set, entry_point_index);
if (error_code != halide_error_code_success) {
error(user_context) << "Vulkan: Failed to bind descriptor set to command buffer for dispatch call!\n";
return error_code;
}
}

error_code = vk_dispatch_kernel(user_context,
ctx.device, cmds.command_buffer,
entry_point_binding->compute_pipeline,
shader_module->pipeline_layout,
entry_point_binding->descriptor_set,
entry_point_index,
blocksX, blocksY, blocksZ);
if (error_code != halide_error_code_success) {
error(user_context) << "Vulkan: Failed to fill command buffer with dispatch call!\n";
return error_code;
}

error_code = vk_end_command_buffer(user_context, cmds.command_buffer);
if (error_code != halide_error_code_success) {
error(user_context) << "Vulkan: Failed to end command buffer for dispatch call!\n";
return error_code;
}

// 6. Submit the command buffer to our command queue
error_code = vk_submit_command_buffer(user_context, ctx.queue, cmds.command_buffer);
if (error_code != halide_error_code_success) {
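
For readers unfamiliar with VK_KHR_push_descriptor, here is a minimal, non-Halide sketch of the pattern the new path uses: bindings are written straight into the command buffer instead of being allocated from a descriptor pool. The handles are assumed to be created elsewhere, and vkCmdPushDescriptorSetKHR is assumed to have been resolved already (the runtime loads it through its VULKAN_FN table):

```cpp
#include <vulkan/vulkan.h>

// Sketch only: pushes a single storage-buffer binding for a compute dispatch.
// Requires the descriptor set layout to be created with
// VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR.
void push_one_storage_buffer(VkCommandBuffer cmd,
                             VkPipelineLayout layout,
                             VkBuffer storage_buffer) {
    VkDescriptorBufferInfo buffer_info = {storage_buffer, 0, VK_WHOLE_SIZE};

    VkWriteDescriptorSet write = {};
    write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
    write.dstSet = VK_NULL_HANDLE;  // push descriptors have no allocated set
    write.dstBinding = 0;
    write.descriptorCount = 1;
    write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
    write.pBufferInfo = &buffer_info;

    // Writes the binding directly into the command buffer.
    vkCmdPushDescriptorSetKHR(cmd, VK_PIPELINE_BIND_POINT_COMPUTE,
                              layout, /*set=*/0, /*writeCount=*/1, &write);
}
```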
10 changes: 9 additions & 1 deletion src/runtime/vulkan_extensions.h
@@ -203,10 +203,18 @@ uint32_t vk_get_required_device_extensions(void *user_context, StringTable &ext_
uint32_t vk_get_optional_device_extensions(void *user_context, StringTable &ext_table) {
const char *optional_ext_table[] = {
"VK_KHR_portability_subset", //< necessary for running under Molten (aka Vulkan on Mac)
VK_KHR_MAINTENANCE_1_EXTENSION_NAME,
VK_KHR_MAINTENANCE_2_EXTENSION_NAME,
VK_KHR_MAINTENANCE_3_EXTENSION_NAME,
VK_KHR_MAINTENANCE_4_EXTENSION_NAME,
VK_KHR_MAINTENANCE_5_EXTENSION_NAME,
VK_KHR_MAINTENANCE_6_EXTENSION_NAME,
VK_KHR_MAINTENANCE_7_EXTENSION_NAME,
VK_KHR_16BIT_STORAGE_EXTENSION_NAME,
VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME,
VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME};
VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME,
VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME};

const uint32_t optional_ext_count = sizeof(optional_ext_table) / sizeof(optional_ext_table[0]);
ext_table.fill(user_context, (const char **)optional_ext_table, optional_ext_count);
return optional_ext_count;
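
These are optional, so a device that lacks the maintenance or push-descriptor extensions must still be usable. A generic sketch (not the Halide runtime's code) of how such a table is typically filtered against what the physical device reports before device creation:

```cpp
#include <vulkan/vulkan.h>
#include <cstring>
#include <vector>

// Keep only the requested extension names the device actually advertises.
std::vector<const char *> filter_supported(VkPhysicalDevice gpu,
                                           const char *const *wanted, uint32_t count) {
    uint32_t n = 0;
    vkEnumerateDeviceExtensionProperties(gpu, nullptr, &n, nullptr);
    std::vector<VkExtensionProperties> props(n);
    vkEnumerateDeviceExtensionProperties(gpu, nullptr, &n, props.data());

    std::vector<const char *> enabled;
    for (uint32_t i = 0; i < count; i++) {
        for (const VkExtensionProperties &p : props) {
            if (std::strcmp(p.extensionName, wanted[i]) == 0) {
                enabled.push_back(wanted[i]);
                break;
            }
        }
    }
    return enabled;  // feed into VkDeviceCreateInfo::ppEnabledExtensionNames
}
```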
1 change: 1 addition & 0 deletions src/runtime/vulkan_functions.h
@@ -204,6 +204,7 @@ VULKAN_FN(vkCmdCopyBuffer2)
// VULKAN_FN(vkCmdCopyImageToBuffer2)
// VULKAN_FN(vkCmdEndRendering)
VULKAN_FN(vkCmdPipelineBarrier2)
VULKAN_FN(vkCmdPushDescriptorSetKHR)
VULKAN_FN(vkCmdResetEvent2)
// VULKAN_FN(vkCmdResolveImage2)
// VULKAN_FN(vkCmdSetCullMode)