Skip to content

Commit 1dfcf15

Browse files
authored
Merge pull request #2 from tor4z/feat-multi_input
Multiple output supported
2 parents 3731c71 + 5f50cfd commit 1dfcf15

File tree

4 files changed

+106
-62
lines changed

4 files changed

+106
-62
lines changed

cov.hpp

Lines changed: 93 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include <optional>
77
#include <vector>
88
#include <string_view>
9+
#include <utility>
910
#include <vulkan/vulkan.h>
1011

1112
#define COV_DEF_SINGLETON(classname) \
@@ -50,6 +51,10 @@ class Device
5051

5152
class Instance
5253
{
54+
struct InputSet {
55+
size_t offset;
56+
size_t size;
57+
}; // struct InputSet
5358
public:
5459
~Instance() { destroy(); }
5560
// not copyable
@@ -59,8 +64,8 @@ class Instance
5964
Instance(Instance&&);
6065
Instance& operator=(Instance&&);
6166

62-
bool set_input(const void* data, size_t size);
63-
void set_output(size_t size);
67+
bool set_inputs(const std::vector<std::pair<const void*, size_t>>& inputs);
68+
void def_output(size_t size);
6469
bool get_output(void* data, size_t size);
6570
bool load_shader(const std::string_view& shader_path, void* spec_data=nullptr, size_t spec_data_len=0);
6671
void add_spec_item(uint32_t const_id, uint32_t offset, size_t item_size);
@@ -83,7 +88,10 @@ class Instance
8388
VkShaderModule shader_module_;
8489
VkSpecializationInfo spec_info_;
8590
std::vector<VkSpecializationMapEntry> spec_map_entryies_;
91+
std::vector<InputSet> input_sets_;
8692
uint32_t queue_index_;
93+
size_t input_size_;
94+
size_t output_size_;
8795

8896
friend class App;
8997
Instance(VkInstance vk_instance);
@@ -232,6 +240,8 @@ Instance::Instance(VkInstance vk_instance)
232240
, device_(VK_NULL_HANDLE)
233241
, shader_module_(VK_NULL_HANDLE)
234242
, queue_index_(-1)
243+
, input_size_(0)
244+
, output_size_(0)
235245
{
236246
PhysicalDevice physical_device_creator;
237247
Device device_creator;
@@ -265,6 +275,9 @@ Instance::Instance(Instance&& other)
265275
queue_index_ = other.queue_index_;
266276
spec_info_ = other.spec_info_;
267277
spec_map_entryies_ = other.spec_map_entryies_;
278+
input_sets_ = other.input_sets_;
279+
input_size_ = other.input_size_;
280+
output_size_ = other.output_size_;
268281

269282
other.vk_instance_ = VK_NULL_HANDLE;
270283
other.cmd_pool_ = VK_NULL_HANDLE;
@@ -282,6 +295,9 @@ Instance::Instance(Instance&& other)
282295
other.shader_module_ = VK_NULL_HANDLE;
283296
other.queue_index_ = -1;
284297
other.spec_map_entryies_.clear();
298+
other.input_sets_.clear();
299+
other.input_size_ = 0;
300+
other.output_size_ = 0;
285301
}
286302

287303
Instance& Instance::operator=(Instance&& other)
@@ -308,6 +324,9 @@ Instance& Instance::operator=(Instance&& other)
308324
queue_index_ = other.queue_index_;
309325
spec_info_ = other.spec_info_;
310326
spec_map_entryies_ = other.spec_map_entryies_;
327+
input_sets_ = other.input_sets_;
328+
input_size_ = other.input_size_;
329+
output_size_ = other.output_size_;
311330

312331
other.vk_instance_ = VK_NULL_HANDLE;
313332
other.cmd_pool_ = VK_NULL_HANDLE;
@@ -325,6 +344,9 @@ Instance& Instance::operator=(Instance&& other)
325344
other.shader_module_ = VK_NULL_HANDLE;
326345
other.queue_index_ = -1;
327346
other.spec_map_entryies_.clear();
347+
other.input_sets_.clear();
348+
other.input_size_ = 0;
349+
other.output_size_ = 0;
328350

329351
return *this;
330352
}
@@ -381,27 +403,39 @@ void Instance::destroy()
381403
spec_map_entryies_.clear();
382404
}
383405

384-
bool Instance::set_input(const void* data, size_t size)
406+
bool Instance::set_inputs(const std::vector<std::pair<const void*, size_t>>& inputs)
385407
{
386-
create_buffer(device_, phy_device_, size,
387-
VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
388-
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, in_host_buff_, in_host_memory_);
389-
create_buffer(device_, phy_device_, size,
390-
VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
391-
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, in_device_buff_, in_device_memory_);
392-
393-
if (data) {
394-
void* mapped_data;
395-
vkMapMemory(device_, in_host_memory_, 0, size, 0, &mapped_data);
396-
memcpy(mapped_data, reinterpret_cast<const void*>(data), size);
408+
size_t all_size{0};
409+
if (input_size_ == 0) {
410+
size_t offset{0};
411+
input_size_ = all_size;
412+
for (const auto& input : inputs) {
413+
input_sets_.emplace_back(InputSet{.offset = offset, .size = input.second});
414+
all_size = offset + input.second;
415+
offset = (input.second / 64 + 1) * 64;
416+
}
397417

418+
create_buffer(device_, phy_device_, all_size,
419+
VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
420+
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, in_host_buff_, in_host_memory_);
421+
create_buffer(device_, phy_device_, all_size,
422+
VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
423+
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, in_device_buff_, in_device_memory_);
424+
} else {
425+
assert(input_size_ == all_size && "Input size not matched");
426+
}
427+
428+
{
398429
VkMappedMemoryRange mem_range{};
399430
mem_range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
400-
mem_range.size = size;
401-
mem_range.offset = 0;
402-
mem_range.memory = in_host_memory_;
403-
COV_CHECK_ASSERT(vkFlushMappedMemoryRanges(device_, 1, &mem_range));
404-
vkUnmapMemory(device_, in_host_memory_);
431+
432+
for (size_t i = 0; i < inputs.size(); ++i) {
433+
void* mapped_data;
434+
const auto& input{inputs.at(i)};
435+
vkMapMemory(device_, in_host_memory_, input_sets_.at(i).offset, input.second, 0, &mapped_data);
436+
memcpy(mapped_data, reinterpret_cast<const void*>(input.first), input.second);
437+
vkUnmapMemory(device_, in_host_memory_);
438+
}
405439
}
406440

407441
VkCommandBufferAllocateInfo cmd_alloc_info{};
@@ -417,7 +451,7 @@ bool Instance::set_input(const void* data, size_t size)
417451
cmd_begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
418452
COV_CHECK_ASSERT(vkBeginCommandBuffer(cmd_buff, &cmd_begin_info));
419453
VkBufferCopy copy_region{};
420-
copy_region.size = size;
454+
copy_region.size = all_size;
421455
vkCmdCopyBuffer(cmd_buff, in_host_buff_, in_device_buff_, 1, &copy_region);
422456
COV_CHECK_ASSERT(vkEndCommandBuffer(cmd_buff));
423457

@@ -440,14 +474,17 @@ bool Instance::set_input(const void* data, size_t size)
440474
return true;
441475
}
442476

443-
void Instance::set_output(size_t size)
477+
void Instance::def_output(size_t size)
444478
{
445-
create_buffer(device_, phy_device_, size,
446-
VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
447-
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, out_host_buff_, out_host_memory_);
448-
create_buffer(device_, phy_device_, size,
449-
VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
450-
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, out_device_buff_, out_device_memory_);
479+
if (output_size_ == 0) {
480+
output_size_ = size;
481+
create_buffer(device_, phy_device_, size,
482+
VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
483+
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, out_host_buff_, out_host_memory_);
484+
create_buffer(device_, phy_device_, size,
485+
VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
486+
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, out_device_buff_, out_device_memory_);
487+
}
451488
}
452489

453490

@@ -539,10 +576,11 @@ void Instance::add_spec_item(uint32_t const_id, uint32_t offset, size_t item_siz
539576

540577
bool Instance::execute(const std::array<int, 3> dims)
541578
{
579+
auto num_sets{input_sets_.size() + 1};
542580
VkDescriptorPool desc_pool{};
543581
VkPipelineLayout pipeline_layout{};
544-
std::vector<VkDescriptorSetLayout> desc_set_layout(2);
545-
std::vector<VkDescriptorSet> desc_set(2);
582+
std::vector<VkDescriptorSetLayout> desc_set_layout(num_sets);
583+
std::vector<VkDescriptorSet> desc_set(num_sets);
546584
VkPipelineCache pipeline_cache{};
547585
VkPipeline comp_pipeline{};
548586

@@ -557,7 +595,7 @@ bool Instance::execute(const std::array<int, 3> dims)
557595
pool_create_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
558596
pool_create_info.poolSizeCount = static_cast<uint32_t>(pool_sizes.size());
559597
pool_create_info.pPoolSizes = pool_sizes.data();
560-
pool_create_info.maxSets = 2;
598+
pool_create_info.maxSets = num_sets;
561599
COV_CHECK_ASSERT(vkCreateDescriptorPool(device_, &pool_create_info, nullptr, &desc_pool))
562600

563601
std::vector<VkDescriptorSetLayoutBinding> set_layout_bindings{
@@ -573,8 +611,9 @@ bool Instance::execute(const std::array<int, 3> dims)
573611
layout_create_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
574612
layout_create_info.bindingCount = static_cast<uint32_t>(set_layout_bindings.size());
575613
layout_create_info.pBindings = set_layout_bindings.data();
576-
COV_CHECK_ASSERT(vkCreateDescriptorSetLayout(device_, &layout_create_info, nullptr, &desc_set_layout[0]))
577-
COV_CHECK_ASSERT(vkCreateDescriptorSetLayout(device_, &layout_create_info, nullptr, &desc_set_layout[1]))
614+
for (size_t i = 0; i < num_sets; ++i) {
615+
COV_CHECK_ASSERT(vkCreateDescriptorSetLayout(device_, &layout_create_info, nullptr, &desc_set_layout[i]))
616+
}
578617

579618
VkPipelineLayoutCreateInfo pipeline_create_info{};
580619
pipeline_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
@@ -584,39 +623,37 @@ bool Instance::execute(const std::array<int, 3> dims)
584623

585624
VkDescriptorSetAllocateInfo desc_alloc_info{};
586625
desc_alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
587-
desc_alloc_info.descriptorSetCount = 2;
626+
desc_alloc_info.descriptorSetCount = num_sets;
588627
desc_alloc_info.descriptorPool = desc_pool;
589628
desc_alloc_info.pSetLayouts = desc_set_layout.data();
590629
COV_CHECK_ASSERT(vkAllocateDescriptorSets(device_, &desc_alloc_info, desc_set.data()))
591630

592-
std::vector<VkDescriptorBufferInfo> desc_buff_info(2);
593-
desc_buff_info[0].range = VK_WHOLE_SIZE;
594-
desc_buff_info[0].offset = 0;
595-
desc_buff_info[0].buffer = in_device_buff_;
631+
std::vector<VkDescriptorBufferInfo> desc_buff_info(num_sets);
596632

597-
desc_buff_info[1].range = VK_WHOLE_SIZE;
598-
desc_buff_info[1].offset = 0;
599-
desc_buff_info[1].buffer = out_device_buff_;
633+
for (size_t i = 0; i < input_sets_.size(); ++i) {
634+
const auto& input_set_info{input_sets_.at(i)};
635+
desc_buff_info[i].range = VK_WHOLE_SIZE;
636+
desc_buff_info[i].offset = input_set_info.offset;
637+
desc_buff_info[i].buffer = in_device_buff_;
638+
}
639+
desc_buff_info[num_sets - 1].range = VK_WHOLE_SIZE;
640+
desc_buff_info[num_sets - 1].offset = 0;
641+
desc_buff_info[num_sets - 1].buffer = out_device_buff_;
600642

601-
std::vector<VkWriteDescriptorSet> write_desc_set{
602-
VkWriteDescriptorSet{
603-
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
604-
.dstSet = desc_set[0],
605-
.dstBinding = 0,
606-
.descriptorCount = 1,
607-
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
608-
.pBufferInfo = &desc_buff_info[0]
609-
},
610-
VkWriteDescriptorSet{
643+
std::vector<VkWriteDescriptorSet> write_desc_sets;
644+
write_desc_sets.reserve(num_sets);
645+
for (size_t i = 0; i < num_sets; ++i) {
646+
write_desc_sets.emplace_back(VkWriteDescriptorSet{
611647
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
612-
.dstSet = desc_set[1],
648+
.dstSet = desc_set[i],
613649
.dstBinding = 0,
614650
.descriptorCount = 1,
615651
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
616-
.pBufferInfo = &desc_buff_info[1]
617-
}
618-
};
619-
vkUpdateDescriptorSets(device_, write_desc_set.size(), write_desc_set.data(), 0, nullptr);
652+
.pBufferInfo = &desc_buff_info[i]
653+
});
654+
}
655+
656+
vkUpdateDescriptorSets(device_, write_desc_sets.size(), write_desc_sets.data(), 0, nullptr);
620657

621658
VkPipelineCacheCreateInfo pipeline_cache_create_info{};
622659
pipeline_cache_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
@@ -673,7 +710,7 @@ bool Instance::execute(const std::array<int, 3> dims)
673710
0, nullptr);
674711

675712
vkCmdBindPipeline(cmd_buff, VK_PIPELINE_BIND_POINT_COMPUTE, comp_pipeline);
676-
vkCmdBindDescriptorSets(cmd_buff, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, 0, 2, desc_set.data(), 0, nullptr);
713+
vkCmdBindDescriptorSets(cmd_buff, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, 0, num_sets, desc_set.data(), 0, nullptr);
677714
vkCmdDispatch(cmd_buff, dims[0], dims[1], dims[2]);
678715

679716
// mem_buff_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;

examples/01-basic_usage.cpp

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ int main()
1919
const std::string shader_path{"../examples/shader/headless.comp.spv"}; // suppose we run this program on build dir
2020

2121
// std::vector<int> in_data{1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7};
22-
std::vector<int> in_data{7, 7, 7, 7};
22+
std::vector<int> in_data{7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7};
2323
std::vector<int> out_data(in_data.size());
2424

2525
cov::App::init("HelloCOV");
@@ -30,11 +30,14 @@ int main()
3030
auto instance{cov::App::new_instance()};
3131
instance.load_shader(shader_path, &spec_data, sizeof(spec_data));
3232
instance.add_spec_item(0, 0, sizeof(uint32_t));
33-
instance.set_input(in_data.data(), in_data.size() * sizeof(in_data.at(0)));
34-
instance.set_output(out_data.size() * sizeof(out_data.at(0)));
35-
33+
instance.set_inputs({
34+
{in_data.data(), in_data.size() * sizeof(in_data.at(0))},
35+
{in_data.data(), in_data.size() * sizeof(in_data.at(0))},
36+
});
37+
instance.def_output(out_data.size() * sizeof(out_data.at(0)));
38+
3639
if (!instance.execute({static_cast<int>(in_data.size()), 1, 1})) {
37-
std::cerr << "execute shader program failed\n";
40+
std::cerr << "Execute shader program failed\n";
3841
}
3942
instance.get_output(out_data.data(), out_data.size() * sizeof(out_data.at(0)));
4043
// The instance will be automatically destroyed here.

examples/shader/headless.comp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,11 @@ layout(set = 0, binding = 0) buffer Pos {
44
uint in_values[ ];
55
};
66

7-
layout(set = 1, binding = 0) buffer Pos2 {
7+
layout(set = 1, binding = 0) buffer Pos1 {
8+
uint in_values1[ ];
9+
};
10+
11+
layout(set = 2, binding = 0) buffer Pos2 {
812
uint out_values[ ];
913
};
1014

examples/shader/headless.comp.spv

188 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)