Skip to content

Commit e4a1008

Browse files
authored
Support layer: Serialize with vkDeviceWaitIdle (#115)
1 parent 1d1f2d9 commit e4a1008

File tree

5 files changed

+51
-10
lines changed

5 files changed

+51
-10
lines changed

layer_gpu_support/README_LAYER.md

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -85,14 +85,16 @@ convenience options to force disable or enable all serialization.
8585
* If the `none` option is `true` then no serialization is applied, irrespective
8686
of other settings.
8787
* Else, if the `all` option is `true` then all serialization is applied,
88-
irrespective of other settings.
88+
irrespective of other settings, with the exception of `queue_wait_idle`, which
89+
must be enabled individually due to its extreme performance overhead.
8990
* Else, the individual options are applied as specified.
9091

9192
```jsonc
9293
"serialize": {
93-
"none": false, // Enable no serialization options
94-
"all": false, // Enable all serialization options
95-
"queue": false, // Enable cross-queue serialization of submits
94+
"none": false, // Enable no serialization options
95+
"all": false, // Enable all serialization options except queue_wait_idle
96+
"queue": false, // Force cross-queue serialization of submits
97+
"queue_wait_idle": false, // Insert vkDeviceWaitIdle after submits
9698
"commandstream": {
9799
"compute": {
98100
"pre": false, // Insert full barrier before dispatches

layer_gpu_support/layer_config.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@
2525
"post": false
2626
}
2727
},
28-
"queue": false
28+
"queue": false,
29+
"queue_wait_idle": false
2930
},
3031
"shader": {
3132
"disable_cache": false,

layer_gpu_support/source/layer_config.cpp

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,8 @@ void LayerConfig::parse_serialization_options(const json& config)
4646
// Decode top level options
4747
bool s_all = serialize.at("all");
4848
bool s_none = serialize.at("none");
49-
bool s_queue = serialize.at("queue");
49+
bool s_queue_to_queue = serialize.at("queue");
50+
bool s_queue_to_cpu = serialize.at("queue_wait_idle");
5051

5152
// Decode command stream options
5253
json s_stream = serialize.at("commandstream");
@@ -67,7 +68,11 @@ void LayerConfig::parse_serialization_options(const json& config)
6768
bool s_stream_tx_post = s_stream.at("transfer").at("post");
6869

6970
// Write after all options read from JSON so we know it parsed correctly
70-
conf_serialize_queues = (!s_none) && (s_all || s_queue);
71+
conf_serialize_queues = (!s_none) && (s_all || s_queue_to_queue);
72+
73+
// This is not enabled by "all" and is a special case because it has an
74+
// exceptionally high performance penalty compared to the other options
75+
conf_serialize_queue_wait_idle = (!s_none) && s_queue_to_cpu;
7176

7277
conf_serialize_dispatch_pre = (!s_none) && (s_all || s_stream_c_pre);
7378
conf_serialize_dispatch_post = (!s_none) && (s_all || s_stream_c_post);
@@ -86,7 +91,8 @@ void LayerConfig::parse_serialization_options(const json& config)
8691

8792
LAYER_LOG("Layer serialization configuration");
8893
LAYER_LOG("=================================");
89-
LAYER_LOG(" - Serialize queues: %d", conf_serialize_queues);
94+
LAYER_LOG(" - Serialize queue submit: %d", conf_serialize_queues);
95+
LAYER_LOG(" - Wait idle after queue submit: %d", conf_serialize_queue_wait_idle);
9096
LAYER_LOG(" - Serialize compute pre: %d", conf_serialize_dispatch_pre);
9197
LAYER_LOG(" - Serialize compute post: %d", conf_serialize_dispatch_post);
9298
LAYER_LOG(" - Serialize render pass pre: %d", conf_serialize_render_pass_pre);
@@ -269,6 +275,13 @@ bool LayerConfig::serialize_queue() const
269275
return conf_serialize_queues;
270276
}
271277

278+
/* See header for documentation. */
279+
bool LayerConfig::serialize_queue_wait_idle() const
280+
{
281+
return conf_serialize_queue_wait_idle;
282+
}
283+
284+
272285
/* See header for documentation. */
273286
bool LayerConfig::serialize_cmdstream_compute_dispatch_pre() const
274287
{

layer_gpu_support/source/layer_config.hpp

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,10 +52,15 @@ class LayerConfig
5252
// Config queries for serializer
5353

5454
/**
55-
* @brief True if config wants to serialize before compute workloads.
55+
* @brief True if config wants to serialize across queue submits.
5656
*/
5757
bool serialize_queue() const;
5858

59+
/**
60+
* @brief True if config wants to wait for the device to go idle after each queue submit.
61+
*/
62+
bool serialize_queue_wait_idle() const;
63+
5964
/**
6065
* @brief True if config wants to serialize before compute workloads.
6166
*/
@@ -178,10 +183,15 @@ class LayerConfig
178183

179184
private:
180185
/**
181-
* @brief True if we force serialize across queues.
186+
* @brief True if we force serialize all queue submits.
182187
*/
183188
bool conf_serialize_queues {false};
184189

190+
/**
191+
* @brief True if we force device idle after each queue submit.
192+
*/
193+
bool conf_serialize_queue_wait_idle {false};
194+
185195
/**
186196
* @brief True if we force serialize before compute dispatches.
187197
*/

layer_gpu_support/source/layer_device_functions_queue.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,11 @@ VKAPI_ATTR VkResult VKAPI_CALL
9999
layer->driver.vkQueueSubmit(queue, 1, &submitInfoPost, VK_NULL_HANDLE);
100100
}
101101

102+
if (layer->instance->config.serialize_queue_wait_idle())
103+
{
104+
layer->driver.vkDeviceWaitIdle(layer->device);
105+
}
106+
102107
return result;
103108
}
104109

@@ -178,6 +183,11 @@ VKAPI_ATTR VkResult VKAPI_CALL
178183
layer->driver.vkQueueSubmit2(queue, 1, &submitInfoPost, VK_NULL_HANDLE);
179184
}
180185

186+
if (layer->instance->config.serialize_queue_wait_idle())
187+
{
188+
layer->driver.vkDeviceWaitIdle(layer->device);
189+
}
190+
181191
return result;
182192
}
183193

@@ -257,5 +267,10 @@ VKAPI_ATTR VkResult VKAPI_CALL
257267
layer->driver.vkQueueSubmit2KHR(queue, 1, &submitInfoPost, VK_NULL_HANDLE);
258268
}
259269

270+
if (layer->instance->config.serialize_queue_wait_idle())
271+
{
272+
layer->driver.vkDeviceWaitIdle(layer->device);
273+
}
274+
260275
return result;
261276
}

0 commit comments

Comments
 (0)