Support layer: Serialize with vkDeviceWaitIdle (#115)

solidpixel · web-flow · commit e4a100860423 · 2025-05-22T21:32:21.000+01:00
diff --git a/layer_gpu_support/README_LAYER.md b/layer_gpu_support/README_LAYER.md
@@ -85,14 +85,16 @@ convenience options to force disable or enable all serialization.
 * If the `none` option is `true` then no serialization is applied, irrespective
   of other settings.
 * Else, if the `all` option is `true` then all serialization is applied,
-  irrespective of other settings.
+  irrespective of other settings, with the exception of `queue_wait_idle`,
+  which must be enabled individually due to its extreme performance overhead.
 * Else, the individual options are applied as specified.
 
 ```jsonc
 "serialize": {
-    "none": false,          // Enable no serialization options
-    "all": false,           // Enable all serialization options
-    "queue": false,         // Enable cross-queue serialization of submits
+    "none": false,            // Enable no serialization options
+    "all": false,             // Enable all serialization options except queue_wait_idle
+    "queue": false,           // Force cross-queue serialization of submits
+    "queue_wait_idle": false, // Insert vkDeviceWaitIdle after submits
     "commandstream": {
         "compute": {
             "pre": false,   // Insert full barrier before dispatches
diff --git a/layer_gpu_support/layer_config.json b/layer_gpu_support/layer_config.json
@@ -25,7 +25,8 @@
                 "post": false
             }
         },
-        "queue": false
+        "queue": false,
+        "queue_wait_idle": false
     },
     "shader": {
         "disable_cache": false,
diff --git a/layer_gpu_support/source/layer_config.cpp b/layer_gpu_support/source/layer_config.cpp
@@ -46,7 +46,8 @@ void LayerConfig::parse_serialization_options(const json& config)
     // Decode top level options
     bool s_all = serialize.at("all");
     bool s_none = serialize.at("none");
-    bool s_queue = serialize.at("queue");
+    bool s_queue_to_queue = serialize.at("queue");
+    bool s_queue_to_cpu = serialize.at("queue_wait_idle");
 
     // Decode command stream options
     json s_stream = serialize.at("commandstream");
@@ -67,7 +68,11 @@ void LayerConfig::parse_serialization_options(const json& config)
     bool s_stream_tx_post = s_stream.at("transfer").at("post");
 
     // Write after all options read from JSON so we know it parsed correctly
-    conf_serialize_queues = (!s_none) && (s_all || s_queue);
+    conf_serialize_queues = (!s_none) && (s_all || s_queue_to_queue);
+
+    // This is not enabled by "all" and is a special case because it has an
+    // exceptionally high performance penalty compared to the other options
+    conf_serialize_queue_wait_idle = (!s_none) && s_queue_to_cpu;
 
     conf_serialize_dispatch_pre = (!s_none) && (s_all || s_stream_c_pre);
     conf_serialize_dispatch_post = (!s_none) && (s_all || s_stream_c_post);
@@ -86,7 +91,8 @@ void LayerConfig::parse_serialization_options(const json& config)
 
     LAYER_LOG("Layer serialization configuration");
     LAYER_LOG("=================================");
-    LAYER_LOG(" - Serialize queues: %d", conf_serialize_queues);
+    LAYER_LOG(" - Serialize queue submit: %d", conf_serialize_queues);
+    LAYER_LOG(" - Wait idle after queue submit: %d", conf_serialize_queue_wait_idle);
     LAYER_LOG(" - Serialize compute pre: %d", conf_serialize_dispatch_pre);
     LAYER_LOG(" - Serialize compute post: %d", conf_serialize_dispatch_post);
     LAYER_LOG(" - Serialize render pass pre: %d", conf_serialize_render_pass_pre);
@@ -269,6 +275,13 @@ bool LayerConfig::serialize_queue() const
     return conf_serialize_queues;
 }
 
+/* See header for documentation. */
+bool LayerConfig::serialize_queue_wait_idle() const
+{
+    return conf_serialize_queue_wait_idle;
+}
+
+
 /* See header for documentation. */
 bool LayerConfig::serialize_cmdstream_compute_dispatch_pre() const
 {
diff --git a/layer_gpu_support/source/layer_config.hpp b/layer_gpu_support/source/layer_config.hpp
@@ -52,10 +52,15 @@ class LayerConfig
     // Config queries for serializer
 
     /**
-     * @brief True if config wants to serialize before compute workloads.
+     * @brief True if config wants to serialize across queue submits.
      */
     bool serialize_queue() const;
 
+    /**
+     * @brief True if config wants to wait for device idle after each queue submit.
+     */
+    bool serialize_queue_wait_idle() const;
+
     /**
      * @brief True if config wants to serialize before compute workloads.
      */
@@ -178,10 +183,15 @@ class LayerConfig
 
 private:
     /**
-     * @brief True if we force serialize across queues.
+     * @brief True if we force serialize all queue submits.
      */
     bool conf_serialize_queues {false};
 
+    /**
+     * @brief True if we force device idle after each queue submit.
+     */
+    bool conf_serialize_queue_wait_idle {false};
+
     /**
      * @brief True if we force serialize before compute dispatches.
      */
diff --git a/layer_gpu_support/source/layer_device_functions_queue.cpp b/layer_gpu_support/source/layer_device_functions_queue.cpp
@@ -99,6 +99,11 @@ VKAPI_ATTR VkResult VKAPI_CALL
         layer->driver.vkQueueSubmit(queue, 1, &submitInfoPost, VK_NULL_HANDLE);
     }
 
+    if (layer->instance->config.serialize_queue_wait_idle())
+    {
+        layer->driver.vkDeviceWaitIdle(layer->device);
+    }
+
     return result;
 }
 
@@ -178,6 +183,11 @@ VKAPI_ATTR VkResult VKAPI_CALL
         layer->driver.vkQueueSubmit2(queue, 1, &submitInfoPost, VK_NULL_HANDLE);
     }
 
+    if (layer->instance->config.serialize_queue_wait_idle())
+    {
+        layer->driver.vkDeviceWaitIdle(layer->device);
+    }
+
     return result;
 }
 
@@ -257,5 +267,10 @@ VKAPI_ATTR VkResult VKAPI_CALL
         layer->driver.vkQueueSubmit2KHR(queue, 1, &submitInfoPost, VK_NULL_HANDLE);
     }
 
+    if (layer->instance->config.serialize_queue_wait_idle())
+    {
+        layer->driver.vkDeviceWaitIdle(layer->device);
+    }
+
     return result;
 }

Original file line number	Diff line number	Diff line change
`@@ -99,6 +99,11 @@ VKAPI_ATTR VkResult VKAPI_CALL`
`99`	`99`	`layer->driver.vkQueueSubmit(queue, 1, &submitInfoPost, VK_NULL_HANDLE);`
`100`	`100`	`}`
`101`	`101`
	`102`	`+ if (layer->instance->config.serialize_queue_wait_idle())`
	`103`	`+ {`
	`104`	`+ layer->driver.vkDeviceWaitIdle(layer->device);`
	`105`	`+ }`
	`106`	`+`
`102`	`107`	`return result;`
`103`	`108`	`}`
`104`	`109`
`@@ -178,6 +183,11 @@ VKAPI_ATTR VkResult VKAPI_CALL`
`178`	`183`	`layer->driver.vkQueueSubmit2(queue, 1, &submitInfoPost, VK_NULL_HANDLE);`
`179`	`184`	`}`
`180`	`185`
	`186`	`+ if (layer->instance->config.serialize_queue_wait_idle())`
	`187`	`+ {`
	`188`	`+ layer->driver.vkDeviceWaitIdle(layer->device);`
	`189`	`+ }`
	`190`	`+`
`181`	`191`	`return result;`
`182`	`192`	`}`
`183`	`193`
`@@ -257,5 +267,10 @@ VKAPI_ATTR VkResult VKAPI_CALL`
`257`	`267`	`layer->driver.vkQueueSubmit2KHR(queue, 1, &submitInfoPost, VK_NULL_HANDLE);`
`258`	`268`	`}`
`259`	`269`
	`270`	`+ if (layer->instance->config.serialize_queue_wait_idle())`
	`271`	`+ {`
	`272`	`+ layer->driver.vkDeviceWaitIdle(layer->device);`
	`273`	`+ }`
	`274`	`+`
`260`	`275`	`return result;`
`261`	`276`	`}`