Set CPU thread priority for scheduler threads based on model priority
David Goodwin committed Nov 27, 2018
1 parent f5151d4 commit 3af3709
Showing 3 changed files with 35 additions and 7 deletions.
1 change: 1 addition & 0 deletions src/core/constants.h
@@ -55,5 +55,6 @@ constexpr char kMetricsLabelGpuUuid[] = "gpu_uuid";
 
 constexpr uint64_t NANOS_PER_SECOND = 1000000000;
 constexpr int MAX_GRPC_MESSAGE_SIZE = INT32_MAX;
+constexpr int SCHEDULER_DEFAULT_NICE = 5;
 
 }} // namespace nvidia::inferenceserver
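
For context: on Linux, nice values range from -20 (highest scheduling priority) to 19 (lowest), with 0 as the default. The new SCHEDULER_DEFAULT_NICE of 5 therefore runs scheduler threads at slightly below normal priority unless a model's configured priority overrides it, as the infer.cc change below shows.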
33 changes: 30 additions & 3 deletions src/core/infer.cc
@@ -26,6 +26,11 @@
 
 #include "src/core/infer.h"
 
+#include <sys/resource.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
 #include <chrono>
 #include "src/core/constants.h"
 #include "src/core/logging.h"
@@ -666,10 +671,26 @@ InferenceServable::SetRunnerCount(uint32_t cnt)
 
   runner_cnt_ = cnt;
 
+  // Set default nice level unless overridden by model priority
+  int nice = SCHEDULER_DEFAULT_NICE;
+  if (config_.has_optimization()) {
+    switch (config_.optimization().priority()) {
+      case ModelOptimizationPolicy::PRIORITY_MAX:
+        nice = 0;
+        break;
+      case ModelOptimizationPolicy::PRIORITY_MIN:
+        nice = 19;
+        break;
+      default:
+        nice = SCHEDULER_DEFAULT_NICE;
+        break;
+    }
+  }
+
   // Create the runner threads for this servable.
   for (uint32_t c = 0; c < runner_cnt_; ++c) {
     runner_threads_.emplace_back(
-        new std::thread([this, c]() { RunnerThread(c); }));
+        new std::thread([this, c, nice]() { RunnerThread(c, nice); }));
   }
 
   return tensorflow::Status::OK();
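
The priority-to-nice mapping above is easy to exercise in isolation. A minimal sketch, with a hypothetical Priority enum standing in for the ModelOptimizationPolicy proto enum used in the real code:

    #include <iostream>

    constexpr int SCHEDULER_DEFAULT_NICE = 5;

    // Hypothetical stand-in for ModelOptimizationPolicy's priority enum.
    enum class Priority { DEFAULT, MAX, MIN };

    // Mirrors the switch in SetRunnerCount: PRIORITY_MAX maps to nice 0,
    // PRIORITY_MIN to nice 19, and everything else to the default of 5.
    int NiceForPriority(Priority p) {
      switch (p) {
        case Priority::MAX:
          return 0;
        case Priority::MIN:
          return 19;
        default:
          return SCHEDULER_DEFAULT_NICE;
      }
    }

    int main() {
      std::cout << NiceForPriority(Priority::MAX) << "\n";      // 0
      std::cout << NiceForPriority(Priority::DEFAULT) << "\n";  // 5
      std::cout << NiceForPriority(Priority::MIN) << "\n";      // 19
      return 0;
    }

Note that PRIORITY_MAX maps to nice 0 (the normal default) rather than a negative value, presumably so the server never requests elevated priority, which would require CAP_SYS_NICE in an unprivileged process.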
@@ -761,9 +782,15 @@ InferenceServable::Run(
   }
 
 void
-InferenceServable::RunnerThread(const uint32_t runner_id)
+InferenceServable::RunnerThread(const uint32_t runner_id, const int nice)
 {
-  LOG_INFO << "Starting runner thread " << runner_id << "...";
+  if (setpriority(PRIO_PROCESS, syscall(SYS_gettid), nice) == 0) {
+    LOG_INFO << "Starting runner thread " << runner_id << " at nice " << nice
+             << "...";
+  } else {
+    LOG_ERROR << "Starting runner thread " << runner_id
+              << " at default nice (requested nice " << nice << " failed)...";
+  }
 
   // For debugging delay start of runner threads until the queue
   // contains the specified number of entries.
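
The setpriority() call is the interesting part: on Linux, PRIO_PROCESS with a kernel thread id (obtained via SYS_gettid, which had no glibc wrapper at the time) adjusts the nice value of only the calling thread, not the whole process. A minimal, Linux-specific sketch of the same technique outside the server (build with -pthread):

    #include <sys/resource.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    #include <cstdio>
    #include <thread>

    // Each thread adjusts its own scheduling priority; with PRIO_PROCESS and
    // a thread id, setpriority() affects only the calling thread on Linux.
    static void Worker(int nice_value) {
      if (setpriority(PRIO_PROCESS, syscall(SYS_gettid), nice_value) == 0) {
        std::printf("thread running at nice %d\n", nice_value);
      } else {
        // Fails with EACCES if the value would raise priority unprivileged.
        std::perror("setpriority");
      }
    }

    int main() {
      std::thread low(Worker, 19);    // lowest priority
      std::thread normal(Worker, 0);  // normal priority
      low.join();
      normal.join();
      return 0;
    }

An unprivileged process may only increase a nice value (lower its priority), which is consistent with the mapping above never going below nice 0.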
8 changes: 4 additions & 4 deletions src/core/infer.h
@@ -300,9 +300,9 @@ class InferenceServable {
   // dynamically adjust the number of runners.
   tensorflow::Status SetRunnerCount(uint32_t cnt);
 
-  // Called by runer thread when a request has been completed with the
-  // result status for the request. If successful the ResponseProvider
-  // will have been updated with the response.
+  // Called by runner thread when a request has been completed with
+  // the result status for the request. If successful the
+  // ResponseProvider will have been updated with the response.
   using CompleteFunc = std::function<void(tensorflow::Status)>;
 
   struct RunnerPayload {
@@ -377,7 +377,7 @@ class InferenceServable {
   std::vector<std::unique_ptr<std::thread>> runner_threads_;
   std::atomic<bool> runner_threads_exit_;
 
-  void RunnerThread(const uint32_t runner_id);
+  void RunnerThread(const uint32_t runner_id, const int nice);
   uint64_t GetDynamicBatch(const ModelDynamicBatching& batching_config);
 
   size_t max_preferred_batch_size_;
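
Taken together, the three changes wire a model's configured priority through to the nice level of its runner threads. A model would opt in through its configuration; a sketch of the relevant stanza, assuming the usual config.pbtxt placement of the ModelOptimizationPolicy message referenced in the diff:

    optimization {
      priority: PRIORITY_MAX
    }

Models that configure nothing fall through to the default case and run at SCHEDULER_DEFAULT_NICE.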
