Set CPU thread priority for scheduler threads based on model priority
David Goodwin committed Nov 27, 2018
1 parent f5151d4 commit 3af3709
Showing 3 changed files with 35 additions and 7 deletions.
1 change: 1 addition & 0 deletions src/core/constants.h
@@ -55,5 +55,6 @@ constexpr char kMetricsLabelGpuUuid[] = "gpu_uuid";
 
 constexpr uint64_t NANOS_PER_SECOND = 1000000000;
 constexpr int MAX_GRPC_MESSAGE_SIZE = INT32_MAX;
+constexpr int SCHEDULER_DEFAULT_NICE = 5;
 
 }} // namespace nvidia::inferenceserver
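
For context: on Linux, nice values range from -20 (highest scheduling priority) to 19 (lowest), with 0 as the default. The new SCHEDULER_DEFAULT_NICE of 5 therefore runs scheduler threads at slightly below normal priority unless a model's configured priority overrides it, as the infer.cc change below shows.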
33 changes: 30 additions & 3 deletions src/core/infer.cc
@@ -26,6 +26,11 @@
 
 #include "src/core/infer.h"
 
+#include <sys/resource.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
 #include <chrono>
 #include "src/core/constants.h"
 #include "src/core/logging.h"
@@ -666,10 +671,26 @@ InferenceServable::SetRunnerCount(uint32_t cnt)
 
   runner_cnt_ = cnt;
 
+  // Set default nice level unless overridden by model priority
+  int nice = SCHEDULER_DEFAULT_NICE;
+  if (config_.has_optimization()) {
+    switch (config_.optimization().priority()) {
+      case ModelOptimizationPolicy::PRIORITY_MAX:
+        nice = 0;
+        break;
+      case ModelOptimizationPolicy::PRIORITY_MIN:
+        nice = 19;
+        break;
+      default:
+        nice = SCHEDULER_DEFAULT_NICE;
+        break;
+    }
+  }
+
   // Create the runner threads for this servable.
   for (uint32_t c = 0; c < runner_cnt_; ++c) {
     runner_threads_.emplace_back(
-        new std::thread([this, c]() { RunnerThread(c); }));
+        new std::thread([this, c, nice]() { RunnerThread(c, nice); }));
   }
 
   return tensorflow::Status::OK();
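
The priority-to-nice mapping above is easy to exercise in isolation. A minimal sketch, with a hypothetical Priority enum standing in for the ModelOptimizationPolicy proto enum used in the real code:

    #include <iostream>

    constexpr int SCHEDULER_DEFAULT_NICE = 5;

    // Hypothetical stand-in for ModelOptimizationPolicy's priority enum.
    enum class Priority { DEFAULT, MAX, MIN };

    // Mirrors the switch in SetRunnerCount: PRIORITY_MAX maps to nice 0,
    // PRIORITY_MIN to nice 19, and everything else to the default of 5.
    int NiceForPriority(Priority p) {
      switch (p) {
        case Priority::MAX:
          return 0;
        case Priority::MIN:
          return 19;
        default:
          return SCHEDULER_DEFAULT_NICE;
      }
    }

    int main() {
      std::cout << NiceForPriority(Priority::MAX) << "\n";      // 0
      std::cout << NiceForPriority(Priority::DEFAULT) << "\n";  // 5
      std::cout << NiceForPriority(Priority::MIN) << "\n";      // 19
      return 0;
    }

Note that PRIORITY_MAX maps to nice 0 (the normal default) rather than a negative value, presumably so the server never requests elevated priority, which would require CAP_SYS_NICE in an unprivileged process.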
@@ -761,9 +782,15 @@ InferenceServable::Run(
   }
 
 void
-InferenceServable::RunnerThread(const uint32_t runner_id)
+InferenceServable::RunnerThread(const uint32_t runner_id, const int nice)
 {
-  LOG_INFO << "Starting runner thread " << runner_id << "...";
+  if (setpriority(PRIO_PROCESS, syscall(SYS_gettid), nice) == 0) {
+    LOG_INFO << "Starting runner thread " << runner_id << " at nice " << nice
+             << "...";
+  } else {
+    LOG_ERROR << "Starting runner thread " << runner_id
+              << " at default nice (requested nice " << nice << " failed)...";
+  }
 
   // For debugging delay start of runner threads until the queue
   // contains the specified number of entries.
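
The setpriority() call is the interesting part: on Linux, PRIO_PROCESS with a kernel thread id (obtained via SYS_gettid, which had no glibc wrapper at the time) adjusts the nice value of only the calling thread, not the whole process. A minimal, Linux-specific sketch of the same technique outside the server (build with -pthread):

    #include <sys/resource.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    #include <cstdio>
    #include <thread>

    // Each thread adjusts its own scheduling priority; with PRIO_PROCESS and
    // a thread id, setpriority() affects only the calling thread on Linux.
    static void Worker(int nice_value) {
      if (setpriority(PRIO_PROCESS, syscall(SYS_gettid), nice_value) == 0) {
        std::printf("thread running at nice %d\n", nice_value);
      } else {
        // Fails with EACCES if the value would raise priority unprivileged.
        std::perror("setpriority");
      }
    }

    int main() {
      std::thread low(Worker, 19);    // lowest priority
      std::thread normal(Worker, 0);  // normal priority
      low.join();
      normal.join();
      return 0;
    }

An unprivileged process may only increase a nice value (lower its priority), which is consistent with the mapping above never going below nice 0.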
8 changes: 4 additions & 4 deletions src/core/infer.h
@@ -300,9 +300,9 @@ class InferenceServable {
   // dynamically adjust the number of runners.
   tensorflow::Status SetRunnerCount(uint32_t cnt);
 
-  // Called by runer thread when a request has been completed with the
-  // result status for the request. If successful the ResponseProvider
-  // will have been updated with the response.
+  // Called by runner thread when a request has been completed with
+  // the result status for the request. If successful the
+  // ResponseProvider will have been updated with the response.
   using CompleteFunc = std::function<void(tensorflow::Status)>;
 
   struct RunnerPayload {
@@ -377,7 +377,7 @@ class InferenceServable {
   std::vector<std::unique_ptr<std::thread>> runner_threads_;
   std::atomic<bool> runner_threads_exit_;
 
-  void RunnerThread(const uint32_t runner_id);
+  void RunnerThread(const uint32_t runner_id, const int nice);
   uint64_t GetDynamicBatch(const ModelDynamicBatching& batching_config);
 
   size_t max_preferred_batch_size_;
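
Taken together, the three changes wire a model's configured priority through to the nice level of its runner threads. A model would opt in through its configuration; a sketch of the relevant stanza, assuming the usual config.pbtxt placement of the ModelOptimizationPolicy message referenced in the diff:

    optimization {
      priority: PRIORITY_MAX
    }

Models that configure nothing fall through to the default case and run at SCHEDULER_DEFAULT_NICE.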
