Skip to content

Commit

Permalink
Cleaning extra logic for handling no limiting case
Browse files (browse the repository at this point in the history)
  • Loading branch information
tanmayv25 committed Sep 21, 2021
1 parent 053e833 commit de20658
Showing 1 changed file with 4 additions and 21 deletions.
25 changes: 4 additions & 21 deletions src/core/rate_limiter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,6 @@ RateLimiter::EnqueuePayload(
}
}
if (ignore_resources_and_priority_) {
// Directly wake up any one of the waiting thread to process the payload.
if (pinstance == nullptr) {
payload_queue->cv_.notify_one();
} else {
Expand Down Expand Up @@ -337,13 +336,7 @@ RateLimiter::DeferPayloadSchedule(
}

itr->second.EnqueueModelInstanceRequest(OnSchedule, triton_model_instance);
if (ignore_resources_and_priority_) {
// Directly allocate an available model instance if not using rate
// limiter.
itr->second.AllocateInstanceIfAvailable();
} else {
itr->second.StageInstanceIfAvailable();
}
itr->second.StageInstanceIfAvailable();

return Status::Success;
}
Expand All @@ -358,7 +351,6 @@ RateLimiter::SchedulePayload(
} else {
payload_queue->specific_queues_[tmi]->Enqueue(payload);
}
// Directly schedule the payload to run
payload->SetState(Payload::State::SCHEDULED);
}

Expand All @@ -377,17 +369,9 @@ RateLimiter::OnRelease(ModelInstanceContext* instance)
{
auto& model_context = model_contexts_[instance->RawInstance()->Model()];
model_context.AddAvailableInstance(instance);
if (!ignore_resources_and_priority_) {
resource_manager_->ReleaseResources(instance);
}
resource_manager_->ReleaseResources(instance);
if (model_context.ContainsPendingRequests(instance->RawInstance()->Index())) {
if (ignore_resources_and_priority_) {
// Directly allocate an available model instance if not using rate
// limiter.
model_context.AllocateInstanceIfAvailable();
} else {
model_context.StageInstanceIfAvailable();
}
model_context.StageInstanceIfAvailable();
}
AttemptAllocation();
}
Expand All @@ -398,8 +382,7 @@ RateLimiter::AttemptAllocation()
std::lock_guard<std::recursive_mutex> lk(staged_instances_mtx_);
if (!staged_instances_.empty()) {
ModelInstanceContext* instance = staged_instances_.top();
if (ignore_resources_and_priority_ ||
resource_manager_->AllocateResources(instance)) {
if (resource_manager_->AllocateResources(instance)) {
staged_instances_.pop();
instance->Allocate();
}
Expand Down

0 comments on commit de20658

Please sign in to comment.