@@ -379,21 +379,7 @@ ModelInstanceState::SaveRequestsToSharedMemory(
     std::unique_ptr<InferRequest> infer_request;
     if (model_state->IsDecoupled()) {
       TRITONBACKEND_ResponseFactory* factory_ptr;
-      // Reuse the response factory if there is already a response factory
-      // associated with the request
-      std::lock_guard<std::mutex> guard{response_factory_map_mutex_};
-      {
-        if (response_factory_map_.find(reinterpret_cast<intptr_t>(request)) !=
-            response_factory_map_.end()) {
-          factory_ptr =
-              response_factory_map_[reinterpret_cast<intptr_t>(request)];
-        } else {
-          RETURN_IF_ERROR(
-              TRITONBACKEND_ResponseFactoryNew(&factory_ptr, request));
-          response_factory_map_[reinterpret_cast<intptr_t>(request)] =
-              factory_ptr;
-        }
-      }
+      RETURN_IF_ERROR(TRITONBACKEND_ResponseFactoryNew(&factory_ptr, request));

       infer_request = std::make_unique<InferRequest>(
           id, correlation_id, pb_input_tensors, requested_output_names,
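
The hunk above drops the mutex-guarded `response_factory_map_` lookup and instead creates a fresh factory for every decoupled request, so ownership can travel with the `InferRequest` rather than living in a shared map. Below is a minimal sketch of that ownership model; `ResponseFactoryNew`/`ResponseFactoryDelete` are hypothetical stand-ins for the real `TRITONBACKEND_ResponseFactory` API:

```cpp
#include <iostream>
#include <memory>

// Hypothetical stand-ins for TRITONBACKEND_ResponseFactoryNew/Delete.
struct ResponseFactory {};
ResponseFactory* ResponseFactoryNew() { return new ResponseFactory(); }
void ResponseFactoryDelete(ResponseFactory* f) { delete f; }

struct FactoryDeleter {
  void operator()(ResponseFactory* f) const { ResponseFactoryDelete(f); }
};
using ScopedFactory = std::unique_ptr<ResponseFactory, FactoryDeleter>;

struct InferRequest {
  // One factory per request, owned by the request itself. No shared map,
  // no mutex, and no stale entry when a request address is reused.
  ScopedFactory factory{ResponseFactoryNew()};
};

int main() {
  InferRequest request;  // factory created together with the request
  std::cout << "factory alive: " << (request.factory != nullptr) << "\n";
}  // factory freed exactly once when the request goes out of scope
```

The point of the pattern: a cached raw pointer keyed by request address can go stale when the core reuses an address for a new request, while a per-request owner cannot.
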
@@ -843,7 +829,8 @@ ModelInstanceState::StubToParentMQMonitor()
         ProcessLogRequest(message);
         break;
       }
-      case PYTHONSTUB_CleanupRequest: {
+      case PYTHONSTUB_BLSDecoupledInferPayloadCleanup:
+      case PYTHONSTUB_BLSDecoupledResponseFactoryCleanup: {
         ProcessBLSCleanupRequest(message);
         break;
       }
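
The single `PYTHONSTUB_CleanupRequest` command is split in two, and the monitor routes both variants to `ProcessBLSCleanupRequest` through adjacent case labels. A small sketch of that dispatch shape, using hypothetical command names:

```cpp
#include <iostream>

// Hypothetical command IDs mirroring the split cleanup messages.
enum StubCommand { InferPayloadCleanup, ResponseFactoryCleanup, LogRequest };

void ProcessCleanup(StubCommand cmd) {
  // The handler re-reads the command to pick the exact cleanup action.
  std::cout << "cleanup, command=" << cmd << "\n";
}

void Dispatch(StubCommand cmd) {
  switch (cmd) {
    // Adjacent case labels route both cleanup variants to one handler.
    case InferPayloadCleanup:
    case ResponseFactoryCleanup:
      ProcessCleanup(cmd);
      break;
    case LogRequest:
      std::cout << "log\n";
      break;
  }
}

int main() {
  Dispatch(InferPayloadCleanup);
  Dispatch(ResponseFactoryCleanup);
}
```
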
@@ -941,9 +928,17 @@ ModelInstanceState::ProcessBLSCleanupRequest(
       Stub()->ShmPool()->Load<char>(message->Args());
   CleanupMessage* cleanup_message_ptr =
       reinterpret_cast<CleanupMessage*>(cleanup_request_message.data_.get());
-
-  void* id = cleanup_message_ptr->id;
-  infer_payload_.erase(reinterpret_cast<intptr_t>(id));
+  intptr_t id = reinterpret_cast<intptr_t>(cleanup_message_ptr->id);
+  if (message->Command() == PYTHONSTUB_BLSDecoupledInferPayloadCleanup) {
+    // Remove the InferPayload object from the map.
+    infer_payload_.erase(id);
+  } else if (
+      message->Command() == PYTHONSTUB_BLSDecoupledResponseFactoryCleanup) {
+    // Delete response factory
+    std::unique_ptr<
+        TRITONBACKEND_ResponseFactory, backend::ResponseFactoryDeleter>
+        response_factory(reinterpret_cast<TRITONBACKEND_ResponseFactory*>(id));
+  }

   {
     bi::scoped_lock<bi::interprocess_mutex> lock{*(message->ResponseMutex())};
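
The `else if` branch above adopts the raw factory pointer into a scoped `std::unique_ptr` whose only job is to run the deleter when the branch ends. A sketch of this construct-to-destroy idiom, with a hypothetical `Factory` handle in place of `TRITONBACKEND_ResponseFactory`:

```cpp
#include <cstdint>
#include <iostream>
#include <memory>

// Hypothetical handle standing in for TRITONBACKEND_ResponseFactory.
struct Factory {};
void FactoryDelete(Factory* f) {
  std::cout << "factory deleted\n";
  delete f;
}

struct FactoryDeleter {
  void operator()(Factory* f) const { FactoryDelete(f); }
};

int main() {
  // The id crosses the message queue as an integer, so it is cast back
  // to a pointer before being adopted.
  std::intptr_t id = reinterpret_cast<std::intptr_t>(new Factory());
  {
    // Adopt the raw handle solely so the deleter runs at the closing
    // brace: the construct-to-destroy idiom used in the hunk above.
    std::unique_ptr<Factory, FactoryDeleter> scoped(
        reinterpret_cast<Factory*>(id));
  }  // FactoryDelete runs here
}
```
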
@@ -1172,12 +1167,6 @@ ModelInstanceState::ResponseSendDecoupled(
       std::lock_guard<std::mutex> guard{closed_requests_mutex_};
       closed_requests_.push_back(send_message_payload->request_address);
     }
-
-    // Clean up the response factory map.
-    {
-      std::lock_guard<std::mutex> guard{response_factory_map_mutex_};
-      response_factory_map_.erase(send_message_payload->request_address);
-    }
   }

   if (send_message_payload->response != 0) {
@@ -1195,14 +1184,7 @@ ModelInstanceState::ResponseSendDecoupled(
         error_message);

     std::vector<std::pair<std::unique_ptr<PbMemory>, void*>> gpu_output_buffers;
-    std::unique_ptr<
-        TRITONBACKEND_ResponseFactory, backend::ResponseFactoryDeleter>
-        response_factory_ptr;
     GPUBuffersHelper gpu_buffer_helper;
-    if (send_message_payload->flags == TRITONSERVER_RESPONSE_COMPLETE_FINAL) {
-      response_factory_ptr.reset(
-          reinterpret_cast<TRITONBACKEND_ResponseFactory*>(response_factory));
-    }

 #ifdef TRITON_ENABLE_GPU
     for (auto& output_tensor : infer_response->OutputTensors()) {
@@ -1289,13 +1271,6 @@ ModelInstanceState::ResponseSendDecoupled(
         response_factory, send_message_payload->flags);
     SetErrorForResponseSendMessage(
         send_message_payload, WrapTritonErrorInSharedPtr(error), error_message);
-
-    if (send_message_payload->flags == TRITONSERVER_RESPONSE_COMPLETE_FINAL) {
-      std::unique_ptr<
-          TRITONBACKEND_ResponseFactory, backend::ResponseFactoryDeleter>
-          response_factory(reinterpret_cast<TRITONBACKEND_ResponseFactory*>(
-              send_message_payload->response_factory_address));
-    }
   }
 }

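With both deletion sites removed, `ResponseSendDecoupled` no longer frees the factory when it observes `TRITONSERVER_RESPONSE_COMPLETE_FINAL`; the factory keeps a single owner, and deletion happens on one centralized path (such as the cleanup handler added above) instead of being decided by flags at each send site. A sketch of the single-owner shape this moves toward; all names here are illustrative, not the backend's:

```cpp
#include <cstdint>
#include <iostream>
#include <memory>

constexpr std::uint32_t kResponseCompleteFinal = 1;

struct Factory {
  ~Factory() { std::cout << "factory destroyed once, by its owner\n"; }
};

// The send path borrows the factory even for the FINAL flag, so multiple
// call sites can no longer race to free (or double-free) the same handle.
void SendResponse(Factory* /*factory*/, std::uint32_t flags) {
  if (flags == kResponseCompleteFinal) {
    std::cout << "final response sent; ownership unchanged\n";
  }
}

int main() {
  auto factory = std::make_unique<Factory>();  // single owner
  SendResponse(factory.get(), 0);
  SendResponse(factory.get(), kResponseCompleteFinal);
}  // the owner, not the send path, frees the factory
```
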
@@ -1368,11 +1343,6 @@ ModelInstanceState::ProcessRequestsDecoupled(
           TRITONSERVER_ERROR_INTERNAL, error->String().c_str());
     }

-    // Reset the release flags for all the requests.
-    for (auto& infer_request : pb_infer_requests) {
-      infer_request->SetReleaseFlags(TRITONSERVER_REQUEST_RELEASE_ALL);
-    }
-
     return TRITONSERVER_ErrorNew(
         TRITONSERVER_ERROR_INTERNAL, "Failed to process the requests.");
   }
@@ -2499,15 +2469,9 @@ TRITONBACKEND_ModelInstanceExecute(
       }
     }

-    // We should only delete the response factory for the requests that have
-    // not been closed.
     for (auto& infer_request : infer_requests) {
-      if (!instance_state->ExistsInClosedRequests(
-              infer_request->RequestAddress())) {
-        LOG_IF_ERROR(
-            infer_request->DeleteResponseFactory(),
-            "Failed to delete the response factory.");
-      }
+      // Reset the release flags for all the requests.
+      infer_request->SetReleaseFlags(TRITONSERVER_REQUEST_RELEASE_ALL);
     }
   }
 }
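
Paired with the `ProcessRequestsDecoupled` hunk, the release-flag reset moves from the error path into the per-request loop here, replacing the conditional `DeleteResponseFactory` call. A sketch of the reset-before-release pattern, with hypothetical flag values standing in for `TRITONSERVER_REQUEST_RELEASE_*`:

```cpp
#include <iostream>

// Hypothetical values mirroring TRITONSERVER_REQUEST_RELEASE_* flags.
enum ReleaseFlag { ReleaseAll = 1, Reschedule = 2 };

struct Request {
  ReleaseFlag release_flags = ReleaseAll;
  void SetReleaseFlags(ReleaseFlag flags) { release_flags = flags; }
};

int main() {
  Request requests[2];
  requests[1].SetReleaseFlags(Reschedule);  // e.g. changed during execution

  // As in the hunk above: unconditionally reset every request before it
  // is handed back, instead of deleting per-request resources here.
  for (auto& request : requests) {
    request.SetReleaseFlags(ReleaseAll);
  }
  std::cout << "all requests released with ReleaseAll\n";
}
```
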