@@ -169,13 +169,6 @@ void BasicBackend::Infer(Ort::CustomOpApi& ort, OrtKernelContext* context) {
   // Preliminary thread safety mechanism
   // currently allows a maximum of 8 infer requests to execute in parallel
 
-  // Request an idle infer_request from the pool of infer_requests_
-  std::shared_ptr<InferenceEngine::InferRequest> infer_request = inferRequestsQueue_->getIdleRequest();
-  if (!infer_request) {
-    LOGS_DEFAULT(INFO) << "No idle Infer Requests found from the infer_requests_ pool!";
-    THROW_IE_EXCEPTION << "No idle Infer Requests!";
-  }
-
   LOGS_DEFAULT(INFO) << log_tag << "Running graph " << subgraph_context_.subgraph_name;
   LOGS_DEFAULT(INFO) << log_tag << "In Infer";
 
@@ -188,21 +181,30 @@ void BasicBackend::Infer(Ort::CustomOpApi& ort, OrtKernelContext* context) {
       FillOutputsWithConstantData(ort, node, output_tensor);
     }
 #endif
+    // Get Output tensors
+    LOGS_DEFAULT(INFO) << log_tag << "Inference successful";
   } else {
-    StartAsyncInference(ort, context, infer_request);
-    CompleteAsyncInference(ort, context, infer_request);
-  }
-  // Get Output tensors
-  LOGS_DEFAULT(INFO) << log_tag << "Inference successful";
-  // Once the inference is completed, the infer_request becomes free and is placed back into the pool of infer_requests_
-  inferRequestsQueue_->putIdleRequest(infer_request);
+    // Request an idle infer_request from the pool of infer_requests_
+    std::shared_ptr<InferenceEngine::InferRequest> infer_request = inferRequestsQueue_->getIdleRequest();
+    if (!infer_request) {
+      LOGS_DEFAULT(INFO) << "No idle Infer Requests found from the infer_requests_ pool!";
+      THROW_IE_EXCEPTION << "No idle Infer Requests!";
+    }
+    StartAsyncInference(ort, context, infer_request);
+    CompleteAsyncInference(ort, context, infer_request);
+
+    // Get Output tensors
+    LOGS_DEFAULT(INFO) << log_tag << "Inference successful";
+    // Once the inference is completed, the infer_request becomes free and is placed back into the pool of infer_requests_
+    inferRequestsQueue_->putIdleRequest(infer_request);
 #ifndef NDEBUG
     if (openvino_ep::backend_utils::IsDebugEnabled()) {
       inferRequestsQueue_->printstatus();  // Printing the elements of the infer_requests_ vector pool only in debug mode
       std::string& hw_target = (global_context_.device_id != "") ? global_context_.device_id : global_context_.device_type;
       printPerformanceCounts(infer_request, std::cout, hw_target);
     }
 #endif
+  }
 }
 
 }  // namespace openvino_ep
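For context, the diff above hands out and reclaims requests through inferRequestsQueue_->getIdleRequest() and putIdleRequest(). The real InferRequestsQueue class is not part of this diff; the sketch below is only an illustrative guess at how such a pool could be built with a mutex and condition variable. It uses a simple blocking wait, whereas the null check in the patch suggests the actual getIdleRequest() may use a timed or non-blocking wait that can return an empty pointer.

// Illustrative sketch only, not the onnxruntime implementation:
// a minimal thread-safe pool exposing the getIdleRequest()/putIdleRequest()
// interface called in the diff above.
#include <condition_variable>
#include <memory>
#include <mutex>
#include <queue>

namespace InferenceEngine { class InferRequest; }  // assumed; provided by OpenVINO

class InferRequestsQueue {
 public:
  // Hand out an idle request, blocking until one is available
  // (the real class may instead time out and return nullptr).
  std::shared_ptr<InferenceEngine::InferRequest> getIdleRequest() {
    std::unique_lock<std::mutex> lock(mutex_);
    cv_.wait(lock, [this] { return !idle_requests_.empty(); });
    auto request = idle_requests_.front();
    idle_requests_.pop();
    return request;
  }

  // Return a finished request to the pool and wake one waiting caller.
  void putIdleRequest(std::shared_ptr<InferenceEngine::InferRequest> request) {
    {
      std::lock_guard<std::mutex> lock(mutex_);
      idle_requests_.push(std::move(request));
    }
    cv_.notify_one();
  }

 private:
  std::mutex mutex_;
  std::condition_variable cv_;
  std::queue<std::shared_ptr<InferenceEngine::InferRequest>> idle_requests_;
};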