
Commit af71537

update model inference.
1 parent 0f8ee68 commit af71537

File tree

2 files changed: +31 −18 lines
  python/fedml/cli/model_deployment/device_model_deployment.py
  python/fedml/cli/model_deployment/device_model_inference.py


python/fedml/cli/model_deployment/device_model_deployment.py

Lines changed: 21 additions & 13 deletions
@@ -4,7 +4,7 @@
 import platform
 import time
 
-import psutil
+import numpy as np
 import requests
 import tritonclient.http as http_client
 
@@ -116,7 +116,7 @@ def should_exit_logs(cmd_type, cmd_process_id, model_name, inference_engine, inf
                 get_model_info(model_name, inference_engine, inference_port)
             logging.info("Log test for deploying model successfully, inference url: {}, "
                          "model metadata: {}, model config: {}".format(
-                inference_output_url, model_metadata, model_config))
+                             inference_output_url, model_metadata, model_config))
             if inference_output_url != "":
                 return True
         except Exception as e:
@@ -152,7 +152,7 @@ def log_deployment_result(cmd_container_name, cmd_type, cmd_process_id, inferenc
             logging.info("{}".format(added_logs))
             last_err_logs = err_str
 
-        time.sleep(5)
+        time.sleep(3)
 
         if should_exit_logs(cmd_type, cmd_process_id, inference_model_name, inference_engine, inference_http_port):
            break
@@ -185,11 +185,11 @@ def get_model_info(model_name, inference_engine, inference_http_port, infer_host
         infer_url_host = infer_host
     else:
         infer_url_host = local_ip
-    inference_output_url = "{}:{}/{}/models/{}/versions/{}/infer".format(infer_url_host,
-                                                                         inference_http_port,
-                                                                         ClientConstants.INFERENCE_INFERENCE_SERVER_VERSION,
-                                                                         inference_model_name,
-                                                                         model_version)
+    inference_output_url = "http://{}:{}/{}/models/{}/versions/{}/infer".format(infer_url_host,
+                                                                                inference_http_port,
+                                                                                ClientConstants.INFERENCE_INFERENCE_SERVER_VERSION,
+                                                                                inference_model_name,
+                                                                                model_version)
 
     return inference_output_url, model_version, model_metadata, model_config
 
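Note: the only functional change in this hunk is prepending the "http://" scheme. A quick sketch of the URL the code now produces, using made-up values; the real host, port, and the value of ClientConstants.INFERENCE_INFERENCE_SERVER_VERSION are not visible in this diff, and "v2"/8000 merely follow Triton's usual KServe-style HTTP defaults:

# Illustrative values only -- not taken from the commit
infer_url_host = "192.168.1.10"
inference_http_port = 8000           # assumed Triton HTTP port
server_version = "v2"                # assumed value of the server-version constant
inference_model_name = "demo_model"  # hypothetical model name
model_version = "1"

inference_output_url = "http://{}:{}/{}/models/{}/versions/{}/infer".format(
    infer_url_host, inference_http_port, server_version,
    inference_model_name, model_version)
# -> "http://192.168.1.10:8000/v2/models/demo_model/versions/1/infer"
# Without the scheme, HTTP clients such as requests raise MissingSchema,
# so the old string could not be POSTed to directly.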
@@ -213,14 +213,18 @@ def run_http_inference_with_lib_http_api(model_name, inference_http_port, batch_
     model_metadata = triton_client.get_model_metadata(model_name=inference_model_name, model_version=model_version)
     model_config = triton_client.get_model_config(model_name=inference_model_name, model_version=model_version)
 
-    inference_output_sample = {}
+    print("model metadata {}".format(model_metadata))
+    inference_response_list = list()
     inference_input_list = model_metadata["inputs"]
     infer_item_count = 0
     inference_query_list = []
+
+    input_data_np = np.asarray(inference_input_data_list * batch_size, dtype=object)
+
     for infer_input_item in inference_input_list:
         query_item = http_client.InferInput(name=infer_input_item["name"],
-                                            shape=(batch_size,), datatype=infer_input_item["data_type"])
-        query_item.set_data_from_numpy(inference_input_data_list[infer_item_count])
+                                            shape=(batch_size,), datatype=infer_input_item["datatype"])
+        query_item.set_data_from_numpy(input_data_np)
         inference_query_list.append(query_item)
         infer_item_count += 1
 
@@ -238,9 +242,13 @@ def run_http_inference_with_lib_http_api(model_name, inference_http_port, batch_
     )
 
     for infer_output_item in inference_output_list:
-        inference_output_sample[infer_output_item["name"]] = response.as_numpy(infer_output_item["name"])
+        response_item = response.get_output(infer_output_item["name"])
+        inference_response_list.append(response_item)
+        print("response item {}".format(response_item))
 
-    return inference_output_sample
+    inference_response_dict = {"outputs": inference_response_list}
+    print("return {}".format(inference_response_dict))
+    return inference_response_dict
 
 
 def run_http_inference_with_raw_http_request(self, inference_input_json, inference_input_data_list):
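Note: switching from response.as_numpy(...) to response.get_output(...) changes what the caller receives. as_numpy() returns a numpy.ndarray, which FastAPI cannot JSON-encode by default, while get_output() returns the named output entry as a plain dict taken from the HTTP response (or None if absent). The returned value therefore now looks roughly like the sketch below; all field values are illustrative, not from the commit, and the exact fields depend on how outputs were requested:

# Approximate shape of the dict run_http_inference_with_lib_http_api now returns
inference_response_dict = {
    "outputs": [
        {
            "name": "OUTPUT0",   # hypothetical output name
            "datatype": "BYTES",
            "shape": [2],
            "data": ["a", "b"],  # illustrative payload
        }
    ]
}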

python/fedml/cli/model_deployment/device_model_inference.py

Lines changed: 10 additions & 5 deletions
@@ -2,10 +2,10 @@
 from urllib.parse import urlparse
 
 from fastapi import FastAPI, Request
-from fedml.cli.model_deployment.device_model_deployment import run_http_inference_with_lib_http_api
+from fedml.cli.model_deployment.device_model_deployment import run_http_inference_with_lib_http_api, \
+    run_http_inference_with_raw_http_request
 from fedml.cli.model_deployment.device_client_constants import ClientConstants
 from fedml.cli.model_deployment.device_server_constants import ServerConstants
-from fedml.cli.model_deployment.device_server_runner import FedMLServerRunner
 from fedml.cli.model_deployment.device_model_monitor import FedMLModelMetrics
 from fedml.cli.model_deployment.device_model_cache import FedMLModelCache
 
@@ -49,8 +49,11 @@ async def predict(request: Request):
     # Send inference request to idle device
     inference_response = {}
     if inference_output_url != "":
+        input_data = input_json.get("data", "SampleData")
+        input_data_list = list()
+        input_data_list.append(str(input_data))
         inference_response = send_inference_request(idle_device, model_name, inference_host,
-                                                    inference_output_url, input_json, input_json)
+                                                    inference_output_url, input_json, input_data_list)
 
     model_metrics.calc_metrics(model_id, model_name, infer_end_point_id, inference_output_url)
 
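Note: the handler now reads a "data" field from the request body (falling back to the literal "SampleData") and forwards it as a one-element list instead of passing the whole JSON payload twice. A hypothetical client call; the gateway host, port, and route path are assumptions, since only the handler body appears in this diff:

import requests

payload = {"data": "hello fedml"}  # "data" is the key predict() now extracts
resp = requests.post("http://127.0.0.1:8080/predict", json=payload)
print(resp.json())  # the {"outputs": [...]} dict built by the Triton client path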
@@ -79,9 +82,11 @@ def found_idle_inference_device(end_point_id):
     return idle_device, model_id, model_name, inference_host, inference_output_url
 
 
-def send_inference_request(device, model_name, inference_host, inference_url, json_req, bin_data=None):
+def send_inference_request(device, model_name, inference_host, inference_url, json_req, input_data_list=None):
     inference_response = run_http_inference_with_lib_http_api(model_name,
-                                                              ClientConstants.INFERENCE_HTTP_PORT, 1, bin_data,
+                                                              ClientConstants.INFERENCE_HTTP_PORT,
+                                                              1,
+                                                              input_data_list,
                                                               inference_host)
     return inference_response
 
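Note: with this signature change, the full request path is: predict() wraps the "data" value in a single-element list, send_inference_request() forwards it with batch_size fixed at 1, and run_http_inference_with_lib_http_api() packs it into the numpy object array shown in the other file. A direct call mirroring what send_inference_request() now does; the model name, port value, and host are placeholders, not values from the commit:

from fedml.cli.model_deployment.device_model_deployment import run_http_inference_with_lib_http_api

inference_response = run_http_inference_with_lib_http_api(
    "demo_model",   # hypothetical deployed model name
    8000,           # stand-in for ClientConstants.INFERENCE_HTTP_PORT
    1,              # batch_size, hard-coded by send_inference_request
    ["hello"],      # input_data_list built by predict()
    "127.0.0.1")    # infer_host
print(inference_response["outputs"])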
