Skip to content

Commit 83ac8c9

Browse files
authored
Merge pull request #687 from alex-liang-kh/dev/v0.7.0
Dev/v0.7.0
2 parents 0d46b0e + c034955 commit 83ac8c9

File tree

7 files changed

+173
-99
lines changed

7 files changed

+173
-99
lines changed

python/fedml/cli/model_deployment/device_client_constants.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,11 @@ def get_model_package_dir():
119119
model_packages_dir = os.path.join(ClientConstants.get_fedml_home_dir(), "fedml", "model_packages")
120120
return model_packages_dir
121121

122+
@staticmethod
def get_model_serving_dir():
    """Return the path of the ``models_serving`` directory.

    The path is ``<fedml home dir>/fedml/models_serving``, where the home
    directory comes from ``ClientConstants.get_fedml_home_dir()``. The
    directory is not created here; only the path string is built.
    """
    fedml_home = ClientConstants.get_fedml_home_dir()
    return os.path.join(fedml_home, "fedml", "models_serving")
126+
122127
@staticmethod
123128
def get_model_ops_list_url(model_name, page_num, page_size, config_version="release"):
124129
model_ops_url = "{}/api/v1/model/list?modelName={}&pageNum={}&pageSize={}".format(

python/fedml/cli/model_deployment/device_client_runner.py

Lines changed: 26 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -162,16 +162,16 @@ def run(self):
162162
self.wait_client_mqtt_connected()
163163

164164
self.mlops_metrics.report_client_training_status(self.edge_id,
165-
ClientConstants.MSG_MLOPS_CLIENT_STATUS_INITIALIZING)
165+
ClientConstants.MSG_MLOPS_CLIENT_STATUS_RUNNING)
166166
self.send_deployment_status(self.edge_id, model_name, model_id, "",
167167
ClientConstants.MSG_MODELOPS_DEPLOYMENT_STATUS_INITIALIZING)
168168

169169
# update local config with real time parameters from server and dynamically replace variables value
170170
logging.info("Download and unzip model to local...")
171171
unzip_package_path, fedml_config_object = self.update_local_fedml_config(run_id, model_config)
172-
ClientConstants.cleanup_learning_process()
173172

174173
inference_output_url, model_version, model_metadata, model_config = start_deployment(
174+
inference_end_point_id, model_id,
175175
unzip_package_path, model_name, inference_engine,
176176
ClientConstants.INFERENCE_HTTP_PORT,
177177
ClientConstants.INFERENCE_GRPC_PORT,
@@ -181,24 +181,28 @@ def run(self):
181181
ClientConstants.INFERENCE_SERVER_IMAGE,
182182
self.infer_host)
183183
if inference_output_url == "":
184-
self.setup_client_mqtt_mgr()
185-
self.wait_client_mqtt_connected()
186-
self.mlops_metrics.report_client_id_status(run_id, self.edge_id,
187-
ClientConstants.MSG_MLOPS_CLIENT_STATUS_FAILED)
188184
self.send_deployment_status(self.edge_id, model_id, model_name, inference_output_url,
189185
ClientConstants.MSG_MODELOPS_DEPLOYMENT_STATUS_FAILED)
190186
self.send_deployment_results(self.edge_id, model_id, model_name, inference_output_url, model_version,
191187
ClientConstants.INFERENCE_HTTP_PORT, inference_engine,
192188
model_metadata, model_config)
189+
self.setup_client_mqtt_mgr()
190+
self.wait_client_mqtt_connected()
191+
self.mlops_metrics.run_id = self.run_id
192+
self.mlops_metrics.broadcast_client_training_status(self.edge_id,
193+
ClientConstants.MSG_MLOPS_CLIENT_STATUS_FAILED)
193194
self.release_client_mqtt_mgr()
194195
else:
195196
self.send_deployment_status(self.edge_id, model_id, model_name, inference_output_url,
196197
ClientConstants.MSG_MODELOPS_DEPLOYMENT_STATUS_DEPLOYED)
197198
self.send_deployment_results(self.edge_id, model_id, model_name, inference_output_url, model_version,
198199
ClientConstants.INFERENCE_HTTP_PORT, inference_engine,
199200
model_metadata, model_config)
200-
self.mlops_metrics.report_client_id_status(run_id, self.edge_id,
201-
ClientConstants.MSG_MLOPS_CLIENT_STATUS_RUNNING)
201+
time.sleep(1)
202+
self.broadcast_client_training_status(self.edge_id, ClientConstants.MSG_MLOPS_CLIENT_STATUS_FINISHED)
203+
204+
while True:
205+
time.sleep(1)
202206

203207
def send_deployment_results(self, device_id, model_id, model_name, model_inference_url,
204208
model_version, inference_port, inference_engine,
@@ -225,6 +229,19 @@ def send_deployment_status(self, device_id, model_id, model_name, model_inferenc
225229
self.client_mqtt_mgr.send_message_json(deployment_status_topic, json.dumps(deployment_status_payload))
226230
self.release_client_mqtt_mgr()
227231

232+
def broadcast_client_training_status(self, edge_id, status):
    """Publish this client's training status on the MLOps status topic.

    Builds a JSON message with ``edge_id``, ``run_id`` and ``status`` and
    sends it on ``fl_client/mlops/status`` through the client MQTT manager,
    which is set up at the start of the call and released at the end.

    Args:
        edge_id: Identifier of the edge device the status belongs to.
        status: Status value to publish.
    """
    # Fall back to run id 0 when no run id has been set on this runner.
    run_id = self.run_id if self.run_id is not None else 0
    topic_name = "fl_client/mlops/status"
    message_json = json.dumps({"edge_id": edge_id, "run_id": run_id, "status": status})
    # Lazy %-args: the message is only formatted if the record is emitted.
    logging.info("broadcast_client_training_status. message_json = %s", message_json)

    self.setup_client_mqtt_mgr()
    try:
        self.wait_client_mqtt_connected()
        self.client_mqtt_mgr.send_message_json(topic_name, message_json)
    finally:
        # Release the manager even when connecting or sending fails, so a
        # failed broadcast does not leave the MQTT connection open.
        self.release_client_mqtt_mgr()
244+
228245
def reset_devices_status(self, edge_id, status):
229246
self.mlops_metrics.run_id = self.run_id
230247
self.mlops_metrics.edge_id = edge_id
@@ -244,11 +261,6 @@ def stop_run(self):
244261

245262
time.sleep(1)
246263

247-
try:
248-
ClientConstants.cleanup_learning_process()
249-
except Exception as e:
250-
pass
251-
252264
self.release_client_mqtt_mgr()
253265

254266
def stop_run_with_killed_status(self):
@@ -265,11 +277,6 @@ def stop_run_with_killed_status(self):
265277

266278
time.sleep(1)
267279

268-
try:
269-
ClientConstants.cleanup_learning_process()
270-
except Exception as e:
271-
pass
272-
273280
self.release_client_mqtt_mgr()
274281

275282
def exit_run_with_exception(self):
@@ -279,7 +286,6 @@ def exit_run_with_exception(self):
279286

280287
logging.info("Exit run successfully.")
281288

282-
ClientConstants.cleanup_learning_process()
283289
ClientConstants.cleanup_run_process()
284290

285291
# Notify MLOps with the stopping message
@@ -308,11 +314,6 @@ def cleanup_run_when_starting_failed(self):
308314

309315
time.sleep(1)
310316

311-
try:
312-
ClientConstants.cleanup_learning_process()
313-
except Exception as e:
314-
pass
315-
316317
self.release_client_mqtt_mgr()
317318

318319
def cleanup_run_when_finished(self):
@@ -333,11 +334,6 @@ def cleanup_run_when_finished(self):
333334

334335
time.sleep(1)
335336

336-
try:
337-
ClientConstants.cleanup_learning_process()
338-
except Exception as e:
339-
pass
340-
341337
self.release_client_mqtt_mgr()
342338

343339
def callback_server_status_msg(self, topic=None, payload=None):
@@ -475,7 +471,7 @@ def callback_start_deployment(self, topic, payload):
475471
)
476472
client_runner.infer_host = self.infer_host
477473
self.process = Process(target=client_runner.run)
478-
#client_runner.run()
474+
# client_runner.run()
479475
self.process.start()
480476
ClientConstants.save_run_process(self.process.pid)
481477

python/fedml/cli/model_deployment/device_model_cache.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -75,20 +75,22 @@ def get_result_item_info(self, result_item):
7575
result_payload = result_item_json["result"]
7676
return device_id, result_payload
7777

78-
def get_idle_device(self, end_point_id):
78+
def get_idle_device(self, end_point_id, in_model_id):
7979
idle_device_id = ""
8080
status_list = self.get_deployment_status_list(end_point_id)
8181
for status_item in status_list:
8282
device_id, status_payload = self.get_status_item_info(status_item)
8383
model_status = status_payload["model_status"]
84-
if model_status == ServerConstants.MSG_MODELOPS_DEPLOYMENT_STATUS_DEPLOYED:
84+
model_id = status_payload["model_id"]
85+
if model_id == in_model_id and model_status == ServerConstants.MSG_MODELOPS_DEPLOYMENT_STATUS_DEPLOYED:
8586
idle_device_id = device_id
8687
break
8788

8889
result_list = self.get_deployment_result_list(end_point_id)
8990
for result_item in result_list:
9091
device_id, result_payload = self.get_result_item_info(result_item)
91-
if device_id == idle_device_id:
92+
model_id = result_payload["model_id"]
93+
if device_id == idle_device_id and model_id == in_model_id:
9294
return result_payload
9395

9496
return {}

0 commit comments

Comments
 (0)