Skip to content

Commit d0766b7

Browse files
author
alexliang
committed
update cli.
1 parent 4b2eab5 commit d0766b7

File tree

15 files changed: 147 additions and 108 deletions.

devops/scripts/build-docs.sh

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
#!/usr/bin/env bash
22

3+
WORKSPACE=./
34
wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
45
bash miniconda.sh -b -p $WORKSPACE/miniconda
56
hash -r

python/app/fedcv/object_detection/config/bootstrap.bat

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,8 +5,14 @@
55
:: ### please customize your script in this region ####
66
pip install opencv-python-headless pandas matplotlib seaborn addict
77
set DATA_PATH=%userprofile%\fedcv_data
8-
if exist %DATA_PATH% (echo Exist %DATA_PATH%) else mkdir %DATA_PATH%
9-
.\..\data\coco128\download_coco128.bat
8+
if exist %DATA_PATH% (
9+
echo Exist %DATA_PATH%
10+
) ^
11+
else (
12+
mkdir %DATA_PATH%
13+
set cur_dir=%cd%
14+
%cur_dir%\..\data\coco128\download_coco128.bat
15+
)
1016

1117

1218
:: ### don't modify this part ###

python/app/fedcv/object_detection/config/bootstrap.sh

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
1+
#!/bin/bash
12
### don't modify this part ###
2-
set -x
3+
# set -x
34
##############################
45

56

python/examples/cross_silo/mqtt_s3_fedavg_mnist_lr_example/custom_data_and_model/config/fedml_config.yaml

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -40,13 +40,15 @@ device_args:
4040
gpu_mapping_key: mapping_default
4141

4242
comm_args:
43+
#backend: "MQTT_IPFS"
4344
backend: "MQTT_S3"
4445
mqtt_config_path: config/mqtt_config.yaml
4546
s3_config_path: config/s3_config.yaml
4647
# If you want to use your customized MQTT or s3 server as training backends, you should uncomment and set the following lines.
4748
#customized_training_mqtt_config: {'BROKER_HOST': 'your mqtt server address or domain name', 'MQTT_PWD': 'your mqtt password', 'BROKER_PORT': 1883, 'MQTT_KEEPALIVE': 180, 'MQTT_USER': 'your mqtt user'}
4849
#customized_training_s3_config: {'CN_S3_SAK': 'your s3 aws_secret_access_key', 'CN_REGION_NAME': 'your s3 region name', 'CN_S3_AKI': 'your s3 aws_access_key_id', 'BUCKET_NAME': 'your s3 bucket name'}
49-
50+
#customized_training_ipfs_config: {'token': 'your ipfs token at web3.storage', 'upload_uri': 'https://api.web3.storage/upload', 'download_uri': 'ipfs.w3s.link', }
51+
#customized_training_ipfs_config: {'token': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJkaWQ6ZXRocjoweERiNzY5OTdGRUFBRjg3QjY0RDc3MWZBYjg2RUExMTg2ODRBOEI0MTUiLCJpc3MiOiJ3ZWIzLXN0b3JhZ2UiLCJpYXQiOjE2NjM0OTkwNzQ3MzQsIm5hbWUiOiJmZWRtbC13ZWIzIn0.VV8WHJaqe8cbShek8warIGFbS55KOUfpgtLMRevPIIY', 'upload_uri': 'https://api.web3.storage/upload', 'download_uri': 'ipfs.w3s.link', }
5052

5153
tracking_args:
5254
# the default log path is at ~/fedml-client/fedml/logs/ and ~/fedml-server/fedml/logs/

python/examples/cross_silo/mqtt_s3_fedavg_mnist_lr_example/custom_data_and_model/torch_client.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,9 @@
22

33
import fedml
44
from fedml import FedMLRunner
5+
import fedml
56
from fedml.data.MNIST.data_loader import download_mnist, load_partition_data_mnist
7+
from fedml.core.alg_frame.context import Context
68

79

810
def load_data(args):
@@ -57,6 +59,8 @@ def forward(self, x):
5759

5860

5961
if __name__ == "__main__":
62+
Context().add("ipfs_secret_key", "12345678123456781234567812345678")
63+
6064
# init FedML framework
6165
args = fedml.init()
6266

python/examples/cross_silo/mqtt_s3_fedavg_mnist_lr_example/custom_data_and_model/torch_server.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
import fedml
44
from fedml import FedMLRunner
55
from fedml.data.MNIST.data_loader import download_mnist, load_partition_data_mnist
6+
from fedml.core.alg_frame.context import Context
67

78

89
def load_data(args):
@@ -57,6 +58,8 @@ def forward(self, x):
5758

5859

5960
if __name__ == "__main__":
61+
Context().add("ipfs_secret_key", "12345678123456781234567812345678")
62+
6063
# init FedML framework
6164
args = fedml.init()
6265

python/fedml/cli/edge_deployment/client_constants.py

Lines changed: 20 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -218,33 +218,33 @@ def exit_process(process):
218218
pass
219219

220220
@staticmethod
221-
def exec_console_with_script(script_path, should_capture_stdout_err=False):
222-
if should_capture_stdout_err:
223-
if platform.system() == 'Windows':
224-
script_process = subprocess.Popen(script_path, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
225-
else:
226-
script_process = subprocess.Popen(['sh', '-c', script_path], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
221+
def exec_console_with_script(script_path, should_capture_stdout=False, should_capture_stderr=False):
222+
stdout_flag = subprocess.PIPE if should_capture_stdout else sys.stdout
223+
stderr_flag = subprocess.PIPE if should_capture_stderr else sys.stderr
224+
225+
if platform.system() == 'Windows':
226+
script_process = subprocess.Popen(script_path, stdout=stdout_flag, stderr=stderr_flag)
227227
else:
228-
if platform.system() == 'Windows':
229-
script_process = subprocess.Popen(script_path, stdout=sys.stdout, stderr=subprocess.PIPE)
230-
else:
231-
script_process = subprocess.Popen(['sh', '-c', script_path], stdout=sys.stdout, stderr=subprocess.PIPE)
228+
script_process = subprocess.Popen(['sh', '-c', script_path], stdout=stdout_flag, stderr=stderr_flag)
229+
232230
return script_process
233231

234232
@staticmethod
235-
def exec_console_with_shell(shell, script_path, should_capture_stdout_err=False):
236-
if should_capture_stdout_err:
237-
script_process = subprocess.Popen([shell, script_path], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
238-
else:
239-
script_process = subprocess.Popen([shell, script_path], stdout=sys.stdout, stderr=subprocess.PIPE)
233+
def exec_console_with_shell(shell, script_path, should_capture_stdout=False, should_capture_stderr=False):
234+
stdout_flag = subprocess.PIPE if should_capture_stdout else sys.stdout
235+
stderr_flag = subprocess.PIPE if should_capture_stderr else sys.stderr
236+
237+
script_process = subprocess.Popen([shell, script_path], stdout=stdout_flag, stderr=stderr_flag)
238+
240239
return script_process
241240

242241
@staticmethod
243-
def exec_console_with_shell_script_list(shell_script_list, should_capture_stdout_err=False):
244-
if should_capture_stdout_err:
245-
script_process = subprocess.Popen(shell_script_list, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
246-
else:
247-
script_process = subprocess.Popen(shell_script_list, stdout=sys.stdout, stderr=sys.stderr)
242+
def exec_console_with_shell_script_list(shell_script_list, should_capture_stdout=False, should_capture_stderr=False):
243+
stdout_flag = subprocess.PIPE if should_capture_stdout else sys.stdout
244+
stderr_flag = subprocess.PIPE if should_capture_stderr else sys.stderr
245+
246+
script_process = subprocess.Popen(shell_script_list, stdout=stdout_flag, stderr=stderr_flag)
247+
248248
return script_process
249249

250250
@staticmethod

python/fedml/cli/edge_deployment/client_daemon.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -44,5 +44,5 @@
4444
]
4545
)
4646
ret_code, exec_out, exec_err = ClientConstants.get_console_sys_out_pipe_err_results(login_pid)
47-
time.sleep(15)
47+
time.sleep(3)
4848

python/fedml/cli/edge_deployment/client_runner.py

Lines changed: 34 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -253,14 +253,18 @@ def build_dynamic_args(self, run_config, package_conf_object, base_dir):
253253
if os.path.exists(bootstrap_script_path):
254254
bootstrap_stat = os.stat(bootstrap_script_path)
255255
if platform.system() == 'Windows':
256-
os.chmod(bootstrap_script_path, bootstrap_stat.st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
256+
os.chmod(bootstrap_script_path,
257+
bootstrap_stat.st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
257258
bootstrap_scripts = "{}".format(bootstrap_script_path)
258259
else:
259-
os.chmod(bootstrap_script_path, bootstrap_stat.st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
260-
bootstrap_scripts = "cd {}; ./{}".format(bootstrap_script_dir, os.path.basename(bootstrap_script_file))
260+
os.chmod(bootstrap_script_path,
261+
bootstrap_stat.st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
262+
bootstrap_scripts = "cd {}; ./{}".format(bootstrap_script_dir,
263+
os.path.basename(bootstrap_script_file))
261264
bootstrap_scripts = str(bootstrap_scripts).replace('\\', os.sep).replace('/', os.sep)
262265
logging.info("Bootstrap scripts are being executed...")
263-
process = ClientConstants.exec_console_with_script(bootstrap_scripts, should_capture_stdout_err=True)
266+
process = ClientConstants.exec_console_with_script(bootstrap_scripts, should_capture_stdout=True,
267+
should_capture_stderr=True)
264268
ret_code, out, err = ClientConstants.get_console_pipe_out_err_results(process)
265269
if out is not None:
266270
out_str = out.decode(encoding="utf-8")
@@ -340,12 +344,15 @@ def run(self):
340344
str(dynamic_args_config["rank"]),
341345
"--role",
342346
"client",
343-
]
347+
],
348+
should_capture_stdout=False,
349+
should_capture_stderr=True
344350
)
345351
ClientConstants.save_learning_process(process.pid)
346352
self.release_client_mqtt_mgr()
347-
ret_code, out, err = ClientConstants.get_console_sys_out_pipe_err_results(process)
348-
if ret_code != 0 and err is not None and str(err.decode(encoding="utf-8")).find('__finish ') == -1:
353+
ret_code, out, err = ClientConstants.get_console_pipe_out_err_results(process)
354+
if ret_code != 0 and err is not None and str(err.decode(encoding="utf-8")).find('__finish ') == -1 and \
355+
(out is not None and str(out.decode(encoding="utf-8")).find('__finish ') == -1):
349356
logging.error("Exception when executing client program: {}".format(err.decode(encoding="utf-8")))
350357
self.setup_client_mqtt_mgr()
351358
self.wait_client_mqtt_connected()
@@ -365,11 +372,6 @@ def stop_run(self):
365372

366373
logging.info("Stop run successfully.")
367374

368-
# Stop log processor for current run
369-
MLOpsRuntimeLogDaemon.get_instance(self.args).stop_log_processor(self.run_id, self.edge_id)
370-
371-
time.sleep(2)
372-
373375
# Notify MLOps with the stopping message
374376
self.mlops_metrics.report_client_training_status(self.edge_id, ClientConstants.MSG_MLOPS_CLIENT_STATUS_STOPPING)
375377

@@ -391,11 +393,6 @@ def stop_run_with_killed_status(self):
391393

392394
logging.info("Stop run successfully.")
393395

394-
# Stop log processor for current run
395-
MLOpsRuntimeLogDaemon.get_instance(self.args).stop_log_processor(self.run_id, self.edge_id)
396-
397-
time.sleep(2)
398-
399396
# Notify MLOps with the stopping message
400397
self.mlops_metrics.report_client_training_status(self.edge_id, ClientConstants.MSG_MLOPS_CLIENT_STATUS_STOPPING)
401398

@@ -435,11 +432,6 @@ def cleanup_run_when_starting_failed(self):
435432

436433
logging.info("Cleanup run successfully when starting failed.")
437434

438-
# Stop log processor for current run
439-
MLOpsRuntimeLogDaemon.get_instance(self.args).stop_log_processor(self.run_id, self.edge_id)
440-
441-
time.sleep(2)
442-
443435
self.reset_devices_status(self.edge_id, ClientConstants.MSG_MLOPS_CLIENT_STATUS_FAILED)
444436

445437
time.sleep(2)
@@ -465,11 +457,6 @@ def cleanup_run_when_finished(self):
465457

466458
logging.info("Cleanup run successfully when finished.")
467459

468-
# Stop log processor for current run
469-
MLOpsRuntimeLogDaemon.get_instance(self.args).stop_log_processor(self.run_id, self.edge_id)
470-
471-
time.sleep(2)
472-
473460
self.reset_devices_status(self.edge_id, ClientConstants.MSG_MLOPS_CLIENT_STATUS_FINISHED)
474461

475462
time.sleep(2)
@@ -616,6 +603,9 @@ def callback_stop_train(self, topic, payload):
616603
except Exception as e:
617604
pass
618605

606+
# Stop log processor for current run
607+
MLOpsRuntimeLogDaemon.get_instance(self.args).stop_log_processor(run_id, self.edge_id)
608+
619609
def callback_exit_train_with_exception(self, topic, payload):
620610
logging.info("callback_exit_train_with_exception: topic = %s, payload = %s" % (topic, payload))
621611

@@ -666,7 +656,12 @@ def callback_runner_id_status(self, topic, payload):
666656
run_id=run_id,
667657
)
668658
client_runner.device_status = status
669-
Process(target=client_runner.cleanup_client_with_status).start()
659+
status_process = Process(target=client_runner.cleanup_client_with_status)
660+
status_process.start()
661+
status_process.join(15)
662+
663+
# Stop log processor for current run
664+
MLOpsRuntimeLogDaemon.get_instance(self.args).stop_log_processor(run_id, edge_id)
670665

671666
def report_client_status(self):
672667
self.send_agent_active_msg()
@@ -768,7 +763,7 @@ def GetUUID():
768763
def bind_account_and_device_id(self, url, account_id, device_id, os_name, role="client"):
769764
ip = requests.get('https://checkip.amazonaws.com').text.strip()
770765
fedml_ver, exec_path, os_ver, cpu_info, python_ver, torch_ver, mpi_installed, \
771-
cpu_usage, available_mem, total_mem, gpu_info, gpu_available_mem, gpu_total_mem = get_sys_runner_info()
766+
cpu_usage, available_mem, total_mem, gpu_info, gpu_available_mem, gpu_total_mem = get_sys_runner_info()
772767
json_params = {
773768
"accountid": account_id,
774769
"deviceid": device_id,
@@ -794,7 +789,7 @@ def bind_account_and_device_id(self, url, account_id, device_id, os_name, role="
794789
if gpu_available_mem is not None:
795790
json_params["extra_infos"]["gpu_available_mem"] = gpu_available_mem
796791
if gpu_total_mem is not None:
797-
json_params["extra_infos"]["gpu_total_mem"] = gpu_total_mem
792+
json_params["extra_infos"]["gpu_total_mem"] = gpu_total_mem
798793
else:
799794
json_params["gpu"] = "None"
800795

@@ -803,12 +798,14 @@ def bind_account_and_device_id(self, url, account_id, device_id, os_name, role="
803798
try:
804799
requests.session().verify = cert_path
805800
response = requests.post(
806-
url, json=json_params, verify=True, headers={"content-type": "application/json", "Connection": "close"}
801+
url, json=json_params, verify=True,
802+
headers={"content-type": "application/json", "Connection": "close"}
807803
)
808804
except requests.exceptions.SSLError as err:
809805
MLOpsConfigs.install_root_ca_file()
810806
response = requests.post(
811-
url, json=json_params, verify=True, headers={"content-type": "application/json", "Connection": "close"}
807+
url, json=json_params, verify=True,
808+
headers={"content-type": "application/json", "Connection": "close"}
812809
)
813810
else:
814811
response = requests.post(url, json=json_params, headers={"Connection": "close"})
@@ -826,9 +823,9 @@ def send_agent_active_msg(self):
826823
active_topic = "/flclient_agent/active"
827824
status = MLOpsStatus.get_instance().get_client_agent_status(self.edge_id)
828825
if (
829-
status is not None
830-
and status != ClientConstants.MSG_MLOPS_CLIENT_STATUS_OFFLINE
831-
and status != ClientConstants.MSG_MLOPS_CLIENT_STATUS_IDLE
826+
status is not None
827+
and status != ClientConstants.MSG_MLOPS_CLIENT_STATUS_OFFLINE
828+
and status != ClientConstants.MSG_MLOPS_CLIENT_STATUS_IDLE
832829
):
833830
return
834831
status = ClientConstants.MSG_MLOPS_CLIENT_STATUS_IDLE
@@ -925,12 +922,11 @@ def setup_agent_mqtt_connection(self, service_config):
925922
MLOpsStatus.get_instance().set_client_agent_status(self.edge_id, ClientConstants.MSG_MLOPS_CLIENT_STATUS_IDLE)
926923
self.release_client_mqtt_mgr()
927924

925+
MLOpsRuntimeLogDaemon.get_instance(self.args).stop_all_log_processor()
926+
928927
def start_agent_mqtt_loop(self):
929928
# Start MQTT message loop
930929
try:
931930
self.mqtt_mgr.loop_forever()
932931
except Exception as e:
933932
pass
934-
935-
936-

python/fedml/cli/server_deployment/server_constants.py

Lines changed: 20 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -192,33 +192,33 @@ def exit_process(process):
192192
pass
193193

194194
@staticmethod
195-
def exec_console_with_script(script_path, should_capture_stdout_err=False):
196-
if should_capture_stdout_err:
197-
if platform.system() == 'Windows':
198-
script_process = subprocess.Popen(script_path, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
199-
else:
200-
script_process = subprocess.Popen(['sh', '-c', script_path], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
195+
def exec_console_with_script(script_path, should_capture_stdout=False, should_capture_stderr=False):
196+
stdout_flag = subprocess.PIPE if should_capture_stdout else sys.stdout
197+
stderr_flag = subprocess.PIPE if should_capture_stderr else sys.stderr
198+
199+
if platform.system() == 'Windows':
200+
script_process = subprocess.Popen(script_path, stdout=stdout_flag, stderr=stderr_flag)
201201
else:
202-
if platform.system() == 'Windows':
203-
script_process = subprocess.Popen(script_path, stdout=sys.stdout, stderr=subprocess.PIPE)
204-
else:
205-
script_process = subprocess.Popen(['sh', '-c', script_path], stdout=sys.stdout, stderr=subprocess.PIPE)
202+
script_process = subprocess.Popen(['sh', '-c', script_path], stdout=stdout_flag, stderr=stderr_flag)
203+
206204
return script_process
207205

208206
@staticmethod
209-
def exec_console_with_shell(shell, script_path, should_capture_stdout_err=False):
210-
if should_capture_stdout_err:
211-
script_process = subprocess.Popen([shell, script_path], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
212-
else:
213-
script_process = subprocess.Popen([shell, script_path], stdout=sys.stdout, stderr=subprocess.PIPE)
207+
def exec_console_with_shell(shell, script_path, should_capture_stdout=False, should_capture_stderr=False):
208+
stdout_flag = subprocess.PIPE if should_capture_stdout else sys.stdout
209+
stderr_flag = subprocess.PIPE if should_capture_stderr else sys.stderr
210+
211+
script_process = subprocess.Popen([shell, script_path], stdout=stdout_flag, stderr=stderr_flag)
212+
214213
return script_process
215214

216215
@staticmethod
217-
def exec_console_with_shell_script_list(shell_script_list, should_capture_stdout_err=False):
218-
if should_capture_stdout_err:
219-
script_process = subprocess.Popen(shell_script_list, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
220-
else:
221-
script_process = subprocess.Popen(shell_script_list, stdout=sys.stdout, stderr=sys.stderr)
216+
def exec_console_with_shell_script_list(shell_script_list, should_capture_stdout=False, should_capture_stderr=False):
217+
stdout_flag = subprocess.PIPE if should_capture_stdout else sys.stdout
218+
stderr_flag = subprocess.PIPE if should_capture_stderr else sys.stderr
219+
220+
script_process = subprocess.Popen(shell_script_list, stdout=stdout_flag, stderr=stderr_flag)
221+
222222
return script_process
223223

224224
@staticmethod

0 commit comments

Comments
 (0)