Skip to content

Commit 96a32f9

Browse files
yiheng-wang-nv and wyli authored
277 update nvflare example (Project-MONAI#278)
* update nvflare example
  Signed-off-by: Yiheng Wang <vennw@nvidia.com>
* update for docker version
  Signed-off-by: Yiheng Wang <vennw@nvidia.com>

Co-authored-by: Wenqi Li <wenqil@nvidia.com>
1 parent 7651855 commit 96a32f9

File tree

7 files changed

+9
-17
lines changed

7 files changed

+9
-17
lines changed

federated_learning/nvflare/nvflare_example/spleen_example/custom/monai_trainer.py

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -163,16 +163,13 @@ def train(
163163
self.train_ctx.fl_init_validation_metric = self.eval_engine.state.metrics.get(
164164
self.eval_engine.state.key_metric_name, -1
165165
)
166-
# record iteration and epoch data before training
167-
starting_iters = self.train_engine.state.iteration
168-
starting_epochs = self.train_engine.state.epoch
169166
self.train_engine.run()
170167
# calculate current iteration and epoch data after training
171168
self.train_ctx.current_iters = (
172-
self.train_engine.state.iteration - starting_iters
169+
self.train_engine.state.iteration - self.train_ctx.iter_of_start_time
173170
)
174171
self.train_ctx.current_executed_epochs = (
175-
self.train_engine.state.epoch - starting_epochs
172+
self.train_engine.state.epoch - self.train_ctx.epoch_of_start_time
176173
)
177174
# create a new `Shareable` object
178175
if self.train_engine.state.rank == 0:

federated_learning/nvflare/nvflare_example/spleen_example/custom/train_configer.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -57,7 +57,6 @@ class TrainConfiger:
5757
for MONAI trainer.
5858
Please check the implementation of `SupervisedEvaluator` and `SupervisedTrainer`
5959
from `monai.engines` and determine which components can be used.
60-
6160
Args:
6261
config_root: root folder path of config files.
6362
wf_config_file_name: json file name of the workflow config file.
@@ -84,7 +83,6 @@ def __init__(
8483
amp: whether to enable auto-mixed-precision training.
8584
use_gpu: whether to use GPU in training.
8685
multi_gpu: whether to use multiple GPUs for distributed training.
87-
8886
"""
8987
self.max_epochs = wf_config["max_epochs"]
9088
self.learning_rate = wf_config["learning_rate"]

federated_learning/nvflare/nvflare_example/spleen_example/custom/utils.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -110,10 +110,11 @@ def generate_shareable(self, train_ctx: TrainContext, fl_ctx: FLContext):
110110
should be added into ShareableKey.META.
111111
"""
112112

113-
# input the initlal metric into meta data. You can also add other parameters.
113+
# input the initial metric into meta data. You can also add other parameters.
114114
meta_data = {}
115115
meta_data[FLConstants.INITIAL_METRICS] = train_ctx.fl_init_validation_metric
116116
meta_data[FLConstants.CURRENT_LEARNING_RATE] = train_ctx.current_learning_rate
117+
meta_data[FLConstants.NUM_STEPS_CURRENT_ROUND] = train_ctx.current_iters
117118

118119
shareable = Shareable()
119120
shareable[ShareableKey.TYPE] = ShareableValue.TYPE_WEIGHT_DIFF

federated_learning/nvflare/nvflare_example_docker/docker_files/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,4 +6,4 @@ RUN apt-get -qq update
66
RUN apt-get install -qq -y zip
77

88
RUN python -m pip install --upgrade pip
9-
RUN python -m pip install nvflare==1.0.0
9+
RUN python -m pip install nvflare==1.0.2

federated_learning/nvflare/nvflare_example_docker/spleen_example/custom/monai_trainer.py

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -163,16 +163,13 @@ def train(
163163
self.train_ctx.fl_init_validation_metric = self.eval_engine.state.metrics.get(
164164
self.eval_engine.state.key_metric_name, -1
165165
)
166-
# record iteration and epoch data before training
167-
starting_iters = self.train_engine.state.iteration
168-
starting_epochs = self.train_engine.state.epoch
169166
self.train_engine.run()
170167
# calculate current iteration and epoch data after training
171168
self.train_ctx.current_iters = (
172-
self.train_engine.state.iteration - starting_iters
169+
self.train_engine.state.iteration - self.train_ctx.iter_of_start_time
173170
)
174171
self.train_ctx.current_executed_epochs = (
175-
self.train_engine.state.epoch - starting_epochs
172+
self.train_engine.state.epoch - self.train_ctx.epoch_of_start_time
176173
)
177174
# create a new `Shareable` object
178175
if self.train_engine.state.rank == 0:

federated_learning/nvflare/nvflare_example_docker/spleen_example/custom/train_configer.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -57,7 +57,6 @@ class TrainConfiger:
5757
for MONAI trainer.
5858
Please check the implementation of `SupervisedEvaluator` and `SupervisedTrainer`
5959
from `monai.engines` and determine which components can be used.
60-
6160
Args:
6261
config_root: root folder path of config files.
6362
wf_config_file_name: json file name of the workflow config file.
@@ -84,7 +83,6 @@ def __init__(
8483
amp: whether to enable auto-mixed-precision training.
8584
use_gpu: whether to use GPU in training.
8685
multi_gpu: whether to use multiple GPUs for distributed training.
87-
8886
"""
8987
self.max_epochs = wf_config["max_epochs"]
9088
self.learning_rate = wf_config["learning_rate"]

federated_learning/nvflare/nvflare_example_docker/spleen_example/custom/utils.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -110,10 +110,11 @@ def generate_shareable(self, train_ctx: TrainContext, fl_ctx: FLContext):
110110
should be added into ShareableKey.META.
111111
"""
112112

113-
# input the initlal metric into meta data. You can also add other parameters.
113+
# input the initial metric into meta data. You can also add other parameters.
114114
meta_data = {}
115115
meta_data[FLConstants.INITIAL_METRICS] = train_ctx.fl_init_validation_metric
116116
meta_data[FLConstants.CURRENT_LEARNING_RATE] = train_ctx.current_learning_rate
117+
meta_data[FLConstants.NUM_STEPS_CURRENT_ROUND] = train_ctx.current_iters
117118

118119
shareable = Shareable()
119120
shareable[ShareableKey.TYPE] = ShareableValue.TYPE_WEIGHT_DIFF

0 commit comments

Comments
 (0)