277 update nvflare example (Project-MONAI#278)

yiheng-wang-nv · wyli · web-flow · commit 96a32f93f536 · 2021-07-26T12:54:29.000+01:00
* update nvflare example

Signed-off-by: Yiheng Wang <vennw@nvidia.com>

* update for docker version

Signed-off-by: Yiheng Wang <vennw@nvidia.com>

Co-authored-by: Wenqi Li <wenqil@nvidia.com>
diff --git a/federated_learning/nvflare/nvflare_example/spleen_example/custom/monai_trainer.py b/federated_learning/nvflare/nvflare_example/spleen_example/custom/monai_trainer.py
@@ -163,16 +163,13 @@ def train(
         self.train_ctx.fl_init_validation_metric = self.eval_engine.state.metrics.get(
             self.eval_engine.state.key_metric_name, -1
         )
-        # record iteration and epoch data before training
-        starting_iters = self.train_engine.state.iteration
-        starting_epochs = self.train_engine.state.epoch
         self.train_engine.run()
         # calculate current iteration and epoch data after training
         self.train_ctx.current_iters = (
-            self.train_engine.state.iteration - starting_iters
+            self.train_engine.state.iteration - self.train_ctx.iter_of_start_time
         )
         self.train_ctx.current_executed_epochs = (
-            self.train_engine.state.epoch - starting_epochs
+            self.train_engine.state.epoch - self.train_ctx.epoch_of_start_time
         )
         # create a new `Shareable` object
         if self.train_engine.state.rank == 0:
diff --git a/federated_learning/nvflare/nvflare_example/spleen_example/custom/train_configer.py b/federated_learning/nvflare/nvflare_example/spleen_example/custom/train_configer.py
@@ -57,7 +57,6 @@ class TrainConfiger:
     for MONAI trainer.
     Please check the implementation of `SupervisedEvaluator` and `SupervisedTrainer`
     from `monai.engines` and determine which components can be used.
-
     Args:
         config_root: root folder path of config files.
         wf_config_file_name: json file name of the workflow config file.
@@ -84,7 +83,6 @@ def __init__(
             amp: whether to enable auto-mixed-precision training.
             use_gpu: whether to use GPU in training.
             multi_gpu: whether to use multiple GPUs for distributed training.
-
         """
         self.max_epochs = wf_config["max_epochs"]
         self.learning_rate = wf_config["learning_rate"]
diff --git a/federated_learning/nvflare/nvflare_example/spleen_example/custom/utils.py b/federated_learning/nvflare/nvflare_example/spleen_example/custom/utils.py
@@ -110,10 +110,11 @@ def generate_shareable(self, train_ctx: TrainContext, fl_ctx: FLContext):
         should be added into ShareableKey.META.
         """
 
-        # input the initlal metric into meta data. You can also add other parameters.
+        # input the initial metric into meta data. You can also add other parameters.
         meta_data = {}
         meta_data[FLConstants.INITIAL_METRICS] = train_ctx.fl_init_validation_metric
         meta_data[FLConstants.CURRENT_LEARNING_RATE] = train_ctx.current_learning_rate
+        meta_data[FLConstants.NUM_STEPS_CURRENT_ROUND] = train_ctx.current_iters
 
         shareable = Shareable()
         shareable[ShareableKey.TYPE] = ShareableValue.TYPE_WEIGHT_DIFF
diff --git a/federated_learning/nvflare/nvflare_example_docker/docker_files/Dockerfile b/federated_learning/nvflare/nvflare_example_docker/docker_files/Dockerfile
@@ -6,4 +6,4 @@ RUN apt-get -qq update
 RUN apt-get install -qq -y zip
 
 RUN python -m pip install --upgrade pip
-RUN python -m pip install nvflare==1.0.0
+RUN python -m pip install nvflare==1.0.2
diff --git a/federated_learning/nvflare/nvflare_example_docker/spleen_example/custom/monai_trainer.py b/federated_learning/nvflare/nvflare_example_docker/spleen_example/custom/monai_trainer.py
@@ -163,16 +163,13 @@ def train(
         self.train_ctx.fl_init_validation_metric = self.eval_engine.state.metrics.get(
             self.eval_engine.state.key_metric_name, -1
         )
-        # record iteration and epoch data before training
-        starting_iters = self.train_engine.state.iteration
-        starting_epochs = self.train_engine.state.epoch
         self.train_engine.run()
         # calculate current iteration and epoch data after training
         self.train_ctx.current_iters = (
-            self.train_engine.state.iteration - starting_iters
+            self.train_engine.state.iteration - self.train_ctx.iter_of_start_time
         )
         self.train_ctx.current_executed_epochs = (
-            self.train_engine.state.epoch - starting_epochs
+            self.train_engine.state.epoch - self.train_ctx.epoch_of_start_time
         )
         # create a new `Shareable` object
         if self.train_engine.state.rank == 0:
diff --git a/federated_learning/nvflare/nvflare_example_docker/spleen_example/custom/train_configer.py b/federated_learning/nvflare/nvflare_example_docker/spleen_example/custom/train_configer.py
@@ -57,7 +57,6 @@ class TrainConfiger:
     for MONAI trainer.
     Please check the implementation of `SupervisedEvaluator` and `SupervisedTrainer`
     from `monai.engines` and determine which components can be used.
-
     Args:
         config_root: root folder path of config files.
         wf_config_file_name: json file name of the workflow config file.
@@ -84,7 +83,6 @@ def __init__(
             amp: whether to enable auto-mixed-precision training.
             use_gpu: whether to use GPU in training.
             multi_gpu: whether to use multiple GPUs for distributed training.
-
         """
         self.max_epochs = wf_config["max_epochs"]
         self.learning_rate = wf_config["learning_rate"]
diff --git a/federated_learning/nvflare/nvflare_example_docker/spleen_example/custom/utils.py b/federated_learning/nvflare/nvflare_example_docker/spleen_example/custom/utils.py
@@ -110,10 +110,11 @@ def generate_shareable(self, train_ctx: TrainContext, fl_ctx: FLContext):
         should be added into ShareableKey.META.
         """
 
-        # input the initlal metric into meta data. You can also add other parameters.
+        # input the initial metric into meta data. You can also add other parameters.
         meta_data = {}
         meta_data[FLConstants.INITIAL_METRICS] = train_ctx.fl_init_validation_metric
         meta_data[FLConstants.CURRENT_LEARNING_RATE] = train_ctx.current_learning_rate
+        meta_data[FLConstants.NUM_STEPS_CURRENT_ROUND] = train_ctx.current_iters
 
         shareable = Shareable()
         shareable[ShareableKey.TYPE] = ShareableValue.TYPE_WEIGHT_DIFF

Original file line number	Diff line number	Diff line change
`@@ -163,16 +163,13 @@ def train(`
`163`	`163`	`self.train_ctx.fl_init_validation_metric = self.eval_engine.state.metrics.get(`
`164`	`164`	`self.eval_engine.state.key_metric_name, -1`
`165`	`165`	`)`
`166`		`- # record iteration and epoch data before training`
`167`		`- starting_iters = self.train_engine.state.iteration`
`168`		`- starting_epochs = self.train_engine.state.epoch`
`169`	`166`	`self.train_engine.run()`
`170`	`167`	`# calculate current iteration and epoch data after training`
`171`	`168`	`self.train_ctx.current_iters = (`
`172`		`- self.train_engine.state.iteration - starting_iters`
	`169`	`+ self.train_engine.state.iteration - self.train_ctx.iter_of_start_time`
`173`	`170`	`)`
`174`	`171`	`self.train_ctx.current_executed_epochs = (`
`175`		`- self.train_engine.state.epoch - starting_epochs`
	`172`	`+ self.train_engine.state.epoch - self.train_ctx.epoch_of_start_time`
`176`	`173`	`)`
`177`	`174`	`` # create a new `Shareable` object ``
`178`	`175`	`if self.train_engine.state.rank == 0:`