From e29b388883634250e8dd643477eb84f53adee0cb Mon Sep 17 00:00:00 2001
From: Mannat Singh
Date: Mon, 16 Mar 2020 08:47:03 -0700
Subject: [PATCH] Plot histograms of parameters to tensorboard (#432)

Summary:
Pull Request resolved: https://github.com/facebookresearch/ClassyVision/pull/432

- Plot the histogram of weights for every parameter in the model at the end of every train phase.
- Updated the various scalars plotted to Tensorboard to have their own tags, just like "Speed", to organize things better.

Adding the activations and gradients is non-trivial since they depend on the input, so skipping that for now.

Reviewed By: vreis

Differential Revision: D20427992

fbshipit-source-id: 49a552cb2099782c4fa219f2a402aa89478bccea
---
 classy_vision/hooks/tensorboard_plot_hook.py | 31 ++++++++++++-------
 .../hooks_tensorboard_plot_hook_test.py      | 17 +++++-----
 2 files changed, 29 insertions(+), 19 deletions(-)

diff --git a/classy_vision/hooks/tensorboard_plot_hook.py b/classy_vision/hooks/tensorboard_plot_hook.py
index e57edf1523..4d5b906d23 100644
--- a/classy_vision/hooks/tensorboard_plot_hook.py
+++ b/classy_vision/hooks/tensorboard_plot_hook.py
@@ -60,6 +60,16 @@ def on_phase_start(self, task: "tasks.ClassyTask") -> None:
         self.wall_times = []
         self.num_steps_global = []
 
+        if not is_master():
+            return
+
+        # log the parameters before training starts
+        if task.train and task.train_phase_idx == 0:
+            for name, parameter in task.base_model.named_parameters():
+                self.tb_writer.add_histogram(
+                    f"Parameters/{name}", parameter, global_step=-1
+                )
+
     def on_step(self, task: "tasks.ClassyTask") -> None:
         """Store the observed learning rates."""
         if self.learning_rates is None:
@@ -92,27 +102,26 @@ def on_phase_end(self, task: "tasks.ClassyTask") -> None:
 
         logging.info(f"Plotting to Tensorboard for {phase_type} phase {phase_type_idx}")
 
         phase_type = task.phase_type
-        loss_key = f"{phase_type}_loss"
-        learning_rate_key = f"{phase_type}_learning_rate_updates"
+        learning_rate_key = f"Learning Rate/{phase_type}"
 
         if task.train:
-            for loss, learning_rate, global_step, wall_time in zip(
-                task.losses, self.learning_rates, self.num_steps_global, self.wall_times
+            for learning_rate, global_step, wall_time in zip(
+                self.learning_rates, self.num_steps_global, self.wall_times
             ):
-                loss /= task.get_batchsize_per_replica()
-                self.tb_writer.add_scalar(
-                    loss_key, loss, global_step=global_step, walltime=wall_time
-                )
                 self.tb_writer.add_scalar(
                     learning_rate_key,
                     learning_rate,
                     global_step=global_step,
                     walltime=wall_time,
                 )
+            for name, parameter in task.base_model.named_parameters():
+                self.tb_writer.add_histogram(
+                    f"Parameters/{name}", parameter, global_step=phase_type_idx
+                )
 
         loss_avg = sum(task.losses) / (batches * task.get_batchsize_per_replica())
-        loss_key = "avg_{phase_type}_loss".format(phase_type=task.phase_type)
+        loss_key = "Losses/{phase_type}".format(phase_type=task.phase_type)
         self.tb_writer.add_scalar(loss_key, loss_avg, global_step=phase_type_idx)
 
         # plot meters which return a dict
@@ -122,13 +131,13 @@ def on_phase_end(self, task: "tasks.ClassyTask") -> None:
                 continue
             for name, value in meter.value.items():
                 if isinstance(value, float):
-                    meter_key = f"{phase_type}_{meter.name}_{name}"
+                    meter_key = f"Meters/{phase_type}/{meter.name}/{name}"
                     self.tb_writer.add_scalar(
                         meter_key, value, global_step=phase_type_idx
                     )
                 else:
                     log.warn(
-                        f"Skipping meter name {meter.name}_{name} with value: {value}"
+                        f"Skipping meter name {meter.name}/{name} with value: {value}"
                     )
                     continue
 
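For readers unfamiliar with the Tensorboard calls used above, the following standalone sketch shows the effect of the new slash-separated tags and the per-parameter histograms. It is not part of the patch; the toy model, the "runs/demo" log directory, and the scalar values are assumptions for illustration.

# Standalone sketch, not part of this patch. The toy model, the "runs/demo"
# log directory, and the scalar values are assumptions for illustration.
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter

model = nn.Sequential(nn.Linear(16, 8), nn.ReLU(), nn.Linear(8, 2))
writer = SummaryWriter(log_dir="runs/demo")
phase_idx = 0  # stands in for the hook's phase_type_idx

# One histogram per parameter, grouped under a shared "Parameters/" prefix,
# mirroring what the hook now writes at the end of every train phase.
for name, parameter in model.named_parameters():
    writer.add_histogram(f"Parameters/{name}", parameter, global_step=phase_idx)

# Slash-separated scalar tags make Tensorboard group plots into sections
# ("Learning Rate", "Losses", "Meters"), like the existing "Speed" tag.
writer.add_scalar("Learning Rate/train", 0.1, global_step=phase_idx)
writer.add_scalar("Losses/train", 0.42, global_step=phase_idx)

writer.flush()
writer.close()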
diff --git a/test/manual/hooks_tensorboard_plot_hook_test.py b/test/manual/hooks_tensorboard_plot_hook_test.py
index d1a80ccc92..6214c4f398 100644
--- a/test/manual/hooks_tensorboard_plot_hook_test.py
+++ b/test/manual/hooks_tensorboard_plot_hook_test.py
@@ -93,24 +93,20 @@ def test_writer(self, mock_is_master_func: mock.MagicMock) -> None:
             if master:
                 # add_scalar() should have been called with the right scalars
                 if train:
-                    loss_key = f"{phase_type}_loss"
-                    learning_rate_key = f"{phase_type}_learning_rate_updates"
-                    summary_writer.add_scalar.assert_any_call(
-                        loss_key, mock.ANY, global_step=mock.ANY, walltime=mock.ANY
-                    )
+                    learning_rate_key = f"Learning Rate/{phase_type}"
                     summary_writer.add_scalar.assert_any_call(
                         learning_rate_key,
                         mock.ANY,
                         global_step=mock.ANY,
                         walltime=mock.ANY,
                     )
-                avg_loss_key = f"avg_{phase_type}_loss"
+                avg_loss_key = f"Losses/{phase_type}"
                 summary_writer.add_scalar.assert_any_call(
                     avg_loss_key, mock.ANY, global_step=mock.ANY
                 )
                 for meter in task.meters:
                     for name in meter.value:
-                        meter_key = f"{phase_type}_{meter.name}_{name}"
+                        meter_key = f"Meters/{phase_type}/{meter.name}/{name}"
                         summary_writer.add_scalar.assert_any_call(
                             meter_key, mock.ANY, global_step=mock.ANY
                         )
@@ -135,6 +131,11 @@ def __init__(self):
             def add_scalar(self, key, value, global_step=None, walltime=None) -> None:
                 self.scalar_logs[key] = self.scalar_logs.get(key, []) + [value]
 
+            def add_histogram(
+                self, key, value, global_step=None, walltime=None
+            ) -> None:
+                return
+
             def flush(self):
                 return
 
@@ -154,6 +155,6 @@ def flush(self):
 
         # We have 20 samples, batch size is 10. Each epoch is done in two steps.
        self.assertEqual(
-            writer.scalar_logs["train_learning_rate_updates"],
+            writer.scalar_logs["Learning Rate/train"],
             [0, 1 / 6, 2 / 6, 3 / 6, 4 / 6, 5 / 6],
         )
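The test exercises the hook through a dummy writer that records add_scalar calls by tag and accepts add_histogram calls as no-ops. Below is a condensed, self-contained sketch of that capture pattern; the class name and the values are illustrative, not the test's actual fixtures.

# Condensed sketch of the capture pattern the updated test relies on; the
# class name and the values below are illustrative, not the test's fixtures.
class DummySummaryWriter:
    """Records scalars by tag; accepts histogram calls as no-ops."""

    def __init__(self):
        self.scalar_logs = {}

    def add_scalar(self, key, value, global_step=None, walltime=None) -> None:
        self.scalar_logs[key] = self.scalar_logs.get(key, []) + [value]

    def add_histogram(self, key, value, global_step=None, walltime=None) -> None:
        return

    def flush(self):
        return


writer = DummySummaryWriter()
writer.add_scalar("Learning Rate/train", 0.0, global_step=0)
writer.add_histogram("Parameters/0.weight", None, global_step=-1)  # ignored
assert writer.scalar_logs["Learning Rate/train"] == [0.0]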