This repository has been archived by the owner on Jul 1, 2024. It is now read-only.

Plot histograms of parameters to tensorboard #432

Closed
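This change adds parameter-histogram logging to the TensorBoard hook in two places: once right before training starts (written at global_step=-1) and once at the end of every train phase, with the writes kept behind the is_master() guard so only one replica touches the event file in distributed runs. Below is a minimal, self-contained sketch of that behaviour assuming a plain torch SummaryWriter; the model and writer are illustrative stand-ins, not the hook's actual task.base_model or tb_writer.

import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter

model = nn.Linear(10, 2)      # stand-in for task.base_model
tb_writer = SummaryWriter()   # stand-in for the hook's writer

# Before training starts (mirrors the on_phase_start addition).
for name, parameter in model.named_parameters():
    tb_writer.add_histogram(f"Parameters/{name}", parameter, global_step=-1)

# At the end of each train phase (mirrors the on_phase_end addition); the phase
# index is simulated here, in the hook it comes from the task.
for phase_type_idx in range(3):
    for name, parameter in model.named_parameters():
        tb_writer.add_histogram(
            f"Parameters/{name}", parameter, global_step=phase_type_idx
        )

tb_writer.flush()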
31 changes: 20 additions & 11 deletions classy_vision/hooks/tensorboard_plot_hook.py
@@ -60,6 +60,16 @@ def on_phase_start(self, task: "tasks.ClassyTask") -> None:
self.wall_times = []
self.num_steps_global = []

if not is_master():
return

# log the parameters before training starts
if task.train and task.train_phase_idx == 0:
for name, parameter in task.base_model.named_parameters():
self.tb_writer.add_histogram(
f"Parameters/{name}", parameter, global_step=-1
)

def on_step(self, task: "tasks.ClassyTask") -> None:
"""Store the observed learning rates."""
if self.learning_rates is None:
@@ -92,27 +102,26 @@ def on_phase_end(self, task: "tasks.ClassyTask") -> None:
logging.info(f"Plotting to Tensorboard for {phase_type} phase {phase_type_idx}")

phase_type = task.phase_type
loss_key = f"{phase_type}_loss"
learning_rate_key = f"{phase_type}_learning_rate_updates"
learning_rate_key = f"Learning Rate/{phase_type}"

if task.train:
for loss, learning_rate, global_step, wall_time in zip(
task.losses, self.learning_rates, self.num_steps_global, self.wall_times
for learning_rate, global_step, wall_time in zip(
self.learning_rates, self.num_steps_global, self.wall_times
):
loss /= task.get_batchsize_per_replica()
self.tb_writer.add_scalar(
loss_key, loss, global_step=global_step, walltime=wall_time
)
self.tb_writer.add_scalar(
learning_rate_key,
learning_rate,
global_step=global_step,
walltime=wall_time,
)
for name, parameter in task.base_model.named_parameters():
self.tb_writer.add_histogram(
f"Parameters/{name}", parameter, global_step=phase_type_idx
)

loss_avg = sum(task.losses) / (batches * task.get_batchsize_per_replica())

loss_key = "avg_{phase_type}_loss".format(phase_type=task.phase_type)
loss_key = "Losses/{phase_type}".format(phase_type=task.phase_type)
self.tb_writer.add_scalar(loss_key, loss_avg, global_step=phase_type_idx)

# plot meters which return a dict
@@ -122,13 +131,13 @@ def on_phase_end(self, task: "tasks.ClassyTask") -> None:
continue
for name, value in meter.value.items():
if isinstance(value, float):
meter_key = f"{phase_type}_{meter.name}_{name}"
meter_key = f"Meters/{phase_type}/{meter.name}/{name}"
self.tb_writer.add_scalar(
meter_key, value, global_step=phase_type_idx
)
else:
log.warn(
f"Skipping meter name {meter.name}_{name} with value: {value}"
f"Skipping meter name {meter.name}/{name} with value: {value}"
)
continue

17 changes: 9 additions & 8 deletions test/manual/hooks_tensorboard_plot_hook_test.py
@@ -93,24 +93,20 @@ def test_writer(self, mock_is_master_func: mock.MagicMock) -> None:
if master:
# add_scalar() should have been called with the right scalars
if train:
loss_key = f"{phase_type}_loss"
learning_rate_key = f"{phase_type}_learning_rate_updates"
summary_writer.add_scalar.assert_any_call(
loss_key, mock.ANY, global_step=mock.ANY, walltime=mock.ANY
)
learning_rate_key = f"Learning Rate/{phase_type}"
summary_writer.add_scalar.assert_any_call(
learning_rate_key,
mock.ANY,
global_step=mock.ANY,
walltime=mock.ANY,
)
avg_loss_key = f"avg_{phase_type}_loss"
avg_loss_key = f"Losses/{phase_type}"
summary_writer.add_scalar.assert_any_call(
avg_loss_key, mock.ANY, global_step=mock.ANY
)
for meter in task.meters:
for name in meter.value:
meter_key = f"{phase_type}_{meter.name}_{name}"
meter_key = f"Meters/{phase_type}/{meter.name}/{name}"
summary_writer.add_scalar.assert_any_call(
meter_key, mock.ANY, global_step=mock.ANY
)
@@ -135,6 +131,11 @@ def __init__(self):
def add_scalar(self, key, value, global_step=None, walltime=None) -> None:
self.scalar_logs[key] = self.scalar_logs.get(key, []) + [value]

def add_histogram(
self, key, value, global_step=None, walltime=None
) -> None:
return

def flush(self):
return

@@ -154,6 +155,6 @@ def flush(self):

# We have 20 samples, batch size is 10. Each epoch is done in two steps.
self.assertEqual(
writer.scalar_logs["train_learning_rate_updates"],
writer.scalar_logs["Learning Rate/train"],
[0, 1 / 6, 2 / 6, 3 / 6, 4 / 6, 5 / 6],
)
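For readers tracing the final assertion, here is a small reconstruction of where the expected learning-rate values come from. It assumes the test trains for three epochs with a learning rate that ramps linearly over the total number of optimizer steps; neither detail is visible in this excerpt, so treat the constants below as assumptions.

num_samples, batch_size = 20, 10               # stated in the test's comment
num_epochs = 3                                 # assumption: not shown in this excerpt
steps_per_epoch = num_samples // batch_size    # -> 2 steps per epoch
total_steps = steps_per_epoch * num_epochs     # -> 6 steps overall

# Assumed linear ramp: the learning rate recorded at step i is i / total_steps.
expected = [step / total_steps for step in range(total_steps)]
assert expected == [0, 1 / 6, 2 / 6, 3 / 6, 4 / 6, 5 / 6]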