Skip to content

Commit 6ca9637

Browse files
committed
Improve the way profiling is performed
Signed-off-by: Pablo Ribalta <pribalta@nvidia.com>
1 parent aa06105 commit 6ca9637

File tree

6 files changed

+19
-8
lines changed

6 files changed

+19
-8
lines changed

TensorFlow/Segmentation/VNet/Dockerfile

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ ADD . /workspace/vnet
44
WORKDIR /workspace/vnet
55

66
RUN pip install --upgrade pip
7-
RUN pip install --user git+https://github.com/NVIDIA/dllogger
87
RUN pip install --disable-pip-version-check -r requirements.txt
98

109

TensorFlow/Segmentation/VNet/hooks/profiling_hook.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,6 @@ def before_run(self, run_context):
3636

3737
def end(self, session):
3838
deltas = [self._timestamps[i + 1] - self._timestamps[i] for i in range(len(self._timestamps) - 1)]
39-
self._logger.log(step=self._step, data={
40-
'average_throughput_' + 'train' if self._training else 'test': self._global_batch_size / np.mean(deltas)})
39+
self._logger.log(step=(), data={
40+
'average_throughput_train' if self._training else 'average_throughput_test': self._global_batch_size / np.mean(deltas)})
4141
self._logger.flush()

TensorFlow/Segmentation/VNet/hooks/train_hook.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ def after_run(self,
3636
run_context,
3737
run_values):
3838
if self._step % self._log_every == 0:
39-
self._logger.log(step=self._step, data={'total_loss': run_values.results[0]})
39+
self._logger.log(step=(self._step,), data={'total_loss': str(run_values.results[0])})
4040
self._step += 1
4141

4242
def end(self, session):

TensorFlow/Segmentation/VNet/main.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ def main(_):
8787

8888
run_config = tf.estimator.RunConfig(
8989
save_summary_steps=None,
90-
save_checkpoints_steps=dataset.train_steps * FLAGS.train_epochs,
90+
save_checkpoints_steps=None if FLAGS.benchmark else dataset.train_steps * FLAGS.train_epochs,
9191
save_checkpoints_secs=None,
9292
tf_random_seed=None,
9393
session_config=config,
@@ -112,22 +112,32 @@ def main(_):
112112
if hvd.rank() == 0:
113113
train_hooks += [TrainHook(FLAGS.log_every, DLLogger)]
114114

115+
DLLogger.log(step=tuple(), data={"training": "START"})
116+
115117
estimator.train(
116118
input_fn=lambda: dataset.train_fn(FLAGS.augment),
117119
steps=steps,
118120
hooks=train_hooks)
119121

122+
DLLogger.log(step=tuple(), data={"training": "FINISHED"})
123+
120124
if 'evaluate' in FLAGS.exec_mode:
121125
if hvd.rank() == 0:
122126
if FLAGS.train_split >= 1.0:
123127
raise ValueError("Missing argument: --train_split < 1.0")
128+
129+
DLLogger.log(step=tuple(), data={"evaluating": "START"})
130+
124131
result = estimator.evaluate(
125132
input_fn=dataset.eval_fn,
126133
steps=dataset.eval_steps,
127134
hooks=[])
128-
DLLogger.log(step=tuple(), data={'background_dice': result['background dice']})
129-
DLLogger.log(step=tuple(), data={'anterior_dice': result['Anterior dice']})
130-
DLLogger.log(step=tuple(), data={'posterior_dice': result['Posterior dice']})
135+
136+
DLLogger.log(step=tuple(), data={"evaluating": "FINISH"})
137+
138+
DLLogger.log(step=tuple(), data={'background_dice': str(result['background dice'])})
139+
DLLogger.log(step=tuple(), data={'anterior_dice': str(result['Anterior dice'])})
140+
DLLogger.log(step=tuple(), data={'posterior_dice': str(result['Posterior dice'])})
131141

132142
if 'predict' in FLAGS.exec_mode:
133143
count = 1

TensorFlow/Segmentation/VNet/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@ SimpleITK==1.1.0
22
requests
33
googledrivedownloader
44
tf2onnx
5+
git+git://github.com/NVIDIA/dllogger#egg=dllogger

TensorFlow/Segmentation/VNet/utils/model_fn.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ def vnet_v2(features, labels, mode, params):
122122
loss_scale='dynamic'
123123
)
124124

125+
125126
train_op = optimizer.minimize(total_loss, global_step=global_step)
126127

127128
eval_metric_ops = None

0 commit comments

Comments
 (0)