@@ -7,6 +7,7 @@
 import os
 import tempfile
 import time
+import warnings
 from collections import Counter
 import torch
 from fvcore.common.checkpoint import PeriodicCheckpointer as _PeriodicCheckpointer
@@ -31,6 +32,7 @@
     "AutogradProfiler",
     "EvalHook",
     "PreciseBN",
+    "TorchProfiler",
 ]
 
 
@@ -268,45 +270,59 @@ def load_state_dict(self, state_dict):
         self.scheduler.load_state_dict(state_dict)
 
 
-class AutogradProfiler(HookBase):
+class TorchProfiler(HookBase):
     """
-    A hook which runs `torch.autograd.profiler.profile`.
+    A hook which runs `torch.profiler.profile`.
 
     Examples:
     ::
-        hooks.AutogradProfiler(
-            lambda trainer: trainer.iter > 10 and trainer.iter < 20, self.cfg.OUTPUT_DIR
+        hooks.TorchProfiler(
+            lambda trainer: 10 < trainer.iter < 20, self.cfg.OUTPUT_DIR
         )
 
     The above example will run the profiler for iteration 10~20 and dump
     results to ``OUTPUT_DIR``. We did not profile the first few iterations
     because they are typically slower than the rest.
-    The result files can be loaded in the ``chrome://tracing`` page in chrome browser.
-
-    Note:
-        When used together with NCCL on older version of GPUs,
-        autograd profiler may cause deadlock because it unnecessarily allocates
-        memory on every device it sees. The memory management calls, if
-        interleaved with NCCL calls, lead to deadlock on GPUs that do not
-        support ``cudaLaunchCooperativeKernelMultiDevice``.
+    The result files can be loaded in the ``chrome://tracing`` page in the Chrome browser,
+    and the TensorBoard visualization can be viewed with
+    ``tensorboard --logdir OUTPUT_DIR/log``.
     """
 
-    def __init__(self, enable_predicate, output_dir, *, use_cuda=True):
+    def __init__(self, enable_predicate, output_dir, *, activities=None, save_tensorboard=True):
         """
         Args:
             enable_predicate (callable[trainer -> bool]): a function which takes a trainer,
                 and returns whether to enable the profiler.
                 It will be called once every step, and can be used to select which steps to profile.
             output_dir (str): the output directory to dump tracing files.
-            use_cuda (bool): same as in `torch.autograd.profiler.profile`.
+            activities (iterable): same as in `torch.profiler.profile`.
+            save_tensorboard (bool): whether to save TensorBoard visualizations at ``(output_dir)/log/``.
         """
         self._enable_predicate = enable_predicate
-        self._use_cuda = use_cuda
+        self._activities = activities
         self._output_dir = output_dir
+        self._save_tensorboard = save_tensorboard
 
     def before_step(self):
         if self._enable_predicate(self.trainer):
-            self._profiler = torch.autograd.profiler.profile(use_cuda=self._use_cuda)
+            if self._save_tensorboard:
+                on_trace_ready = torch.profiler.tensorboard_trace_handler(
+                    os.path.join(
+                        self._output_dir,
+                        "log",
+                        "profiler-tensorboard-iter{}".format(self.trainer.iter),
+                    )
+                )
+            else:
+                on_trace_ready = None
+            self._profiler = torch.profiler.profile(
+                activities=self._activities,
+                on_trace_ready=on_trace_ready,
+                record_shapes=True,
+                profile_memory=True,
+                with_stack=True,
+                with_flops=True,
+            )
             self._profiler.__enter__()
         else:
             self._profiler = None
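
# --- Illustrative usage (not part of this diff) ---
# A minimal sketch of registering the new hook on a detectron2-style trainer.
# DefaultTrainer, register_hooks(), get_cfg(), and OUTPUT_DIR are the usual
# detectron2 pieces; the iteration window is an arbitrary example value.
from detectron2.config import get_cfg
from detectron2.engine import DefaultTrainer, hooks

cfg = get_cfg()  # plus your model/dataset settings
trainer = DefaultTrainer(cfg)
trainer.register_hooks(
    [
        # The predicate is evaluated once per step, so this profiles
        # iterations 11-19 (strict inequalities on both ends).
        hooks.TorchProfiler(lambda t: 10 < t.iter < 20, cfg.OUTPUT_DIR)
    ]
)
trainer.train()
# Traces can then be inspected via chrome://tracing or, per the docstring
# above, `tensorboard --logdir OUTPUT_DIR/log` (torch-tb-profiler plugin).
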
@@ -332,6 +348,51 @@ def after_step(self):
             f.write(content)
 
 
+class AutogradProfiler(TorchProfiler):
+    """
+    A hook which runs `torch.autograd.profiler.profile`.
+
+    Examples:
+    ::
+        hooks.AutogradProfiler(
+            lambda trainer: 10 < trainer.iter < 20, self.cfg.OUTPUT_DIR
+        )
+
+    The above example will run the profiler for iteration 10~20 and dump
+    results to ``OUTPUT_DIR``. We did not profile the first few iterations
+    because they are typically slower than the rest.
+    The result files can be loaded in the ``chrome://tracing`` page in the Chrome browser.
+
+    Note:
+        When used together with NCCL on older versions of GPUs,
+        autograd profiler may cause deadlock because it unnecessarily allocates
+        memory on every device it sees. The memory management calls, if
+        interleaved with NCCL calls, lead to deadlock on GPUs that do not
+        support ``cudaLaunchCooperativeKernelMultiDevice``.
+    """
+
+    def __init__(self, enable_predicate, output_dir, *, use_cuda=True):
+        """
+        Args:
+            enable_predicate (callable[trainer -> bool]): a function which takes a trainer,
+                and returns whether to enable the profiler.
+                It will be called once every step, and can be used to select which steps to profile.
+            output_dir (str): the output directory to dump tracing files.
+            use_cuda (bool): same as in `torch.autograd.profiler.profile`.
+        """
+        warnings.warn("AutogradProfiler has been deprecated in favor of TorchProfiler.")
+        self._enable_predicate = enable_predicate
+        self._use_cuda = use_cuda
+        self._output_dir = output_dir
+
+    def before_step(self):
+        if self._enable_predicate(self.trainer):
+            self._profiler = torch.autograd.profiler.profile(use_cuda=self._use_cuda)
+            self._profiler.__enter__()
+        else:
+            self._profiler = None
+
+
 class EvalHook(HookBase):
     """
     Run an evaluation function periodically, and at the end of training.
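
# --- Standalone illustration (not part of this diff) ---
# A self-contained sketch of the torch.profiler configuration that
# TorchProfiler.before_step builds above; the toy model, tensor sizes,
# and output directory are made-up example values.
import os
import torch

net = torch.nn.Linear(128, 64)
inp = torch.randn(32, 128)
out_dir = "./profiler-demo"

with torch.profiler.profile(
    activities=None,  # default: CPU activity, plus CUDA when available
    on_trace_ready=torch.profiler.tensorboard_trace_handler(os.path.join(out_dir, "log")),
    record_shapes=True,
    profile_memory=True,
    with_stack=True,
    with_flops=True,
):
    net(inp).sum().backward()  # one profiled training-like step
# View with: tensorboard --logdir ./profiler-demo/log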