forked from pytorch/pytorch
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path_cuda_trace.py
99 lines (71 loc) · 3.12 KB
/
_cuda_trace.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import logging
from typing import Callable, Generic, List
from typing_extensions import ParamSpec
logger = logging.getLogger(__name__)
P = ParamSpec("P")
class CallbackRegistry(Generic[P]):
def __init__(self, name: str):
self.name = name
self.callback_list: List[Callable[P, None]] = []
def add_callback(self, cb: Callable[P, None]) -> None:
self.callback_list.append(cb)
def fire_callbacks(self, *args: P.args, **kwargs: P.kwargs) -> None:
for cb in self.callback_list:
try:
cb(*args, **kwargs)
except Exception as e:
logger.exception(
f"Exception in callback for {self.name} registered with CUDA trace"
)
CUDAEventCreationCallbacks: "CallbackRegistry[int]" = CallbackRegistry(
"CUDA event creation"
)
CUDAEventDeletionCallbacks: "CallbackRegistry[int]" = CallbackRegistry(
"CUDA event deletion"
)
CUDAEventRecordCallbacks: "CallbackRegistry[int, int]" = CallbackRegistry(
"CUDA event record"
)
CUDAEventWaitCallbacks: "CallbackRegistry[int, int]" = CallbackRegistry(
"CUDA event wait"
)
CUDAMemoryAllocationCallbacks: "CallbackRegistry[int]" = CallbackRegistry(
"CUDA memory allocation"
)
CUDAMemoryDeallocationCallbacks: "CallbackRegistry[int]" = CallbackRegistry(
"CUDA memory deallocation"
)
CUDAStreamCreationCallbacks: "CallbackRegistry[int]" = CallbackRegistry(
"CUDA stream creation"
)
CUDADeviceSynchronizationCallbacks: "CallbackRegistry[[]]" = CallbackRegistry(
"CUDA device synchronization"
)
CUDAStreamSynchronizationCallbacks: "CallbackRegistry[int]" = CallbackRegistry(
"CUDA stream synchronization"
)
CUDAEventSynchronizationCallbacks: "CallbackRegistry[int]" = CallbackRegistry(
"CUDA event synchronization"
)
def register_callback_for_cuda_event_creation(cb: Callable[[int], None]) -> None:
CUDAEventCreationCallbacks.add_callback(cb)
def register_callback_for_cuda_event_deletion(cb: Callable[[int], None]) -> None:
CUDAEventDeletionCallbacks.add_callback(cb)
def register_callback_for_cuda_event_record(cb: Callable[[int, int], None]) -> None:
CUDAEventRecordCallbacks.add_callback(cb)
def register_callback_for_cuda_event_wait(cb: Callable[[int, int], None]) -> None:
CUDAEventWaitCallbacks.add_callback(cb)
def register_callback_for_cuda_memory_allocation(cb: Callable[[int], None]) -> None:
CUDAMemoryAllocationCallbacks.add_callback(cb)
def register_callback_for_cuda_memory_deallocation(cb: Callable[[int], None]) -> None:
CUDAMemoryDeallocationCallbacks.add_callback(cb)
def register_callback_for_cuda_stream_creation(cb: Callable[[int], None]) -> None:
CUDAStreamCreationCallbacks.add_callback(cb)
def register_callback_for_cuda_device_synchronization(cb: Callable[[], None]) -> None:
CUDADeviceSynchronizationCallbacks.add_callback(cb)
def register_callback_for_cuda_stream_synchronization(
cb: Callable[[int], None]
) -> None:
CUDAStreamSynchronizationCallbacks.add_callback(cb)
def register_callback_for_cuda_event_synchronization(cb: Callable[[int], None]) -> None:
CUDAEventSynchronizationCallbacks.add_callback(cb)