forked from pytorch/pytorch
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path__init__.py
487 lines (382 loc) · 15.1 KB
/
__init__.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
# mypy: allow-untyped-defs
r"""
This package introduces support for the XPU backend, specifically tailored for
Intel GPU optimization.
This package is lazily initialized, so you can always import it, and use
:func:`is_available()` to determine if your system supports XPU.
"""
import threading
import traceback
from functools import lru_cache
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
import torch
import torch._C
from torch import device as _device
from torch._utils import _dummy_type, _LazySeedTracker
from ._utils import _get_device_index
from .streams import Event, Stream
_initialized = False
_tls = threading.local()
_initialization_lock = threading.Lock()
_queued_calls: List[
Tuple[Callable[[], None], List[str]]
] = [] # don't invoke these until initialization occurs
_is_in_bad_fork = getattr(torch._C, "_xpu_isInBadFork", lambda: False)
_device_t = Union[_device, str, int, None]
_lazy_seed_tracker = _LazySeedTracker()
default_generators: Tuple[torch._C.Generator] = () # type: ignore[assignment]
def _is_compiled() -> bool:
r"""Return true if compile with XPU support."""
return torch._C._has_xpu
if _is_compiled():
_XpuDeviceProperties = torch._C._XpuDeviceProperties
_exchange_device = torch._C._xpu_exchangeDevice
_maybe_exchange_device = torch._C._xpu_maybeExchangeDevice
else:
# Define dummy if PyTorch was compiled without XPU
_XpuDeviceProperties = _dummy_type("_XpuDeviceProperties") # type: ignore[assignment, misc]
def _exchange_device(device: int) -> int:
raise NotImplementedError("PyTorch was compiled without XPU support")
def _maybe_exchange_device(device: int) -> int:
raise NotImplementedError("PyTorch was compiled without XPU support")
@lru_cache(maxsize=1)
def device_count() -> int:
r"""Return the number of XPU device available."""
if not _is_compiled():
return 0
return torch._C._xpu_getDeviceCount()
def is_available() -> bool:
r"""Return a bool indicating if XPU is currently available."""
# This function nerver throws.
return device_count() > 0
def is_bf16_supported():
r"""Return a bool indicating if the current XPU device supports dtype bfloat16."""
return True
def is_initialized():
r"""Return whether PyTorch's XPU state has been initialized."""
return _initialized and not _is_in_bad_fork()
def _lazy_call(callable, **kwargs):
if is_initialized():
callable()
else:
global _lazy_seed_tracker
if kwargs.get("seed_all", False):
_lazy_seed_tracker.queue_seed_all(callable, traceback.format_stack())
elif kwargs.get("seed", False):
_lazy_seed_tracker.queue_seed(callable, traceback.format_stack())
else:
# Don't store the actual traceback to avoid memory cycle
_queued_calls.append((callable, traceback.format_stack()))
def init():
r"""Initialize PyTorch's XPU state.
This is a Python API about lazy initialization that avoids initializing
XPU until the first time it is accessed. Does nothing if the XPU state is
already initialized.
"""
_lazy_init()
def _lazy_init():
global _initialized, _queued_calls
if is_initialized() or hasattr(_tls, "is_initializing"):
return
with _initialization_lock:
# This test was was protected via GIL. Double-check whether XPU has
# already been initialized.
if is_initialized():
return
# Stop promptly upon encountering a bad fork error.
if _is_in_bad_fork():
raise RuntimeError(
"Cannot re-initialize XPU in forked subprocess. To use XPU with "
"multiprocessing, you must use the 'spawn' start method"
)
if not _is_compiled():
raise AssertionError("Torch not compiled with XPU enabled")
# This function inits XPU backend and detects bad fork processing.
torch._C._xpu_init()
# Some of the queued calls may reentrantly call _lazy_init(); We need to
# just return without initializing in that case.
_tls.is_initializing = True
for calls in _lazy_seed_tracker.get_calls():
if calls:
_queued_calls.append(calls)
try:
for queued_call, orig_traceback in _queued_calls:
try:
queued_call()
except Exception as e:
msg = (
f"XPU call failed lazily at initialization with error: {str(e)}\n\n"
f"XPU call was originally invoked at:\n\n{''.join(orig_traceback)}"
)
raise Exception(msg) from e # noqa: TRY002
finally:
delattr(_tls, "is_initializing")
_initialized = True
class _DeviceGuard:
def __init__(self, index: int):
self.idx = index
self.prev_idx = -1
def __enter__(self):
self.prev_idx = torch.xpu._exchange_device(self.idx)
def __exit__(self, type: Any, value: Any, traceback: Any):
self.idx = torch.xpu._maybe_exchange_device(self.prev_idx)
return False
class device:
r"""Context-manager that changes the selected device.
Args:
device (torch.device or int or str): device index to select. It's a no-op if
this argument is a negative integer or ``None``.
"""
def __init__(self, device: Any):
self.idx = _get_device_index(device, optional=True)
self.prev_idx = -1
def __enter__(self):
self.prev_idx = torch.xpu._exchange_device(self.idx)
def __exit__(self, type: Any, value: Any, traceback: Any):
self.idx = torch.xpu._maybe_exchange_device(self.prev_idx)
return False
class device_of(device):
r"""Context-manager that changes the current device to that of given object.
You can use both tensors and storages as arguments. If a given object is
not allocated on a XPU, this is a no-op.
Args:
obj (Tensor or Storage): object allocated on the selected device.
"""
def __init__(self, obj):
idx = obj.get_device() if obj.is_xpu else -1
super().__init__(idx)
def set_device(device: _device_t) -> None:
r"""Set the current device.
Args:
device (torch.device or int or str): selected device. This function is a
no-op if this argument is negative.
"""
_lazy_init()
device = _get_device_index(device)
if device >= 0:
torch._C._xpu_setDevice(device)
def get_device_name(device: Optional[_device_t] = None) -> str:
r"""Get the name of a device.
Args:
device (torch.device or int or str, optional): device for which to
return the name. This function is a no-op if this argument is a
negative integer. It uses the current device, given by :func:`~torch.xpu.current_device`,
if :attr:`device` is ``None`` (default).
Returns:
str: the name of the device
"""
return get_device_properties(device).name
@lru_cache(None)
def get_device_capability(device: Optional[_device_t] = None) -> Dict[str, Any]:
r"""Get the xpu capability of a device.
Args:
device (torch.device or int or str, optional): device for which to
return the device capability. This function is a no-op if this
argument is a negative integer. It uses the current device, given by
:func:`~torch.xpu.current_device`, if :attr:`device` is ``None``
(default).
Returns:
Dict[str, Any]: the xpu capability dictionary of the device
"""
props = get_device_properties(device)
return {
prop: getattr(props, prop) for prop in dir(props) if not prop.startswith("__")
}
def get_device_properties(device: Optional[_device_t] = None) -> _XpuDeviceProperties:
r"""Get the properties of a device.
Args:
device (torch.device or int or str): device for which to return the
properties of the device.
Returns:
_XpuDeviceProperties: the properties of the device
"""
_lazy_init()
device = _get_device_index(device, optional=True)
if device < 0 or device >= device_count():
raise AssertionError("Invalid device index")
return _get_device_properties(device) # type: ignore[name-defined] # noqa: F821
def current_device() -> int:
r"""Return the index of a currently selected device."""
_lazy_init()
return torch._C._xpu_getDevice()
def _get_device(device: Union[int, str, torch.device]) -> torch.device:
r"""Return the torch.device type object from the passed in device.
Args:
device (torch.device or int or str): selected device.
"""
if isinstance(device, str):
device = torch.device(device)
elif isinstance(device, int):
device = torch.device("xpu", device)
return device
class StreamContext:
r"""Context-manager that selects a given stream.
All XPU kernels queued within its context will be enqueued on a selected
stream.
Args:
Stream (Stream): selected stream. This manager is a no-op if it's
``None``.
.. note:: Streams are per-device.
"""
cur_stream: Optional["torch.xpu.Stream"]
def __init__(self, stream: Optional["torch.xpu.Stream"]):
self.stream = stream
self.idx = _get_device_index(None, True)
if self.idx is None:
self.idx = -1
def __enter__(self):
cur_stream = self.stream
if cur_stream is None or self.idx == -1:
return
self.src_prev_stream = torch.xpu.current_stream(None)
# If the stream is not on the current device, then set the current stream on the device
if self.src_prev_stream.device != cur_stream.device:
with device(cur_stream.device):
self.dst_prev_stream = torch.xpu.current_stream(cur_stream.device)
torch.xpu.set_stream(cur_stream)
def __exit__(self, type: Any, value: Any, traceback: Any):
cur_stream = self.stream
if cur_stream is None or self.idx == -1:
return
# Reset the stream on the original device and destination device
if self.src_prev_stream.device != cur_stream.device:
torch.xpu.set_stream(self.dst_prev_stream)
torch.xpu.set_stream(self.src_prev_stream)
def stream(stream: Optional["torch.xpu.Stream"]) -> StreamContext:
r"""Wrap around the Context-manager StreamContext that selects a given stream.
Arguments:
stream (Stream): selected stream. This manager is a no-op if it's ``None``.
"""
return StreamContext(stream)
def _set_stream_by_id(stream_id, device_index, device_type):
r"""set stream specified by the stream id, device index and device type
Args: stream_id (int): not visible to the user, used to assigned to the specific stream.
device_index (int): selected device index.
device_type (int): selected device type.
"""
torch._C._xpu_setStream(
stream_id=stream_id,
device_index=device_index,
device_type=device_type,
)
def set_stream(stream: Stream):
r"""Set the current stream.This is a wrapper API to set the stream.
Usage of this function is discouraged in favor of the ``stream``
context manager.
Args:
stream (Stream): selected stream. This function is a no-op
if this argument is ``None``.
"""
if stream is None:
return
_lazy_init()
_set_stream_by_id(
stream_id=stream.stream_id,
device_index=stream.device_index,
device_type=stream.device_type,
)
def current_stream(device: Optional[_device_t] = None) -> Stream:
r"""Return the currently selected :class:`Stream` for a given device.
Args:
device (torch.device or int, optional): selected device. Returns
the currently selected :class:`Stream` for the current device, given
by :func:`~torch.xpu.current_device`, if :attr:`device` is ``None``
(default).
"""
_lazy_init()
streamdata = torch._C._xpu_getCurrentStream(
_get_device_index(device, optional=True)
)
return Stream(
stream_id=streamdata[0], device_index=streamdata[1], device_type=streamdata[2]
)
def synchronize(device: _device_t = None) -> None:
r"""Wait for all kernels in all streams on a XPU device to complete.
Args:
device (torch.device or int, optional): device for which to synchronize.
It uses the current device, given by :func:`~torch.xpu.current_device`,
if :attr:`device` is ``None`` (default).
"""
_lazy_init()
device = _get_device_index(device, optional=True)
return torch._C._xpu_synchronize(device)
def empty_cache() -> None:
r"""Release all unoccupied cached memory currently held by the caching
allocator so that those can be used in other XPU application.
.. note::
:func:`~torch.xpu.empty_cache` doesn't increase the amount of XPU
memory available for PyTorch. However, it may help reduce fragmentation
of XPU memory in certain cases.
"""
if is_initialized():
torch._C._xpu_emptyCache()
def _get_generator(device: torch.device) -> torch._C.Generator:
r"""Return the XPU Generator object for the given device.
Args:
device (torch.device): selected device.
"""
idx = device.index
if idx is None:
idx = current_device()
return torch.xpu.default_generators[idx]
def _set_rng_state_offset(
offset: int, device: Union[int, str, torch.device] = "xpu"
) -> None:
r"""Set the random number generator state offset of the specified GPU.
Args:
offset (int): The desired offset
device (torch.device or int, optional): The device to set the RNG state.
Default: ``'xpu'`` (i.e., ``torch.device('xpu')``, the current XPU device).
"""
final_device = _get_device(device)
def cb():
default_generator = _get_generator(final_device)
default_generator.set_offset(offset)
_lazy_call(cb)
def _get_rng_state_offset(device: Union[int, str, torch.device] = "xpu") -> int:
r"""Return the random number generator state offset of the specified GPU.
Args:
device (torch.device or int, optional): The device to return the RNG state offset of.
Default: ``'xpu'`` (i.e., ``torch.device('xpu')``, the current XPU device).
.. warning::
This function eagerly initializes XPU.
"""
_lazy_init()
final_device = _get_device(device)
default_generator = _get_generator(final_device)
return default_generator.get_offset()
from .random import * # noqa: F403
__all__ = [
"Event",
"Stream",
"StreamContext",
"current_device",
"current_stream",
"default_generators",
"device",
"device_of",
"device_count",
"empty_cache",
"get_device_capability",
"get_device_name",
"get_device_properties",
"get_rng_state",
"get_rng_state_all",
"get_stream",
"init",
"initial_seed",
"is_available",
"is_bf16_supported",
"is_initialized",
"manual_seed",
"manual_seed_all",
"seed",
"seed_all",
"set_device",
"set_rng_state",
"set_rng_state_all",
"set_stream",
"stream",
"streams",
"synchronize",
]