# evoformer_attn.py (forked from microsoft/DeepSpeed)
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team

from .builder import CUDAOpBuilder, installed_cuda_version
import os
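

# Op builder for the DeepSpeed4Science evoformer attention CUDA extension.
# CUTLASS >= 3.1.0 is required; $CUTLASS_PATH must point at the CUTLASS headers
# (either a source checkout or a pip-installed nvidia-cutlass package).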
class EvoformerAttnBuilder(CUDAOpBuilder):
    BUILD_VAR = "DS_BUILD_EVOFORMER_ATTN"
    NAME = "evoformer_attn"

    def __init__(self, name=None):
        name = self.NAME if name is None else name
        super().__init__(name=name)
        self.cutlass_path = os.environ.get('CUTLASS_PATH')

    def absolute_name(self):
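        """Fully qualified name of the compiled extension module."""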
        return f'deepspeed.ops.{self.NAME}_op'

    def extra_ldflags(self):
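        """Extra linker flags: link cuRAND on CUDA builds; nothing extra on ROCm."""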
        if not self.is_rocm_pytorch():
            return ['-lcurand']
        else:
            return []

    def sources(self):
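        """C++/CUDA source files compiled into the extension."""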
        src_dir = 'csrc/deepspeed4science/evoformer_attn'
        return [f'{src_dir}/attention.cpp', f'{src_dir}/attention_back.cu', f'{src_dir}/attention_cu.cu']

    def nvcc_args(self):
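        """Add the compute capability of device 0 (e.g. -DGPU_ARCH=80) to the default nvcc flags."""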
        args = super().nvcc_args()
        try:
            import torch
        except ImportError:
            self.warning("Please install torch if trying to pre-compile kernels")
            return args
        major = torch.cuda.get_device_properties(0).major  #ignore-cuda
        minor = torch.cuda.get_device_properties(0).minor  #ignore-cuda
        args.append(f"-DGPU_ARCH={major}{minor}")
        return args

    def is_compatible(self, verbose=False):
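        """Check for torch, a CUTLASS >= 3.1.0 install, and, on CUDA builds, CUDA 11+ with a compute capability 7.0+ GPU."""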
        try:
            import torch
        except ImportError:
            if verbose:
                self.warning("Please install torch if trying to pre-compile kernels")
            return False
        if self.cutlass_path is None:
            if verbose:
                self.warning("Please specify the CUTLASS repo directory as environment variable $CUTLASS_PATH")
            return False
        if os.path.exists(f'{self.cutlass_path}/CHANGELOG.md'):
            # A CUTLASS source checkout ships CHANGELOG.md; look for the 3.1.0 entry in it.
            with open(f'{self.cutlass_path}/CHANGELOG.md', 'r') as f:
                if '3.1.0' not in f.read():
                    if verbose:
                        self.warning("Please use CUTLASS version >= 3.1.0")
                    return False
        else:
            # No CHANGELOG.md: assume a pip-installed nvidia-cutlass package and check its version.
            try:
                import cutlass
            except ImportError:
                if verbose:
                    self.warning("Please pip install nvidia-cutlass if trying to pre-compile kernels")
                return False
            cutlass_major, cutlass_minor = cutlass.__version__.split('.')[:2]
            # Compare (major, minor) as a tuple so that, e.g., 4.0 also satisfies ">= 3.1.0".
            cutlass_compatible = (int(cutlass_major), int(cutlass_minor)) >= (3, 1)
            if not cutlass_compatible:
                if verbose:
                    self.warning("Please use CUTLASS version >= 3.1.0")
                return False
        cuda_okay = True
        if not self.is_rocm_pytorch() and torch.cuda.is_available():  #ignore-cuda
            sys_cuda_major, _ = installed_cuda_version()
            torch_cuda_major = int(torch.version.cuda.split('.')[0])
            cuda_capability = torch.cuda.get_device_properties(0).major  #ignore-cuda
            if cuda_capability < 7:
                if verbose:
                    self.warning("Please use a GPU with compute capability >= 7.0")
                cuda_okay = False
            if torch_cuda_major < 11 or sys_cuda_major < 11:
                if verbose:
                    self.warning("Please use CUDA 11+")
                cuda_okay = False
        return super().is_compatible(verbose) and cuda_okay

    def include_paths(self):
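        """CUTLASS header search paths under $CUTLASS_PATH."""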
        includes = [f'{self.cutlass_path}/include', f'{self.cutlass_path}/tools/util/include']
        return includes
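

# Minimal usage sketch (illustrative only; not part of the upstream file). It assumes
# the JIT-compilation helper load() inherited from the OpBuilder base class in
# .builder, and it must be run as a module inside its package because of the
# relative import at the top of this file.
if __name__ == '__main__':
    builder = EvoformerAttnBuilder()
    if builder.is_compatible(verbose=True):
        # load() JIT-builds the sources/flags defined above and imports the extension.
        evoformer_attn_op = builder.load()
        print(f"Loaded {builder.absolute_name()}")
    else:
        print("Evoformer attention kernels cannot be built in this environment.")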