# evoformer_attn.py (forked from microsoft/DeepSpeed)
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team

from .builder import CUDAOpBuilder, installed_cuda_version
import os
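

# Op builder for the DeepSpeed4Science evoformer attention CUDA extension.
# CUTLASS >= 3.1.0 is required; $CUTLASS_PATH must point at the CUTLASS headers
# (either a source checkout or a pip-installed nvidia-cutlass package).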
class EvoformerAttnBuilder(CUDAOpBuilder):
    BUILD_VAR = "DS_BUILD_EVOFORMER_ATTN"
    NAME = "evoformer_attn"

    def __init__(self, name=None):
        name = self.NAME if name is None else name
        super().__init__(name=name)
        self.cutlass_path = os.environ.get('CUTLASS_PATH')

    def absolute_name(self):
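        """Fully qualified name of the compiled extension module."""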
        return f'deepspeed.ops.{self.NAME}_op'

    def extra_ldflags(self):
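        """Extra linker flags: link cuRAND on CUDA builds; nothing extra on ROCm."""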
        if not self.is_rocm_pytorch():
            return ['-lcurand']
        else:
            return []

    def sources(self):
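        """C++/CUDA source files compiled into the extension."""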
        src_dir = 'csrc/deepspeed4science/evoformer_attn'
        return [f'{src_dir}/attention.cpp', f'{src_dir}/attention_back.cu', f'{src_dir}/attention_cu.cu']

    def nvcc_args(self):
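        """Add the compute capability of device 0 (e.g. -DGPU_ARCH=80) to the default nvcc flags."""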
        args = super().nvcc_args()
        try:
            import torch
        except ImportError:
            self.warning("Please install torch if trying to pre-compile kernels")
            return args
        major = torch.cuda.get_device_properties(0).major  #ignore-cuda
        minor = torch.cuda.get_device_properties(0).minor  #ignore-cuda
        args.append(f"-DGPU_ARCH={major}{minor}")
        return args

    def is_compatible(self, verbose=False):
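        """Check for torch, a CUTLASS >= 3.1.0 install, and, on CUDA builds, CUDA 11+ with a compute capability 7.0+ GPU."""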
        try:
            import torch
        except ImportError:
            if verbose:
                self.warning("Please install torch if trying to pre-compile kernels")
            return False
        if self.cutlass_path is None:
            if verbose:
                self.warning("Please specify the CUTLASS repo directory as environment variable $CUTLASS_PATH")
            return False
        if os.path.exists(f'{self.cutlass_path}/CHANGELOG.md'):
            # A CUTLASS source checkout ships CHANGELOG.md; look for the 3.1.0 entry in it.
            with open(f'{self.cutlass_path}/CHANGELOG.md', 'r') as f:
                if '3.1.0' not in f.read():
                    if verbose:
                        self.warning("Please use CUTLASS version >= 3.1.0")
                    return False
        else:
            # No CHANGELOG.md: assume a pip-installed nvidia-cutlass package and check its version.
            try:
                import cutlass
            except ImportError:
                if verbose:
                    self.warning("Please pip install nvidia-cutlass if trying to pre-compile kernels")
                return False
            cutlass_major, cutlass_minor = cutlass.__version__.split('.')[:2]
            # Compare (major, minor) as a tuple so that, e.g., 4.0 also satisfies ">= 3.1.0".
            cutlass_compatible = (int(cutlass_major), int(cutlass_minor)) >= (3, 1)
            if not cutlass_compatible:
                if verbose:
                    self.warning("Please use CUTLASS version >= 3.1.0")
                return False
        cuda_okay = True
        if not self.is_rocm_pytorch() and torch.cuda.is_available():  #ignore-cuda
            sys_cuda_major, _ = installed_cuda_version()
            torch_cuda_major = int(torch.version.cuda.split('.')[0])
            cuda_capability = torch.cuda.get_device_properties(0).major  #ignore-cuda
            if cuda_capability < 7:
                if verbose:
                    self.warning("Please use a GPU with compute capability >= 7.0")
                cuda_okay = False
            if torch_cuda_major < 11 or sys_cuda_major < 11:
                if verbose:
                    self.warning("Please use CUDA 11+")
                cuda_okay = False
        return super().is_compatible(verbose) and cuda_okay

    def include_paths(self):
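        """CUTLASS header search paths under $CUTLASS_PATH."""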
        includes = [f'{self.cutlass_path}/include', f'{self.cutlass_path}/tools/util/include']
        return includes
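

# Minimal usage sketch (illustrative only; not part of the upstream file). It assumes
# the JIT-compilation helper load() inherited from the OpBuilder base class in
# .builder, and it must be run as a module inside its package because of the
# relative import at the top of this file.
if __name__ == '__main__':
    builder = EvoformerAttnBuilder()
    if builder.is_compatible(verbose=True):
        # load() JIT-builds the sources/flags defined above and imports the extension.
        evoformer_attn_op = builder.load()
        print(f"Loaded {builder.absolute_name()}")
    else:
        print("Evoformer attention kernels cannot be built in this environment.")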