Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

deepspeed v0.9.0 #10

Merged
merged 4 commits into from
May 15, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Add pytorch 2.0 and building patch
  • Loading branch information
hmaarrfk committed May 15, 2023
commit 19ea1cf12cd6cb2715de765c78c81c001c0d10c1
7 changes: 7 additions & 0 deletions .ci_support/migrations/pytorch20.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
__migrator:
build_number: 1
kind: version
migration_number: 1
migrator_ts: 1680651766.97899
pytorch:
- '2.0'
66 changes: 66 additions & 0 deletions recipe/3085.diff
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
diff --git a/op_builder/builder.py b/op_builder/builder.py
index 44d6a440c05..ea10320529f 100644
--- a/op_builder/builder.py
+++ b/op_builder/builder.py
@@ -36,9 +36,6 @@


def installed_cuda_version(name=""):
- import torch.cuda
- if not torch.cuda.is_available():
- return 0, 0
import torch.utils.cpp_extension
cuda_home = torch.utils.cpp_extension.CUDA_HOME
assert cuda_home is not None, "CUDA_HOME does not exist, unable to compile CUDA op(s)"
@@ -78,8 +75,6 @@ def get_default_compute_capabilities():

def assert_no_cuda_mismatch(name=""):
cuda_major, cuda_minor = installed_cuda_version(name)
- if cuda_minor == 0 and cuda_major == 0:
- return False
sys_cuda_version = f'{cuda_major}.{cuda_minor}'
torch_cuda_version = ".".join(torch.version.cuda.split('.')[:2])
# This is a show-stopping error, should probably not proceed past this
@@ -344,10 +339,11 @@ def cpu_arch(self):

def is_cuda_enable(self):
try:
- if torch.cuda.is_available():
- return '-D__ENABLE_CUDA__'
- except:
- print(f"{WARNING} {self.name} torch.cuda is missing, only cpu ops can be compiled!")
+ assert_no_cuda_mismatch(self.name)
+ return '-D__ENABLE_CUDA__'
+ except BaseException:
+ print(f"{WARNING} {self.name} cuda is missing or is incompatible with installed torch, "
+ "only cpu ops can be compiled!")
return '-D__DISABLE_CUDA__'
return '-D__DISABLE_CUDA__'

@@ -459,7 +455,11 @@ def jit_load(self, verbose=True):
raise RuntimeError(f"Unable to JIT load the {self.name} op due to ninja not being installed.")

if isinstance(self, CUDAOpBuilder) and not self.is_rocm_pytorch():
- self.build_for_cpu = not assert_no_cuda_mismatch(self.name)
+ try:
+ assert_no_cuda_mismatch(self.name)
+ self.build_for_cpu = False
+ except BaseException:
+ self.build_for_cpu = True

self.jit_mode = True
from torch.utils.cpp_extension import load
@@ -579,7 +579,12 @@ def is_compatible(self, verbose=True):
return super().is_compatible(verbose)

def builder(self):
- self.build_for_cpu = not assert_no_cuda_mismatch(self.name)
+ try:
+ assert_no_cuda_mismatch(self.name)
+ self.build_for_cpu = False
+ except BaseException:
+ self.build_for_cpu = True
+
if self.build_for_cpu:
from torch.utils.cpp_extension import CppExtension as ExtensionBuilder
else:
4 changes: 4 additions & 0 deletions recipe/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ package:
source:
url: https://pypi.io/packages/source/{{ name[0] }}/{{ name }}/{{ name }}-{{ version }}.tar.gz
sha256: 1301e73b1f52ec857197c1189a263ec2b17c833312bf7cbac74ddb1d6aca142d
patches:
# remove torch.cuda.is_available() check when compiling ops
# https://github.com/microsoft/DeepSpeed/pull/3085
- 3085.diff

build:
number: {{ number }}
Expand Down