Skip to content

Commit 5f8e54a

Browse files
committed
Re-enable MIOpen for AMD cards
Default MIOPEN_FIND_MODE=FAST. Default PYTORCH_MIOPEN_SUGGEST_NHWC=0.
1 parent 5d9ad0c commit 5f8e54a

File tree

1 file changed

+10
-4
lines changed

1 file changed

+10
-4
lines changed

comfy/model_management.py

Lines changed: 10 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -337,17 +337,23 @@ def amd_min_version(device=None, min_rdna_version=0):
337337
try:
338338
if is_amd():
339339
arch = torch.cuda.get_device_properties(get_torch_device()).gcnArchName
340-
if not (any((a in arch) for a in AMD_RDNA2_AND_OLDER_ARCH)):
341-
torch.backends.cudnn.enabled = False # Seems to improve things a lot on AMD
342-
logging.info("Set: torch.backends.cudnn.enabled = False for better AMD performance.")
343-
344340
try:
345341
rocm_version = tuple(map(int, str(torch.version.hip).split(".")[:2]))
346342
except:
347343
rocm_version = (6, -1)
348344

349345
logging.info("AMD arch: {}".format(arch))
350346
logging.info("ROCm version: {}".format(rocm_version))
347+
348+
if os.getenv('MIOPEN_FIND_MODE') is None:
349+
# MIOpen default search mode can cause significant slowdowns without much benefit
350+
os.environ['MIOPEN_FIND_MODE'] = "FAST"
351+
logging.info("Set: MIOPEN_FIND_MODE=FAST for better AMD performance, change by setting MIOPEN_FIND_MODE.")
352+
if os.getenv('PYTORCH_MIOPEN_SUGGEST_NHWC') is None:
353+
# See https://github.com/ROCm/TheRock/issues/2485#issuecomment-3666986174
354+
os.environ['PYTORCH_MIOPEN_SUGGEST_NHWC'] = "0"
355+
logging.info("Set: PYTORCH_MIOPEN_SUGGEST_NHWC=0 for better AMD performance, change by setting PYTORCH_MIOPEN_SUGGEST_NHWC.")
356+
351357
if args.use_split_cross_attention == False and args.use_quad_cross_attention == False:
352358
if importlib.util.find_spec('triton') is not None: # AMD efficient attention implementation depends on triton. TODO: better way of detecting if it's compiled in or not.
353359
if torch_version_numeric >= (2, 7): # works on 2.6 but doesn't actually seem to improve much

0 commit comments

Comments
 (0)