flash-algo · LoserCheems · Sep 20, 2025 · Sep 20, 2025 · Sep 20, 2025 · Sep 20, 2025
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
@@ -43,16 +43,16 @@ jobs:
       matrix:
         # Using ubuntu-22.04 instead of 24.04 for more compatibility (glibc). Ideally we'd use the
         # manylinux docker image, but I haven't figured out how to install CUDA on manylinux.
-        os: [ubuntu-22.04, ubuntu-22.04-arm64]
-        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
+        os: [ubuntu-22.04]
+        python-version: ["3.9", "3.10", "3.11", "3.12"]
         torch-version: ["2.5.1", "2.6.0", "2.7.1", "2.8.0"]
         cuda-version: ["12.9.1"]
         # We need separate wheels that either uses C++11 ABI (-D_GLIBCXX_USE_CXX11_ABI) or not.
         # Pytorch wheels currently don't use it, but nvcr images have Pytorch compiled with C++11 ABI.
         # Without this we get import error (undefined symbol: _ZN3c105ErrorC2ENS_14SourceLocationESs)
         # when building without C++11 ABI and using it on nvcr images.
         cxx11_abi: ["FALSE", "TRUE"]
-        arch: ["80", "86", "89", "90", "100", "120"]
+        arch: ["80", "90"]
         include:
             - torch-version: "2.9.0.dev20250904"
               cuda-version: "13.0"

diff --git a/setup.py b/setup.py
@@ -79,7 +79,7 @@ def should_skip_cuda_build():
 
 @functools.lru_cache(maxsize=None)
 def cuda_archs():
-    return os.getenv("FLASH_DMATTN_CUDA_ARCHS", "80;86;89;90;100;120").split(";")
+    return os.getenv("FLASH_DMATTN_CUDA_ARCHS", "80;90").split(";")
 
 
 def detect_preferred_sm_arch() -> Optional[str]: