Skip to content

Conversation

jhuber6
Copy link
Contributor

@jhuber6 jhuber6 commented Sep 17, 2025

Backport fix, required to keep OpenMP offloading code from segfaulting with new CUDA.

Copy link
Member

@Artem-B Artem-B left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM with one minor issue.

@github-project-automation github-project-automation bot moved this from Needs Triage to Needs Merge in LLVM Release Status Sep 17, 2025
Summary:
We rely on these flags to do things in the runtime and print the
contents of binaries correctly. CUDA updated their ABI encoding recently
and we didn't handle that. it's a new ABI entirely so we just select on
it when it shows up.

Fixes: llvm#148703

[LLVM] Fix offload and update CUDA ABI for all SM values (llvm#159354)

Summary:
Turns out the new CUDA ABI now applies retroactively to all the other
SMs if you upgrade to CUDA 13.0. This patch changes the scheme, keeping
all the SM flags consistent but using an offset.

Fixes: llvm#159088
@llvmbot
Copy link
Member

llvmbot commented Sep 17, 2025

@llvm/pr-subscribers-offload

@llvm/pr-subscribers-llvm-binary-utilities

Author: Joseph Huber (jhuber6)

Changes

Backport fix.


Full diff: https://github.com/llvm/llvm-project/pull/159451.diff

6 Files Affected:

  • (modified) llvm/include/llvm/BinaryFormat/ELF.h (+25-3)
  • (modified) llvm/include/llvm/Object/ELFObjectFile.h (+1)
  • (modified) llvm/lib/Object/ELFObjectFile.cpp (+30-2)
  • (modified) llvm/tools/llvm-readobj/ELFDumper.cpp (+82-32)
  • (modified) offload/plugins-nextgen/common/src/Utils/ELF.cpp (+15-8)
  • (modified) offload/plugins-nextgen/cuda/src/rtl.cpp (+5-1)
diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h
index ebb257ab33821..cfae75d093421 100644
--- a/llvm/include/llvm/BinaryFormat/ELF.h
+++ b/llvm/include/llvm/BinaryFormat/ELF.h
@@ -362,6 +362,7 @@ enum {
   ELFOSABI_FENIXOS = 16,       // FenixOS
   ELFOSABI_CLOUDABI = 17,      // Nuxi CloudABI
   ELFOSABI_CUDA = 51,          // NVIDIA CUDA architecture.
+  ELFOSABI_CUDA_V2 = 41,       // NVIDIA CUDA architecture.
   ELFOSABI_FIRST_ARCH = 64,    // First architecture-specific OS ABI
   ELFOSABI_AMDGPU_HSA = 64,    // AMD HSA runtime
   ELFOSABI_AMDGPU_PAL = 65,    // AMD PAL runtime
@@ -385,6 +386,12 @@ enum {
   ELFABIVERSION_AMDGPU_HSA_V6 = 4,
 };
 
+// CUDA OS ABI Version identification.
+enum {
+  ELFABIVERSION_CUDA_V1 = 7,
+  ELFABIVERSION_CUDA_V2 = 8,
+};
+
 #define ELF_RELOC(name, value) name = value,
 
 // X86_64 relocations.
@@ -921,9 +928,15 @@ enum {
 
 // NVPTX specific e_flags.
 enum : unsigned {
-  // Processor selection mask for EF_CUDA_SM* values.
+  // Processor selection mask for EF_CUDA_SM* values prior to blackwell.
   EF_CUDA_SM = 0xff,
 
+  // Processor selection mask for EF_CUDA_SM* values following blackwell.
+  EF_CUDA_SM_MASK = 0xff00,
+
+  // Processor selection mask for EF_CUDA_SM* values following blackwell.
+  EF_CUDA_SM_OFFSET = 8,
+
   // SM based processor values.
   EF_CUDA_SM20 = 0x14,
   EF_CUDA_SM21 = 0x15,
@@ -943,9 +956,15 @@ enum : unsigned {
   EF_CUDA_SM80 = 0x50,
   EF_CUDA_SM86 = 0x56,
   EF_CUDA_SM87 = 0x57,
+  EF_CUDA_SM88 = 0x58,
   EF_CUDA_SM89 = 0x59,
-  // The sm_90a variant uses the same machine flag.
   EF_CUDA_SM90 = 0x5a,
+  EF_CUDA_SM100 = 0x64,
+  EF_CUDA_SM101 = 0x65,
+  EF_CUDA_SM103 = 0x67,
+  EF_CUDA_SM110 = 0x6e,
+  EF_CUDA_SM120 = 0x78,
+  EF_CUDA_SM121 = 0x79,
 
   // Unified texture binding is enabled.
   EF_CUDA_TEXMODE_UNIFIED = 0x100,
@@ -954,12 +973,15 @@ enum : unsigned {
   // The target is using 64-bit addressing.
   EF_CUDA_64BIT_ADDRESS = 0x400,
   // Set when using the sm_90a processor.
-  EF_CUDA_ACCELERATORS = 0x800,
+  EF_CUDA_ACCELERATORS_V1 = 0x800,
   // Undocumented software feature.
   EF_CUDA_SW_FLAG_V2 = 0x1000,
 
   // Virtual processor selection mask for EF_CUDA_VIRTUAL_SM* values.
   EF_CUDA_VIRTUAL_SM = 0xff0000,
+
+  // Set when using an accelerator variant like sm_100a.
+  EF_CUDA_ACCELERATORS = 0x8,
 };
 
 // ELF Relocation types for BPF
diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h
index 103686884e705..30a9dd35f624e 100644
--- a/llvm/include/llvm/Object/ELFObjectFile.h
+++ b/llvm/include/llvm/Object/ELFObjectFile.h
@@ -1479,6 +1479,7 @@ template <class ELFT> Triple::OSType ELFObjectFile<ELFT>::getOS() const {
   case ELF::ELFOSABI_OPENBSD:
     return Triple::OpenBSD;
   case ELF::ELFOSABI_CUDA:
+  case ELF::ELFOSABI_CUDA_V2:
     return Triple::CUDA;
   case ELF::ELFOSABI_AMDGPU_HSA:
     return Triple::AMDHSA;
diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp
index 0e13d32bbe522..a6b56ae77cf21 100644
--- a/llvm/lib/Object/ELFObjectFile.cpp
+++ b/llvm/lib/Object/ELFObjectFile.cpp
@@ -620,7 +620,10 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const {
 
 StringRef ELFObjectFileBase::getNVPTXCPUName() const {
   assert(getEMachine() == ELF::EM_CUDA);
-  unsigned SM = getPlatformFlags() & ELF::EF_CUDA_SM;
+  unsigned SM = getEIdentABIVersion() == ELF::ELFABIVERSION_CUDA_V1
+                    ? getPlatformFlags() & ELF::EF_CUDA_SM
+                    : (getPlatformFlags() & ELF::EF_CUDA_SM_MASK) >>
+                          ELF::EF_CUDA_SM_OFFSET;
 
   switch (SM) {
   // Fermi architecture.
@@ -672,6 +675,8 @@ StringRef ELFObjectFileBase::getNVPTXCPUName() const {
     return "sm_86";
   case ELF::EF_CUDA_SM87:
     return "sm_87";
+  case ELF::EF_CUDA_SM88:
+    return "sm_88";
 
   // Ada architecture.
   case ELF::EF_CUDA_SM89:
@@ -679,7 +684,30 @@ StringRef ELFObjectFileBase::getNVPTXCPUName() const {
 
   // Hopper architecture.
   case ELF::EF_CUDA_SM90:
-    return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_90a" : "sm_90";
+    return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS_V1 ? "sm_90a"
+                                                             : "sm_90";
+
+  // Blackwell architecture.
+  case ELF::EF_CUDA_SM100:
+    return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_100a"
+                                                          : "sm_100";
+  case ELF::EF_CUDA_SM101:
+    return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_101a"
+                                                          : "sm_101";
+  case ELF::EF_CUDA_SM103:
+    return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_103a"
+                                                          : "sm_103";
+  case ELF::EF_CUDA_SM110:
+    return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_110a"
+                                                          : "sm_110";
+
+  // Blackwell architecture.
+  case ELF::EF_CUDA_SM120:
+    return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_120a"
+                                                          : "sm_120";
+  case ELF::EF_CUDA_SM121:
+    return getPlatformFlags() & ELF::EF_CUDA_ACCELERATORS ? "sm_121a"
+                                                          : "sm_121";
   default:
     llvm_unreachable("Unknown EF_CUDA_SM value");
   }
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index 101079f09e1d2..3fd167df1ecc5 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -1083,26 +1083,26 @@ const EnumEntry<unsigned> ElfObjectFileType[] = {
 };
 
 const EnumEntry<unsigned> ElfOSABI[] = {
-  {"SystemV",      "UNIX - System V",      ELF::ELFOSABI_NONE},
-  {"HPUX",         "UNIX - HP-UX",         ELF::ELFOSABI_HPUX},
-  {"NetBSD",       "UNIX - NetBSD",        ELF::ELFOSABI_NETBSD},
-  {"GNU/Linux",    "UNIX - GNU",           ELF::ELFOSABI_LINUX},
-  {"GNU/Hurd",     "GNU/Hurd",             ELF::ELFOSABI_HURD},
-  {"Solaris",      "UNIX - Solaris",       ELF::ELFOSABI_SOLARIS},
-  {"AIX",          "UNIX - AIX",           ELF::ELFOSABI_AIX},
-  {"IRIX",         "UNIX - IRIX",          ELF::ELFOSABI_IRIX},
-  {"FreeBSD",      "UNIX - FreeBSD",       ELF::ELFOSABI_FREEBSD},
-  {"TRU64",        "UNIX - TRU64",         ELF::ELFOSABI_TRU64},
-  {"Modesto",      "Novell - Modesto",     ELF::ELFOSABI_MODESTO},
-  {"OpenBSD",      "UNIX - OpenBSD",       ELF::ELFOSABI_OPENBSD},
-  {"OpenVMS",      "VMS - OpenVMS",        ELF::ELFOSABI_OPENVMS},
-  {"NSK",          "HP - Non-Stop Kernel", ELF::ELFOSABI_NSK},
-  {"AROS",         "AROS",                 ELF::ELFOSABI_AROS},
-  {"FenixOS",      "FenixOS",              ELF::ELFOSABI_FENIXOS},
-  {"CloudABI",     "CloudABI",             ELF::ELFOSABI_CLOUDABI},
-  {"CUDA",         "NVIDIA - CUDA",        ELF::ELFOSABI_CUDA},
-  {"Standalone",   "Standalone App",       ELF::ELFOSABI_STANDALONE}
-};
+    {"SystemV", "UNIX - System V", ELF::ELFOSABI_NONE},
+    {"HPUX", "UNIX - HP-UX", ELF::ELFOSABI_HPUX},
+    {"NetBSD", "UNIX - NetBSD", ELF::ELFOSABI_NETBSD},
+    {"GNU/Linux", "UNIX - GNU", ELF::ELFOSABI_LINUX},
+    {"GNU/Hurd", "GNU/Hurd", ELF::ELFOSABI_HURD},
+    {"Solaris", "UNIX - Solaris", ELF::ELFOSABI_SOLARIS},
+    {"AIX", "UNIX - AIX", ELF::ELFOSABI_AIX},
+    {"IRIX", "UNIX - IRIX", ELF::ELFOSABI_IRIX},
+    {"FreeBSD", "UNIX - FreeBSD", ELF::ELFOSABI_FREEBSD},
+    {"TRU64", "UNIX - TRU64", ELF::ELFOSABI_TRU64},
+    {"Modesto", "Novell - Modesto", ELF::ELFOSABI_MODESTO},
+    {"OpenBSD", "UNIX - OpenBSD", ELF::ELFOSABI_OPENBSD},
+    {"OpenVMS", "VMS - OpenVMS", ELF::ELFOSABI_OPENVMS},
+    {"NSK", "HP - Non-Stop Kernel", ELF::ELFOSABI_NSK},
+    {"AROS", "AROS", ELF::ELFOSABI_AROS},
+    {"FenixOS", "FenixOS", ELF::ELFOSABI_FENIXOS},
+    {"CloudABI", "CloudABI", ELF::ELFOSABI_CLOUDABI},
+    {"CUDA", "NVIDIA - CUDA", ELF::ELFOSABI_CUDA},
+    {"CUDA", "NVIDIA - CUDA", ELF::ELFOSABI_CUDA_V2},
+    {"Standalone", "Standalone App", ELF::ELFOSABI_STANDALONE}};
 
 const EnumEntry<unsigned> AMDGPUElfOSABI[] = {
   {"AMDGPU_HSA",    "AMDGPU - HSA",    ELF::ELFOSABI_AMDGPU_HSA},
@@ -1666,16 +1666,60 @@ const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion4[] = {
 };
 
 const EnumEntry<unsigned> ElfHeaderNVPTXFlags[] = {
-    ENUM_ENT(EF_CUDA_SM20, "sm_20"), ENUM_ENT(EF_CUDA_SM21, "sm_21"),
-    ENUM_ENT(EF_CUDA_SM30, "sm_30"), ENUM_ENT(EF_CUDA_SM32, "sm_32"),
-    ENUM_ENT(EF_CUDA_SM35, "sm_35"), ENUM_ENT(EF_CUDA_SM37, "sm_37"),
-    ENUM_ENT(EF_CUDA_SM50, "sm_50"), ENUM_ENT(EF_CUDA_SM52, "sm_52"),
-    ENUM_ENT(EF_CUDA_SM53, "sm_53"), ENUM_ENT(EF_CUDA_SM60, "sm_60"),
-    ENUM_ENT(EF_CUDA_SM61, "sm_61"), ENUM_ENT(EF_CUDA_SM62, "sm_62"),
-    ENUM_ENT(EF_CUDA_SM70, "sm_70"), ENUM_ENT(EF_CUDA_SM72, "sm_72"),
-    ENUM_ENT(EF_CUDA_SM75, "sm_75"), ENUM_ENT(EF_CUDA_SM80, "sm_80"),
-    ENUM_ENT(EF_CUDA_SM86, "sm_86"), ENUM_ENT(EF_CUDA_SM87, "sm_87"),
-    ENUM_ENT(EF_CUDA_SM89, "sm_89"), ENUM_ENT(EF_CUDA_SM90, "sm_90"),
+    ENUM_ENT(EF_CUDA_SM20, "sm_20"),
+    ENUM_ENT(EF_CUDA_SM21, "sm_21"),
+    ENUM_ENT(EF_CUDA_SM30, "sm_30"),
+    ENUM_ENT(EF_CUDA_SM32, "sm_32"),
+    ENUM_ENT(EF_CUDA_SM35, "sm_35"),
+    ENUM_ENT(EF_CUDA_SM37, "sm_37"),
+    ENUM_ENT(EF_CUDA_SM50, "sm_50"),
+    ENUM_ENT(EF_CUDA_SM52, "sm_52"),
+    ENUM_ENT(EF_CUDA_SM53, "sm_53"),
+    ENUM_ENT(EF_CUDA_SM60, "sm_60"),
+    ENUM_ENT(EF_CUDA_SM61, "sm_61"),
+    ENUM_ENT(EF_CUDA_SM62, "sm_62"),
+    ENUM_ENT(EF_CUDA_SM70, "sm_70"),
+    ENUM_ENT(EF_CUDA_SM72, "sm_72"),
+    ENUM_ENT(EF_CUDA_SM75, "sm_75"),
+    ENUM_ENT(EF_CUDA_SM80, "sm_80"),
+    ENUM_ENT(EF_CUDA_SM86, "sm_86"),
+    ENUM_ENT(EF_CUDA_SM87, "sm_87"),
+    ENUM_ENT(EF_CUDA_SM88, "sm_88"),
+    ENUM_ENT(EF_CUDA_SM89, "sm_89"),
+    ENUM_ENT(EF_CUDA_SM90, "sm_90"),
+    ENUM_ENT(EF_CUDA_SM100, "sm_100"),
+    ENUM_ENT(EF_CUDA_SM101, "sm_101"),
+    ENUM_ENT(EF_CUDA_SM103, "sm_103"),
+    ENUM_ENT(EF_CUDA_SM110, "sm_110"),
+    ENUM_ENT(EF_CUDA_SM120, "sm_120"),
+    ENUM_ENT(EF_CUDA_SM121, "sm_121"),
+    ENUM_ENT(EF_CUDA_SM20 << EF_CUDA_SM_OFFSET, "sm_20"),
+    ENUM_ENT(EF_CUDA_SM21 << EF_CUDA_SM_OFFSET, "sm_21"),
+    ENUM_ENT(EF_CUDA_SM30 << EF_CUDA_SM_OFFSET, "sm_30"),
+    ENUM_ENT(EF_CUDA_SM32 << EF_CUDA_SM_OFFSET, "sm_32"),
+    ENUM_ENT(EF_CUDA_SM35 << EF_CUDA_SM_OFFSET, "sm_35"),
+    ENUM_ENT(EF_CUDA_SM37 << EF_CUDA_SM_OFFSET, "sm_37"),
+    ENUM_ENT(EF_CUDA_SM50 << EF_CUDA_SM_OFFSET, "sm_50"),
+    ENUM_ENT(EF_CUDA_SM52 << EF_CUDA_SM_OFFSET, "sm_52"),
+    ENUM_ENT(EF_CUDA_SM53 << EF_CUDA_SM_OFFSET, "sm_53"),
+    ENUM_ENT(EF_CUDA_SM60 << EF_CUDA_SM_OFFSET, "sm_60"),
+    ENUM_ENT(EF_CUDA_SM61 << EF_CUDA_SM_OFFSET, "sm_61"),
+    ENUM_ENT(EF_CUDA_SM62 << EF_CUDA_SM_OFFSET, "sm_62"),
+    ENUM_ENT(EF_CUDA_SM70 << EF_CUDA_SM_OFFSET, "sm_70"),
+    ENUM_ENT(EF_CUDA_SM72 << EF_CUDA_SM_OFFSET, "sm_72"),
+    ENUM_ENT(EF_CUDA_SM75 << EF_CUDA_SM_OFFSET, "sm_75"),
+    ENUM_ENT(EF_CUDA_SM80 << EF_CUDA_SM_OFFSET, "sm_80"),
+    ENUM_ENT(EF_CUDA_SM86 << EF_CUDA_SM_OFFSET, "sm_86"),
+    ENUM_ENT(EF_CUDA_SM87 << EF_CUDA_SM_OFFSET, "sm_87"),
+    ENUM_ENT(EF_CUDA_SM88 << EF_CUDA_SM_OFFSET, "sm_88"),
+    ENUM_ENT(EF_CUDA_SM89 << EF_CUDA_SM_OFFSET, "sm_89"),
+    ENUM_ENT(EF_CUDA_SM90 << EF_CUDA_SM_OFFSET, "sm_90"),
+    ENUM_ENT(EF_CUDA_SM100 << EF_CUDA_SM_OFFSET, "sm_100"),
+    ENUM_ENT(EF_CUDA_SM101 << EF_CUDA_SM_OFFSET, "sm_101"),
+    ENUM_ENT(EF_CUDA_SM103 << EF_CUDA_SM_OFFSET, "sm_103"),
+    ENUM_ENT(EF_CUDA_SM110 << EF_CUDA_SM_OFFSET, "sm_110"),
+    ENUM_ENT(EF_CUDA_SM120 << EF_CUDA_SM_OFFSET, "sm_120"),
+    ENUM_ENT(EF_CUDA_SM121 << EF_CUDA_SM_OFFSET, "sm_121"),
 };
 
 const EnumEntry<unsigned> ElfHeaderRISCVFlags[] = {
@@ -3650,10 +3694,16 @@ template <class ELFT> void GNUELFDumper<ELFT>::printFileHeaders() {
   else if (e.e_machine == EM_XTENSA)
     ElfFlags = printFlags(e.e_flags, ArrayRef(ElfHeaderXtensaFlags),
                           unsigned(ELF::EF_XTENSA_MACH));
-  else if (e.e_machine == EM_CUDA)
+  else if (e.e_machine == EM_CUDA) {
     ElfFlags = printFlags(e.e_flags, ArrayRef(ElfHeaderNVPTXFlags),
                           unsigned(ELF::EF_CUDA_SM));
-  else if (e.e_machine == EM_AMDGPU) {
+    if (e.e_ident[ELF::EI_ABIVERSION] == ELF::ELFABIVERSION_CUDA_V1 &&
+        (e.e_flags & ELF::EF_CUDA_ACCELERATORS_V1))
+      ElfFlags += "a";
+    else if (e.e_ident[ELF::EI_ABIVERSION] == ELF::ELFABIVERSION_CUDA_V2 &&
+             (e.e_flags & ELF::EF_CUDA_ACCELERATORS))
+      ElfFlags += "a";
+  } else if (e.e_machine == EM_AMDGPU) {
     switch (e.e_ident[ELF::EI_ABIVERSION]) {
     default:
       break;
diff --git a/offload/plugins-nextgen/common/src/Utils/ELF.cpp b/offload/plugins-nextgen/common/src/Utils/ELF.cpp
index dfec55432f202..b0ee1984c42ce 100644
--- a/offload/plugins-nextgen/common/src/Utils/ELF.cpp
+++ b/offload/plugins-nextgen/common/src/Utils/ELF.cpp
@@ -60,23 +60,30 @@ static Expected<bool>
 checkMachineImpl(const object::ELFObjectFile<ELFT> &ELFObj, uint16_t EMachine) {
   const auto Header = ELFObj.getELFFile().getHeader();
   if (Header.e_type != ET_EXEC && Header.e_type != ET_DYN)
-    return createError("Only executable ELF files are supported");
+    return createError("only executable ELF files are supported");
 
   if (Header.e_machine == EM_AMDGPU) {
     if (Header.e_ident[EI_OSABI] != ELFOSABI_AMDGPU_HSA)
-      return createError("Invalid AMD OS/ABI, must be AMDGPU_HSA");
+      return createError("invalid AMD OS/ABI, must be AMDGPU_HSA");
     if (Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V5 &&
         Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V6)
-      return createError("Invalid AMD ABI version, must be version 5 or above");
+      return createError("invalid AMD ABI version, must be version 5 or above");
     if ((Header.e_flags & EF_AMDGPU_MACH) < EF_AMDGPU_MACH_AMDGCN_GFX700 ||
         (Header.e_flags & EF_AMDGPU_MACH) >
             EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC)
-      return createError("Unsupported AMDGPU architecture");
+      return createError("unsupported AMDGPU architecture");
   } else if (Header.e_machine == EM_CUDA) {
-    if (~Header.e_flags & EF_CUDA_64BIT_ADDRESS)
-      return createError("Invalid CUDA addressing mode");
-    if ((Header.e_flags & EF_CUDA_SM) < EF_CUDA_SM35)
-      return createError("Unsupported NVPTX architecture");
+    if (Header.e_ident[EI_ABIVERSION] == ELFABIVERSION_CUDA_V1) {
+      if (~Header.e_flags & EF_CUDA_64BIT_ADDRESS)
+        return createError("invalid CUDA addressing mode");
+      if ((Header.e_flags & EF_CUDA_SM) < EF_CUDA_SM35)
+        return createError("unsupported NVPTX architecture");
+    } else if (Header.e_ident[EI_ABIVERSION] == ELFABIVERSION_CUDA_V2) {
+      if ((Header.e_flags & EF_CUDA_SM_MASK) < EF_CUDA_SM100)
+        return createError("unsupported NVPTX architecture");
+    } else {
+      return createError("invalid CUDA ABI version");
+    }
   }
 
   return Header.e_machine == EMachine;
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index b787376eb1770..71a28fadfd81d 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -1442,7 +1442,11 @@ struct CUDAPluginTy final : public GenericPluginTy {
       return ElfOrErr.takeError();
 
     // Get the numeric value for the image's `sm_` value.
-    auto SM = ElfOrErr->getPlatformFlags() & ELF::EF_CUDA_SM;
+    const auto Header = ElfOrErr->getELFFile().getHeader();
+    unsigned SM =
+        Header.e_ident[ELF::EI_ABIVERSION] == ELF::ELFABIVERSION_CUDA_V1
+            ? Header.e_flags & ELF::EF_CUDA_SM
+            : (Header.e_flags & ELF::EF_CUDA_SM_MASK) >> ELF::EF_CUDA_SM_OFFSET;
 
     CUdevice Device;
     CUresult Res = cuDeviceGet(&Device, DeviceId);

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
Status: Needs Merge
Development

Successfully merging this pull request may close these issues.

3 participants