[SYCL] Match explicit offload arch for AMD and NVIDIA (#7028)

jchlanda · web-flow · commit 4189858036bb · 2022-10-18T16:17:13.000+03:00
Fixes: #6792

When specifying multiple SYCL targets, make sure that we correctly match the offload arch with the target. Normally this is fixed up later on (when calling `SYCLActionBuilder::withBoundArchForToolChain`), but when creating libraries we might end up in a broken state, as the code relies on the ordering of the GPU map.

See the phases of the following clang invocation:

`clang++ -fsycl -fsycl-targets=nvptx64-nvidia-cuda,amdgcn-amd-amdhsa -Xsycl-target-backend=amdgcn-amd-amdhsa --offload-arch=gfx908 -Xsycl-target-backend=nvptx64-nvidia-cuda --offload-arch=sm_86 -c in.cpp -o out.o -ccc-print-phases`

```
         +- 0: input, "/dash_c_multiple_targets.cpp", c++, (device-sycl, gfx908)
      +- 1: preprocessor, {0}, c++-cpp-output, (device-sycl, gfx908)
   +- 2: compiler, {1}, ir, (device-sycl, gfx908)
+- 3: offload, "device-sycl (nvptx64-nvidia-cuda:gfx908)" {2}, ir
|        +- 4: input, "/dash_c_multiple_targets.cpp", c++, (device-sycl, sm_86)
|     +- 5: preprocessor, {4}, c++-cpp-output, (device-sycl, sm_86)
|  +- 6: compiler, {5}, ir, (device-sycl, sm_86)
|- 7: offload, "device-sycl (amdgcn-amd-amdhsa:sm_86)" {6}, ir
|                 +- 8: input, "/dash_c_multiple_targets.cpp", c++, (host-sycl)
|              +- 9: append-footer, {8}, c++, (host-sycl)
|           +- 10: preprocessor, {9}, c++-cpp-output, (host-sycl)
|        +- 11: offload, "host-sycl (x86_64-unknown-linux-gnu)" {10}, "device-sycl (amdgcn-amd-amdhsa:sm_86)" {6}, c++-cpp-output
|     +- 12: compiler, {11}, ir, (host-sycl)
|  +- 13: backend, {12}, assembler, (host-sycl)
|- 14: assembler, {13}, object, (host-sycl)
15: clang-offload-bundler, {3, 7, 14}, object, (host-sycl)
```

where we end up with a mismatched offload arch: the NVPTX target is paired with `gfx908` and the AMDGCN target with `sm_86`.
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
@@ -5806,7 +5806,15 @@ class OffloadingActionBuilder final {
               }
               SYCLTargetInfoList.emplace_back(*TCIt, nullptr);
             } else {
-              SYCLTargetInfoList.emplace_back(*TCIt, GpuArchList[I].second);
+              const char *OffloadArch = nullptr;
+              for (auto &A : GpuArchList) {
+                if (TT == A.first) {
+                  OffloadArch = A.second;
+                  break;
+                }
+              }
+              assert(OffloadArch && "Failed to find matching arch.");
+              SYCLTargetInfoList.emplace_back(*TCIt, OffloadArch);
               ++I;
             }
           }
diff --git a/clang/test/Driver/sycl.c b/clang/test/Driver/sycl.c
@@ -149,3 +149,8 @@
 // RUN: %clang_cl -### -fsycl -- %s 2>&1 | FileCheck %s --check-prefix=DEFAULT_STD
 
 // DEFAULT_STD: "-sycl-std=2020"
+
+/// Verify correct match of offload arch with multiple sycl targets
+// RUN: %clang -fsycl -fsycl-targets=nvptx64-nvidia-cuda,amdgcn-amd-amdhsa -Xsycl-target-backend=amdgcn-amd-amdhsa --offload-arch=gfx908 -Xsycl-target-backend=nvptx64-nvidia-cuda --offload-arch=sm_86 -c -ccc-print-phases %s 2>&1 | FileCheck %s --check-prefix=MULTIPLE_TARGETS
+// MULTIPLE_TARGETS: offload, "device-sycl (nvptx64-nvidia-cuda:sm_86)"
+// MULTIPLE_TARGETS: offload, "device-sycl (amdgcn-amd-amdhsa:gfx908)"

Original file line number	Diff line number	Diff line change
`@@ -5806,7 +5806,15 @@ class OffloadingActionBuilder final {`
`5806`	`5806`	`}`
`5807`	`5807`	`SYCLTargetInfoList.emplace_back(*TCIt, nullptr);`
`5808`	`5808`	`} else {`
`5809`		`- SYCLTargetInfoList.emplace_back(*TCIt, GpuArchList[I].second);`
	`5809`	`+ const char *OffloadArch = nullptr;`
	`5810`	`+ for (auto &A : GpuArchList) {`
	`5811`	`+ if (TT == A.first) {`
	`5812`	`+ OffloadArch = A.second;`
	`5813`	`+ break;`
	`5814`	`+ }`
	`5815`	`+ }`
	`5816`	`+ assert(OffloadArch && "Failed to find matching arch.");`
	`5817`	`+ SYCLTargetInfoList.emplace_back(*TCIt, OffloadArch);`
`5810`	`5818`	`++I;`
`5811`	`5819`	`}`
`5812`	`5820`	`}`