-
Notifications
You must be signed in to change notification settings - Fork 791
[SYCL][New offload model] Add SYCL E2E tests for --offload-new-driver option and fix failing tests #14730
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SYCL][New offload model] Add SYCL E2E tests for --offload-new-driver option and fix failing tests #14730
Changes from all commits
7c1b9ca
3cb7a33
88f964d
f053bd5
b02cfd0
d667c63
91c3fae
0c1af79
151909a
5d728c7
a96e739
f720222
a858ce5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,6 +14,7 @@ | |
// | ||
//===---------------------------------------------------------------------===// | ||
|
||
#include "clang/Basic/Cuda.h" | ||
#include "clang/Basic/Version.h" | ||
#include "llvm/ADT/MapVector.h" | ||
#include "llvm/BinaryFormat/Magic.h" | ||
|
@@ -409,6 +410,46 @@ fatbinary(ArrayRef<std::pair<StringRef, StringRef>> InputFiles, | |
|
||
return *TempFileOrErr; | ||
} | ||
|
||
// ptxas binary | ||
Expected<StringRef> ptxas(StringRef InputFile, const ArgList &Args, | ||
StringRef Arch) { | ||
llvm::TimeTraceScope TimeScope("NVPTX ptxas"); | ||
// NVPTX uses the ptxas program to process assembly files. | ||
Expected<std::string> PtxasPath = | ||
findProgram("ptxas", {CudaBinaryPath + "/bin"}); | ||
if (!PtxasPath) | ||
return PtxasPath.takeError(); | ||
|
||
llvm::Triple Triple( | ||
Args.getLastArgValue(OPT_host_triple_EQ, sys::getDefaultTargetTriple())); | ||
|
||
// Create a new file to write the output to. | ||
auto TempFileOrErr = | ||
createOutputFile(sys::path::filename(ExecutableName), "cubin"); | ||
if (!TempFileOrErr) | ||
return TempFileOrErr.takeError(); | ||
|
||
SmallVector<StringRef, 16> CmdArgs; | ||
CmdArgs.push_back(*PtxasPath); | ||
CmdArgs.push_back(Triple.isArch64Bit() ? "-m64" : "-m32"); | ||
// Pass -v to ptxas if it was passed to the driver. | ||
if (Args.hasArg(OPT_verbose)) | ||
CmdArgs.push_back("-v"); | ||
StringRef OptLevel = Args.getLastArgValue(OPT_opt_level, "O2"); | ||
if (Args.hasArg(OPT_debug)) | ||
CmdArgs.push_back("-g"); | ||
else | ||
CmdArgs.push_back(Args.MakeArgString("-" + OptLevel)); | ||
CmdArgs.push_back("--gpu-name"); | ||
CmdArgs.push_back(Arch); | ||
CmdArgs.push_back("--output-file"); | ||
CmdArgs.push_back(*TempFileOrErr); | ||
CmdArgs.push_back(InputFile); | ||
if (Error Err = executeCommands(*PtxasPath, CmdArgs)) | ||
return std::move(Err); | ||
return *TempFileOrErr; | ||
} | ||
} // namespace nvptx | ||
|
||
namespace amdgcn { | ||
|
@@ -1240,7 +1281,8 @@ static Expected<StringRef> linkDevice(ArrayRef<StringRef> InputFiles, | |
} // namespace sycl | ||
|
||
namespace generic { | ||
Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args) { | ||
Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args, | ||
bool IsSYCLKind = false) { | ||
llvm::TimeTraceScope TimeScope("Clang"); | ||
// Use `clang` to invoke the appropriate device tools. | ||
Expected<std::string> ClangPath = | ||
|
@@ -1276,6 +1318,8 @@ Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args) { | |
if (!Triple.isNVPTX()) | ||
CmdArgs.push_back("-Wl,--no-undefined"); | ||
|
||
if (IsSYCLKind && Triple.isNVPTX()) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. do we know why this is SYCL specific and not NVPTX specific? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For now, I am trying to match the behavior of new offloading model with that found in the new offloading model (for SYCL). This particular flow (getting the assembly file from the clang call and then explicitly calling ptxas inside the linker-wrapper tool) matches with what is found in the old offloading model, but does not match the community flow. Hence the SYCL specificity. This surely warrants revisiting at a later time. |
||
CmdArgs.push_back("-S"); | ||
for (StringRef InputFile : InputFiles) | ||
CmdArgs.push_back(InputFile); | ||
|
||
|
@@ -1369,7 +1413,7 @@ Expected<StringRef> linkDevice(ArrayRef<StringRef> InputFiles, | |
case Triple::ppc64: | ||
case Triple::ppc64le: | ||
case Triple::systemz: | ||
return generic::clang(InputFiles, Args); | ||
return generic::clang(InputFiles, Args, IsSYCLKind); | ||
case Triple::spirv32: | ||
case Triple::spirv64: | ||
case Triple::spir: | ||
|
@@ -2078,14 +2122,40 @@ Expected<SmallVector<StringRef>> linkAndWrapDeviceFiles( | |
return OutputFile.takeError(); | ||
WrappedOutput.push_back(*OutputFile); | ||
} | ||
|
||
for (size_t I = 0, E = SplitModules.size(); I != E; ++I) { | ||
SmallVector<StringRef> Files = {SplitModules[I].ModuleFilePath}; | ||
auto LinkedFileFinalOrErr = | ||
StringRef Arch = LinkerArgs.getLastArgValue(OPT_arch_EQ); | ||
if (Arch.empty()) | ||
Arch = "native"; | ||
SmallVector<std::pair<StringRef, StringRef>, 4> BundlerInputFiles; | ||
auto ClangOutputOrErr = | ||
linkDevice(Files, LinkerArgs, true /* IsSYCLKind */); | ||
if (!LinkedFileFinalOrErr) | ||
return LinkedFileFinalOrErr.takeError(); | ||
SplitModules[I].ModuleFilePath = *LinkedFileFinalOrErr; | ||
if (!ClangOutputOrErr) | ||
return ClangOutputOrErr.takeError(); | ||
if (Triple.isNVPTX()) { | ||
auto VirtualArch = StringRef(clang::CudaArchToVirtualArchString( | ||
clang::StringToCudaArch(Arch))); | ||
auto PtxasOutputOrErr = | ||
nvptx::ptxas(*ClangOutputOrErr, LinkerArgs, Arch); | ||
if (!PtxasOutputOrErr) | ||
return PtxasOutputOrErr.takeError(); | ||
BundlerInputFiles.emplace_back(*ClangOutputOrErr, VirtualArch); | ||
BundlerInputFiles.emplace_back(*PtxasOutputOrErr, Arch); | ||
auto BundledFileOrErr = | ||
nvptx::fatbinary(BundlerInputFiles, LinkerArgs); | ||
if (!BundledFileOrErr) | ||
return BundledFileOrErr.takeError(); | ||
SplitModules[I].ModuleFilePath = *BundledFileOrErr; | ||
} else if (Triple.isAMDGCN()) { | ||
BundlerInputFiles.emplace_back(*ClangOutputOrErr, Arch); | ||
auto BundledFileOrErr = | ||
amdgcn::fatbinary(BundlerInputFiles, LinkerArgs); | ||
if (!BundledFileOrErr) | ||
return BundledFileOrErr.takeError(); | ||
SplitModules[I].ModuleFilePath = *BundledFileOrErr; | ||
} else { | ||
SplitModules[I].ModuleFilePath = *ClangOutputOrErr; | ||
} | ||
} | ||
// TODO(NOM7): Remove this call and use community flow for bundle/wrap | ||
auto OutputFile = sycl::runWrapperAndCompile(SplitModules, LinkerArgs); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
//==--- aot.cpp - Simple vector addition (AOT compilation example) --------==// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===---------------------------------------------------------------------===// | ||
|
||
#include <sycl/detail/core.hpp> | ||
|
||
#include <array> | ||
#include <iostream> | ||
|
||
constexpr sycl::access::mode sycl_read = sycl::access::mode::read; | ||
constexpr sycl::access::mode sycl_write = sycl::access::mode::write; | ||
|
||
template <typename T> class Vadd; | ||
|
||
template <typename T, size_t N> | ||
void vadd(const std::array<T, N> &A, const std::array<T, N> &B, | ||
std::array<T, N> &C) { | ||
sycl::queue Queue([](sycl::exception_list ExceptionList) { | ||
for (std::exception_ptr ExceptionPtr : ExceptionList) { | ||
try { | ||
std::rethrow_exception(ExceptionPtr); | ||
} catch (sycl::exception &E) { | ||
std::cerr << E.what(); | ||
} catch (...) { | ||
std::cerr << "Unknown async exception was caught." << std::endl; | ||
} | ||
} | ||
}); | ||
|
||
sycl::range<1> numOfItems{N}; | ||
sycl::buffer bufA(A.data(), numOfItems); | ||
sycl::buffer bufB(B.data(), numOfItems); | ||
sycl::buffer bufC(C.data(), numOfItems); | ||
|
||
Queue.submit([&](sycl::handler &cgh) { | ||
sycl::accessor accA{bufA, cgh, sycl::read_only}; | ||
sycl::accessor accB{bufB, cgh, sycl::read_only}; | ||
sycl::accessor accC{bufC, cgh, sycl::write_only}; | ||
|
||
cgh.parallel_for<Vadd<T>>(numOfItems, [=](sycl::id<1> wiID) { | ||
accC[wiID] = accA[wiID] + accB[wiID]; | ||
}); | ||
}); | ||
|
||
Queue.wait_and_throw(); | ||
} | ||
|
||
int main() { | ||
const size_t array_size = 4; | ||
std::array<int, array_size> A = {{1, 2, 3, 4}}, B = {{1, 2, 3, 4}}, C; | ||
std::array<float, array_size> D = {{1.f, 2.f, 3.f, 4.f}}, | ||
E = {{1.f, 2.f, 3.f, 4.f}}, F; | ||
vadd(A, B, C); | ||
vadd(D, E, F); | ||
for (unsigned int i = 0; i < array_size; i++) { | ||
if (C[i] != A[i] + B[i]) { | ||
std::cout << "Incorrect result (element " << i << " is " << C[i] << "!\n"; | ||
return 1; | ||
} | ||
if (F[i] != D[i] + E[i]) { | ||
std::cout << "Incorrect result (element " << i << " is " << F[i] << "!\n"; | ||
return 1; | ||
} | ||
} | ||
std::cout << "Correct result!\n"; | ||
return 0; | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
#include "split-per-source.h" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. do we need a copyright header? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This file was lifted from here: ./sycl/test-e2e/DeviceCodeSplit/Inputs/split-per-source-second-file.cpp where I do not find a copyright header. |
||
|
||
void runKernelsFromFile2() { | ||
sycl::queue Q; | ||
int Data = 0; | ||
{ | ||
sycl::buffer<int, 1> Buf(&Data, sycl::range<1>(1)); | ||
auto KernelID1 = sycl::get_kernel_id<File2Kern1>(); | ||
auto KB = sycl::get_kernel_bundle<sycl::bundle_state::executable>( | ||
Q.get_context(), {KernelID1}); | ||
auto Krn = KB.get_kernel(KernelID1); | ||
|
||
std::vector<sycl::kernel_id> KernelIDStorage = KB.get_kernel_ids(); | ||
assert(KernelIDStorage.size() == 1); | ||
assert(KernelIDStorage[0] == KernelID1); | ||
|
||
Q.submit([&](sycl::handler &Cgh) { | ||
auto Acc = Buf.get_access<sycl::access::mode::read_write>(Cgh); | ||
Cgh.single_task<File2Kern1>(Krn, [=]() { Acc[0] = 3; }); | ||
}); | ||
} | ||
assert(Data == 3); | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
#include <sycl/detail/core.hpp> | ||
|
||
class File1Kern1; | ||
class File1Kern2; | ||
class File2Kern1; | ||
|
||
void runKernelsFromFile2(); |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
// REQUIRES: opencl-aot, cpu | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we also need There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am not aware of this setting. But I can add that here. Again this test was lifted from ./sycl/test-e2e/DeviceCodeSplit/aot-cpu.cpp and I added --offload-new-driver |
||
|
||
// Test with `--offload-new-driver` | ||
// RUN: %clangxx -fsycl -fsycl-device-code-split=per_source -fsycl-targets=spir64_x86_64 -I %S/Inputs -o %t.out %S/split-per-source-main.cpp %S/Inputs/split-per-source-second-file.cpp \ | ||
// RUN: -fsycl-dead-args-optimization --offload-new-driver | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have lifted this file again from ./sycl/test-e2e/DeviceCodeSplit/aot-cpu.cpp. May be we can have a cleanup PR later on? |
||
// RUN: %{run} %t.out |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
// REQUIRES: ocloc, gpu | ||
// UNSUPPORTED: cuda || hip | ||
// CUDA does neither support device code splitting nor SPIR. | ||
// Test with `--offload-new-driver` | ||
// | ||
// RUN: %clangxx -fsycl -fsycl-device-code-split=per_source \ | ||
// RUN: -fsycl-targets=spir64_gen \ | ||
// RUN: -Xsycl-target-backend=spir64_gen \ | ||
// RUN: "-device tgllp" -I %S/Inputs -o %t.out \ | ||
// RUN: %S/split-per-source-main.cpp \ | ||
// RUN: %S/Inputs/split-per-source-second-file.cpp \ | ||
// RUN: -fsycl-dead-args-optimization --offload-new-driver | ||
// RUN: %{run} %t.out |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
//==--- cpu.cpp - AOT compilation for cpu devices using opencl-aot --------==// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===---------------------------------------------------------------------===// | ||
|
||
// REQUIRES: opencl-aot, cpu | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. need There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same as above comments. Thanks |
||
|
||
// Test with `--offload-new-driver` | ||
// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64 --offload-new-driver %S/Inputs/aot.cpp -o %t.out | ||
// RUN: %{run} %t.out | ||
|
||
// Test that opencl-aot can handle multiple build options. | ||
// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64 --offload-new-driver %S/Inputs/aot.cpp -Xsycl-target-backend "--bo=-g" -Xsycl-target-backend "--bo=-cl-opt-disable" -o %t2.out | ||
|
||
// Test that opencl-aot can handle march option. | ||
// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64 --offload-new-driver %S/Inputs/aot.cpp -Xsycl-target-backend "--march=avx512" | ||
// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64 --offload-new-driver %S/Inputs/aot.cpp -Xsycl-target-backend "--march=wsm" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: if we are returning, do we need to
std::move
?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see that std::move has been used wherever we are returning Err. I followed the trend.