Skip to content

Commit 6aea96e

Browse files
[PGO][Offload] Allow PGO flags to be used on GPU targets
1 parent d32c6dd commit 6aea96e

File tree

19 files changed

+225
-139
lines changed

19 files changed

+225
-139
lines changed

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6387,11 +6387,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
63876387
Args.AddLastArg(CmdArgs, options::OPT_fconvergent_functions,
63886388
options::OPT_fno_convergent_functions);
63896389

6390-
// NVPTX/AMDGCN doesn't support PGO or coverage. There's no runtime support
6391-
// for sampling, overhead of call arc collection is way too high and there's
6392-
// no way to collect the output.
6393-
if (!Triple.isNVPTX() && !Triple.isAMDGCN())
6394-
addPGOAndCoverageFlags(TC, C, JA, Output, Args, SanitizeArgs, CmdArgs);
6390+
addPGOAndCoverageFlags(TC, C, JA, Output, Args, SanitizeArgs, CmdArgs);
63956391

63966392
Args.AddLastArg(CmdArgs, options::OPT_fclang_abi_compat_EQ);
63976393

clang/test/Driver/cuda-no-pgo-or-coverage.cu

Lines changed: 0 additions & 33 deletions
This file was deleted.

compiler-rt/include/profile/InstrProfData.inc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ INSTR_PROF_VALUE_NODE(PtrToNodeT, llvm::PointerType::getUnqual(Ctx), Next, \
152152
#define INSTR_PROF_DATA_DEFINED
153153
#endif
154154
INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic())
155-
INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version())
155+
INSTR_PROF_RAW_HEADER(uint64_t, Version, Version)
156156
INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL))
157157
INSTR_PROF_RAW_HEADER(uint64_t, NumData, NumData)
158158
INSTR_PROF_RAW_HEADER(uint64_t, PaddingBytesBeforeCounters, PaddingBytesBeforeCounters)

compiler-rt/lib/profile/InstrProfiling.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -310,7 +310,8 @@ int __llvm_write_custom_profile(const char *Target,
310310
const __llvm_profile_data *DataEnd,
311311
const char *CountersBegin,
312312
const char *CountersEnd, const char *NamesBegin,
313-
const char *NamesEnd);
313+
const char *NamesEnd,
314+
const uint64_t *VersionOverride);
314315

315316
/*!
316317
* This variable is defined in InstrProfilingRuntime.cpp as a hidden

compiler-rt/lib/profile/InstrProfilingBuffer.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -252,5 +252,6 @@ COMPILER_RT_VISIBILITY int __llvm_profile_write_buffer_internal(
252252
&BufferWriter, DataBegin, DataEnd, CountersBegin, CountersEnd,
253253
BitmapBegin, BitmapEnd, /*VPDataReader=*/0, NamesBegin, NamesEnd,
254254
/*VTableBegin=*/NULL, /*VTableEnd=*/NULL, /*VNamesBegin=*/NULL,
255-
/*VNamesEnd=*/NULL, /*SkipNameDataWrite=*/0);
255+
/*VNamesEnd=*/NULL, /*SkipNameDataWrite=*/0,
256+
__llvm_profile_get_version());
256257
}

compiler-rt/lib/profile/InstrProfilingFile.c

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1273,10 +1273,13 @@ COMPILER_RT_VISIBILITY int __llvm_profile_set_file_object(FILE *File,
12731273
return 0;
12741274
}
12751275

1276-
COMPILER_RT_USED int __llvm_write_custom_profile(
1277-
const char *Target, const __llvm_profile_data *DataBegin,
1278-
const __llvm_profile_data *DataEnd, const char *CountersBegin,
1279-
const char *CountersEnd, const char *NamesBegin, const char *NamesEnd) {
1276+
int __llvm_write_custom_profile(const char *Target,
1277+
const __llvm_profile_data *DataBegin,
1278+
const __llvm_profile_data *DataEnd,
1279+
const char *CountersBegin,
1280+
const char *CountersEnd, const char *NamesBegin,
1281+
const char *NamesEnd,
1282+
const uint64_t *VersionOverride) {
12801283
int ReturnValue = 0, FilenameLength, TargetLength;
12811284
char *FilenameBuf, *TargetFilename;
12821285
const char *Filename;
@@ -1358,10 +1361,15 @@ COMPILER_RT_USED int __llvm_write_custom_profile(
13581361
ProfDataWriter fileWriter;
13591362
initFileWriter(&fileWriter, OutputFile);
13601363

1364+
uint64_t Version = __llvm_profile_get_version();
1365+
if (VersionOverride)
1366+
Version = *VersionOverride;
1367+
13611368
/* Write custom data to the file */
1362-
ReturnValue = lprofWriteDataImpl(
1363-
&fileWriter, DataBegin, DataEnd, CountersBegin, CountersEnd, NULL, NULL,
1364-
lprofGetVPDataReader(), NULL, NULL, NULL, NULL, NamesBegin, NamesEnd, 0);
1369+
ReturnValue =
1370+
lprofWriteDataImpl(&fileWriter, DataBegin, DataEnd, CountersBegin,
1371+
CountersEnd, NULL, NULL, lprofGetVPDataReader(), NULL,
1372+
NULL, NULL, NULL, NamesBegin, NamesEnd, 0, Version);
13651373
closeFileObject(OutputFile);
13661374

13671375
// Restore SIGKILL.

compiler-rt/lib/profile/InstrProfilingInternal.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,8 @@ int lprofWriteDataImpl(ProfDataWriter *Writer,
160160
VPDataReaderType *VPDataReader, const char *NamesBegin,
161161
const char *NamesEnd, const VTableProfData *VTableBegin,
162162
const VTableProfData *VTableEnd, const char *VNamesBegin,
163-
const char *VNamesEnd, int SkipNameDataWrite);
163+
const char *VNamesEnd, int SkipNameDataWrite,
164+
uint64_t Version);
164165

165166
/* Merge value profile data pointed to by SrcValueProfData into
166167
* in-memory profile counters pointed by to DstData. */

compiler-rt/lib/profile/InstrProfilingWriter.c

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -254,21 +254,21 @@ COMPILER_RT_VISIBILITY int lprofWriteData(ProfDataWriter *Writer,
254254
const VTableProfData *VTableEnd = __llvm_profile_end_vtables();
255255
const char *VNamesBegin = __llvm_profile_begin_vtabnames();
256256
const char *VNamesEnd = __llvm_profile_end_vtabnames();
257+
uint64_t Version = __llvm_profile_get_version();
257258
return lprofWriteDataImpl(Writer, DataBegin, DataEnd, CountersBegin,
258259
CountersEnd, BitmapBegin, BitmapEnd, VPDataReader,
259260
NamesBegin, NamesEnd, VTableBegin, VTableEnd,
260-
VNamesBegin, VNamesEnd, SkipNameDataWrite);
261+
VNamesBegin, VNamesEnd, SkipNameDataWrite, Version);
261262
}
262263

263-
COMPILER_RT_VISIBILITY int
264-
lprofWriteDataImpl(ProfDataWriter *Writer, const __llvm_profile_data *DataBegin,
265-
const __llvm_profile_data *DataEnd,
266-
const char *CountersBegin, const char *CountersEnd,
267-
const char *BitmapBegin, const char *BitmapEnd,
268-
VPDataReaderType *VPDataReader, const char *NamesBegin,
269-
const char *NamesEnd, const VTableProfData *VTableBegin,
270-
const VTableProfData *VTableEnd, const char *VNamesBegin,
271-
const char *VNamesEnd, int SkipNameDataWrite) {
264+
COMPILER_RT_VISIBILITY int lprofWriteDataImpl(
265+
ProfDataWriter *Writer, const __llvm_profile_data *DataBegin,
266+
const __llvm_profile_data *DataEnd, const char *CountersBegin,
267+
const char *CountersEnd, const char *BitmapBegin, const char *BitmapEnd,
268+
VPDataReaderType *VPDataReader, const char *NamesBegin,
269+
const char *NamesEnd, const VTableProfData *VTableBegin,
270+
const VTableProfData *VTableEnd, const char *VNamesBegin,
271+
const char *VNamesEnd, int SkipNameDataWrite, uint64_t Version) {
272272
/* Calculate size of sections. */
273273
const uint64_t DataSectionSize =
274274
__llvm_profile_get_data_size(DataBegin, DataEnd);

llvm/include/llvm/ProfileData/InstrProfData.inc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ INSTR_PROF_VALUE_NODE(PtrToNodeT, llvm::PointerType::getUnqual(Ctx), Next, \
152152
#define INSTR_PROF_DATA_DEFINED
153153
#endif
154154
INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic())
155-
INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version())
155+
INSTR_PROF_RAW_HEADER(uint64_t, Version, Version)
156156
INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL))
157157
INSTR_PROF_RAW_HEADER(uint64_t, NumData, NumData)
158158
INSTR_PROF_RAW_HEADER(uint64_t, PaddingBytesBeforeCounters, PaddingBytesBeforeCounters)

llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -462,7 +462,10 @@ createIRLevelProfileFlagVar(Module &M,
462462
auto IRLevelVersionVariable = new GlobalVariable(
463463
M, IntTy64, true, GlobalValue::WeakAnyLinkage,
464464
Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName);
465-
IRLevelVersionVariable->setVisibility(GlobalValue::HiddenVisibility);
465+
if (isGPUProfTarget(M))
466+
IRLevelVersionVariable->setVisibility(GlobalValue::ProtectedVisibility);
467+
else
468+
IRLevelVersionVariable->setVisibility(GlobalValue::HiddenVisibility);
466469
Triple TT(M.getTargetTriple());
467470
if (TT.supportsCOMDAT()) {
468471
IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage);

llvm/test/tools/llvm-profdata/binary-ids-padding.test

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// Header
22
//
33
// INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic())
4-
// INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version())
4+
// INSTR_PROF_RAW_HEADER(uint64_t, Version, Version)
55
// INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL))
66
// INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize)
77
// INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize)

llvm/test/tools/llvm-profdata/malformed-not-space-for-another-header.test

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// Header
22
//
33
// INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic())
4-
// INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version())
4+
// INSTR_PROF_RAW_HEADER(uint64_t, Version, Version)
55
// INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL))
66
// INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize)
77
// INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize)

llvm/test/tools/llvm-profdata/malformed-num-counters-zero.test

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// Header
22
//
33
// INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic())
4-
// INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version())
4+
// INSTR_PROF_RAW_HEADER(uint64_t, Version, Version)
55
// INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL))
66
// INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize)
77
// INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize)

llvm/test/tools/llvm-profdata/malformed-ptr-to-counter-array.test

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// Header
22
//
33
// INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic())
4-
// INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version())
4+
// INSTR_PROF_RAW_HEADER(uint64_t, Version, Version)
55
// INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL))
66
// INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize)
77
// INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize)

offload/plugins-nextgen/common/include/GlobalHandler.h

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#ifndef LLVM_OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_GLOBALHANDLER_H
1414
#define LLVM_OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_GLOBALHANDLER_H
1515

16+
#include <optional>
1617
#include <type_traits>
1718

1819
#include "llvm/ADT/DenseMap.h"
@@ -64,10 +65,13 @@ struct __llvm_profile_data {
6465
};
6566

6667
extern "C" {
67-
extern int __attribute__((weak)) __llvm_write_custom_profile(
68-
const char *Target, const __llvm_profile_data *DataBegin,
69-
const __llvm_profile_data *DataEnd, const char *CountersBegin,
70-
const char *CountersEnd, const char *NamesBegin, const char *NamesEnd);
68+
extern int __attribute__((weak))
69+
__llvm_write_custom_profile(const char *Target,
70+
const __llvm_profile_data *DataBegin,
71+
const __llvm_profile_data *DataEnd,
72+
const char *CountersBegin, const char *CountersEnd,
73+
const char *NamesBegin, const char *NamesEnd,
74+
const uint64_t *VersionOverride);
7175
}
7276

7377
/// PGO profiling data extracted from a GPU device
@@ -76,6 +80,7 @@ struct GPUProfGlobals {
7680
SmallVector<__llvm_profile_data> Data;
7781
SmallVector<uint8_t> NamesData;
7882
Triple TargetTriple;
83+
std::optional<uint64_t> Version;
7984

8085
void dump() const;
8186
Error write() const;

offload/plugins-nextgen/common/src/GlobalHandler.cpp

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
#include "Shared/Utils.h"
1818

19+
#include "llvm/ProfileData/InstrProfData.inc"
1920
#include "llvm/Support/Error.h"
2021

2122
#include <cstring>
@@ -214,6 +215,13 @@ GenericGlobalHandlerTy::readProfilingGlobals(GenericDeviceTy &Device,
214215
if (auto Err = readGlobalFromDevice(Device, Image, DataGlobal))
215216
return Err;
216217
DeviceProfileData.Data.push_back(std::move(Data));
218+
} else if (*NameOrErr == INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)) {
219+
uint64_t RawVersionData;
220+
GlobalTy RawVersionGlobal(NameOrErr->str(), Sym.getSize(),
221+
&RawVersionData);
222+
if (auto Err = readGlobalFromDevice(Device, Image, RawVersionGlobal))
223+
return Err;
224+
DeviceProfileData.Version = RawVersionData;
217225
}
218226
}
219227
return DeviceProfileData;
@@ -265,7 +273,7 @@ void GPUProfGlobals::dump() const {
265273
}
266274

267275
Error GPUProfGlobals::write() const {
268-
if (!__llvm_write_custom_profile)
276+
if (__llvm_write_custom_profile == nullptr)
269277
return Plugin::error("Could not find symbol __llvm_write_custom_profile. "
270278
"The compiler-rt profiling library must be linked for "
271279
"GPU PGO to work.");
@@ -274,6 +282,8 @@ Error GPUProfGlobals::write() const {
274282
CountsSize = Counts.size() * sizeof(int64_t);
275283
__llvm_profile_data *DataBegin, *DataEnd;
276284
char *CountersBegin, *CountersEnd, *NamesBegin, *NamesEnd;
285+
const uint64_t *VersionOverride =
286+
Version.has_value() ? &Version.value() : nullptr;
277287

278288
// Initialize array of contiguous data. We need to make sure each section is
279289
// contiguous so that the PGO library can compute deltas properly
@@ -295,9 +305,9 @@ Error GPUProfGlobals::write() const {
295305
memcpy(NamesBegin, NamesData.data(), NamesData.size());
296306

297307
// Invoke compiler-rt entrypoint
298-
int result = __llvm_write_custom_profile(TargetTriple.str().c_str(),
299-
DataBegin, DataEnd, CountersBegin,
300-
CountersEnd, NamesBegin, NamesEnd);
308+
int result = __llvm_write_custom_profile(
309+
TargetTriple.str().c_str(), DataBegin, DataEnd, CountersBegin,
310+
CountersEnd, NamesBegin, NamesEnd, VersionOverride);
301311
if (result != 0)
302312
return Plugin::error("Error writing GPU PGO data to file");
303313

offload/test/offloading/gpupgo/pgo1.c

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
// RUN: %libomptarget-compile-generic -fcreate-profile \
2+
// RUN: -Xarch_device -fprofile-generate
3+
// RUN: env LLVM_PROFILE_FILE=%basename_t.llvm.profraw \
4+
// RUN: %libomptarget-run-generic 2>&1
5+
// RUN: llvm-profdata show --all-functions --counts \
6+
// RUN: %target_triple.%basename_t.llvm.profraw | \
7+
// RUN: %fcheck-generic --check-prefix="LLVM-PGO"
8+
9+
// RUN: %libomptarget-compile-generic -fcreate-profile \
10+
// RUN: -Xarch_device -fprofile-instr-generate
11+
// RUN: env LLVM_PROFILE_FILE=%basename_t.clang.profraw \
12+
// RUN: %libomptarget-run-generic 2>&1
13+
// RUN: llvm-profdata show --all-functions --counts \
14+
// RUN: %target_triple.%basename_t.clang.profraw | \
15+
// RUN: %fcheck-generic --check-prefix="CLANG-PGO"
16+
17+
// REQUIRES: gpu
18+
// REQUIRES: pgo
19+
20+
int test1(int a) { return a / 2; }
21+
int test2(int a) { return a * 2; }
22+
23+
int main() {
24+
int m = 2;
25+
#pragma omp target
26+
for (int i = 0; i < 10; i++) {
27+
m = test1(m);
28+
for (int j = 0; j < 2; j++) {
29+
m = test2(m);
30+
}
31+
}
32+
}
33+
34+
// LLVM-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}:
35+
// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
36+
// LLVM-PGO: Counters: 4
37+
// LLVM-PGO: Block counts: [20, 10, 2, 1]
38+
39+
// LLVM-PGO-LABEL: test1:
40+
// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
41+
// LLVM-PGO: Counters: 1
42+
// LLVM-PGO: Block counts: [10]
43+
44+
// LLVM-PGO-LABEL: test2:
45+
// LLVM-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
46+
// LLVM-PGO: Counters: 1
47+
// LLVM-PGO: Block counts: [20]
48+
49+
// LLVM-PGO-LABEL: Instrumentation level:
50+
// LLVM-PGO-SAME: IR
51+
// LLVM-PGO-SAME: entry_first = 0
52+
// LLVM-PGO-LABEL: Functions shown:
53+
// LLVM-PGO-SAME: 3
54+
// LLVM-PGO-LABEL: Maximum function count:
55+
// LLVM-PGO-SAME: 20
56+
// LLVM-PGO-LABEL: Maximum internal block count:
57+
// LLVM-PGO-SAME: 10
58+
59+
// CLANG-PGO-LABEL: __omp_offloading_{{[_0-9a-zA-Z]*}}_main_{{[_0-9a-zA-Z]*}}:
60+
// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
61+
// CLANG-PGO: Counters: 3
62+
// CLANG-PGO: Function count: 0
63+
// CLANG-PGO: Block counts: [11, 20]
64+
65+
// CLANG-PGO-LABEL: test1:
66+
// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
67+
// CLANG-PGO: Counters: 1
68+
// CLANG-PGO: Function count: 10
69+
// CLANG-PGO: Block counts: []
70+
71+
// CLANG-PGO-LABEL: test2:
72+
// CLANG-PGO: Hash: {{0[xX][0-9a-fA-F]+}}
73+
// CLANG-PGO: Counters: 1
74+
// CLANG-PGO: Function count: 20
75+
// CLANG-PGO: Block counts: []
76+
77+
// CLANG-PGO-LABEL: Instrumentation level:
78+
// CLANG-PGO-SAME: Front-end
79+
// CLANG-PGO-LABEL: Functions shown:
80+
// CLANG-PGO-SAME: 3
81+
// CLANG-PGO-LABEL: Maximum function count:
82+
// CLANG-PGO-SAME: 20
83+
// CLANG-PGO-LABEL: Maximum internal block count:
84+
// CLANG-PGO-SAME: 20

0 commit comments

Comments
 (0)