Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into amd-trunk-dev
Browse files Browse the repository at this point in the history
  • Loading branch information
ergawy committed Jul 24, 2024
2 parents 1a9dfc9 + 558a895 commit 69a15f5
Show file tree
Hide file tree
Showing 54 changed files with 1,075 additions and 67 deletions.
3 changes: 3 additions & 0 deletions clang/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,9 @@ Improvements to Coverage Mapping
Bug Fixes in This Version
-------------------------

- Fixed the definition of ``ATOMIC_FLAG_INIT`` in ``<stdatomic.h>`` so it can
be used in C++.

Bug Fixes to Compiler Builtins
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Expand Down
3 changes: 3 additions & 0 deletions clang/include/clang/Basic/CodeGenOptions.def
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,9 @@ CODEGENOPT(ForceAAPCSBitfieldLoad, 1, 0)
/// Assume that by-value parameters do not alias any other values.
CODEGENOPT(PassByValueIsNoAlias, 1, 0)

/// Whether to store parameters passed in registers to the stack.
CODEGENOPT(SaveRegParams, 1, 0)

/// Whether to not follow the AAPCS that enforces volatile bit-field access width to be
/// according to the field declaring type width.
CODEGENOPT(AAPCSBitfieldWidth, 1, 1)
Expand Down
5 changes: 5 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -5089,6 +5089,11 @@ def mspe : Flag<["-"], "mspe">, Group<m_ppc_Features_Group>;
def mno_spe : Flag<["-"], "mno-spe">, Group<m_ppc_Features_Group>;
def mefpu2 : Flag<["-"], "mefpu2">, Group<m_ppc_Features_Group>;
} // let Flags = [TargetSpecific]
// -msave-reg-params: a target-specific flag in m_Group, visible to both the
// clang driver and cc1, whose presence is marshalled into the SaveRegParams
// codegen option.
def msave_reg_params : Flag<["-"], "msave-reg-params">, Group<m_Group>,
Flags<[TargetSpecific]>,
Visibility<[ClangOption, CC1Option]>,
HelpText<"Save arguments passed by registers to ABI-defined stack positions">,
MarshallingInfoFlag<CodeGenOpts<"SaveRegParams">>;
def mabi_EQ_quadword_atomics : Flag<["-"], "mabi=quadword-atomics">,
Group<m_Group>, Visibility<[ClangOption, CC1Option]>,
HelpText<"Enable quadword atomics ABI on AIX (AIX PPC64 only). Uses lqarx/stqcx. instructions.">,
Expand Down
3 changes: 3 additions & 0 deletions clang/lib/CodeGen/CGCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2025,6 +2025,9 @@ static void getTrivialDefaultFunctionAttributes(
FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
}

if (CodeGenOpts.SaveRegParams && !AttrOnCallSite)
FuncAttrs.addAttribute("save-reg-params");

for (StringRef Attr : CodeGenOpts.DefaultFunctionAttrs) {
StringRef Var, Value;
std::tie(Var, Value) = Attr.split('=');
Expand Down
3 changes: 3 additions & 0 deletions clang/lib/Driver/ToolChains/AIX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -548,6 +548,9 @@ void AIX::addClangTargetOptions(
options::OPT_mtocdata))
addTocDataOptions(Args, CC1Args, getDriver());

if (Args.hasArg(options::OPT_msave_reg_params))
CC1Args.push_back("-msave-reg-params");

if (Args.hasFlag(options::OPT_fxl_pragma_pack,
options::OPT_fno_xl_pragma_pack, true))
CC1Args.push_back("-fxl-pragma-pack");
Expand Down
4 changes: 4 additions & 0 deletions clang/lib/Headers/stdatomic.h
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,11 @@ typedef _Atomic(uintmax_t) atomic_uintmax_t;

typedef struct atomic_flag { atomic_bool _Value; } atomic_flag;

/* Initializer for atomic_flag objects. C++ translation units get a bool
   literal; C translation units keep the integer form. */
#ifdef __cplusplus
#define ATOMIC_FLAG_INIT {false}
#else
#define ATOMIC_FLAG_INIT { 0 }
#endif

/* These should be provided by the libc implementation. */
#ifdef __cplusplus
Expand Down
10 changes: 10 additions & 0 deletions clang/test/CodeGen/PowerPC/save-reg-params.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// RUN: %clang_cc1 -triple powerpc64-ibm-aix -emit-llvm -o - %s -msave-reg-params | FileCheck -check-prefix=SAVE %s
// RUN: %clang_cc1 -triple powerpc-ibm-aix -emit-llvm -o - %s -msave-reg-params | FileCheck -check-prefix=SAVE %s
// RUN: %clang_cc1 -triple powerpc64-ibm-aix -emit-llvm -o - %s | FileCheck -check-prefix=NOSAVE %s
// RUN: %clang_cc1 -triple powerpc-ibm-aix -emit-llvm -o - %s | FileCheck -check-prefix=NOSAVE %s

// Declared but not defined in this file; foo calls it.
void bar(int);
// Takes one int parameter and forwards it to bar. The RUN lines compile this
// with and without the option under test and inspect the emitted attributes.
void foo(int x) { bar(x); }

// SAVE: attributes #{{[0-9]+}} = { {{.+}} "save-reg-params" {{.+}} }
// NOSAVE-NOT: "save-reg-params"
7 changes: 7 additions & 0 deletions clang/test/Driver/aix-save-reg-params.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
// RUN: %clang -### -target powerpc-ibm-aix-xcoff -msave-reg-params -c %s -o /dev/null 2>&1 | FileCheck %s
// RUN: %clang -### -target powerpc64-ibm-aix-xcoff -msave-reg-params -c %s -o /dev/null 2>&1 | FileCheck %s
// RUN: %clang -### -target powerpc-ibm-aix-xcoff -c %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=DISABLE
// RUN: %clang -### -target powerpc64-ibm-aix-xcoff -c %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=DISABLE

// CHECK: "-msave-reg-params"
// DISABLE-NOT: "-msave-reg-params"
4 changes: 4 additions & 0 deletions clang/test/Driver/ppc-unsupported.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,8 @@
// RUN: -c %s 2>&1 | FileCheck %s
// RUN: not %clang -target powerpc-unknown-aix -mabi=quadword-atomics \
// RUN: -c %s 2>&1 | FileCheck %s
// RUN: not %clang -target powerpc64le-unknown-linux-gnu -msave-reg-params \
// RUN: -c %s 2>&1 | FileCheck %s
// RUN: not %clang -target powerpc-unknown-unknown -msave-reg-params \
// RUN: -c %s 2>&1 | FileCheck %s
// CHECK: unsupported option
5 changes: 5 additions & 0 deletions clang/test/Headers/stdatomic.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
// RUN: %clang_cc1 -std=c11 -E %s | FileCheck %s
// RUN: %clang_cc1 -std=c11 -fms-compatibility -E %s | FileCheck %s
// RUN: %clang_cc1 -std=c11 %s -verify
// RUN: %clang_cc1 -x c++ -std=c++11 %s -verify
// expected-no-diagnostics
#include <stdatomic.h>

int bool_lock_free = ATOMIC_BOOL_LOCK_FREE;
Expand Down Expand Up @@ -31,3 +34,5 @@ int llong_lock_free = ATOMIC_LLONG_LOCK_FREE;

int pointer_lock_free = ATOMIC_POINTER_LOCK_FREE;
// CHECK: pointer_lock_free = {{ *[012] *;}}

atomic_flag f = ATOMIC_FLAG_INIT;
2 changes: 1 addition & 1 deletion compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2237,6 +2237,7 @@ static const char *RegNumToRegName(int reg) {
case 31:
return "sp";
# endif
# endif // SANITIZER_LINUX
default:
return NULL;
}
Expand Down Expand Up @@ -2302,7 +2303,6 @@ static void DumpSingleReg(ucontext_t *ctx, int RegNum) {
(void)RegName;
# endif
}
# endif

void SignalContext::DumpAllRegisters(void *context) {
ucontext_t *ucontext = (ucontext_t *)context;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
//
//===----------------------------------------------------------------------===//

// This test appears to hang with picolibc & qemu.
// This test did pass but is very slow when run using qemu. ~7 minutes on a
// Neoverse N1 (AArch64) server core.
// UNSUPPORTED: LIBCXX-PICOLIBC-FIXME

// <algorithm>
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21413,7 +21413,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
}

// Turn 'store undef, Ptr' -> nothing.
if (Value.isUndef() && ST->isUnindexed())
if (Value.isUndef() && ST->isUnindexed() && !ST->isVolatile())
return Chain;

// Try to infer better alignment information than the store already has.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPU_ASAN_INSTRUMENTATION_H

#include "AMDGPU.h"
#include "AMDGPUBaseInfo.h"
#include "AMDGPUMemoryUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDGPUMemoryUtils.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ add_llvm_target(AMDGPUCodeGen
AMDGPUAnnotateKernelFeatures.cpp
AMDGPUAnnotateUniformValues.cpp
AMDGPUArgumentUsageInfo.cpp
AMDGPUAsanInstrumentation.cpp
AMDGPUAsmPrinter.cpp
AMDGPUAtomicOptimizer.cpp
AMDGPUAttributor.cpp
Expand Down
1 change: 0 additions & 1 deletion llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
add_llvm_component_library(LLVMAMDGPUUtils
AMDGPUAsanInstrumentation.cpp
AMDGPUAsmUtils.cpp
AMDGPUBaseInfo.cpp
AMDGPUDelayedMCExpr.cpp
Expand Down
23 changes: 23 additions & 0 deletions llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7215,6 +7215,8 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
// Reserve space for the linkage area on the stack.
const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
uint64_t SaveStackPos = CCInfo.getStackSize();
bool SaveParams = MF.getFunction().hasFnAttribute("save-reg-params");
CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);

SmallVector<SDValue, 8> MemOps;
Expand All @@ -7233,6 +7235,27 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
continue;

if (SaveParams && VA.isRegLoc() && !Flags.isByVal() && !VA.needsCustom()) {
const TargetRegisterClass *RegClass = getRegClassForSVT(
LocVT.SimpleTy, IsPPC64, Subtarget.hasP8Vector(), Subtarget.hasVSX());
// On PPC64, the debugger assumes extended 8-byte values are stored from GPR.
MVT SaveVT = RegClass == &PPC::G8RCRegClass ? MVT::i64 : LocVT;
const Register VReg = MF.addLiveIn(VA.getLocReg(), RegClass);
SDValue Parm = DAG.getCopyFromReg(Chain, dl, VReg, SaveVT);
int FI = MFI.CreateFixedObject(SaveVT.getStoreSize(), SaveStackPos, true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
SDValue StoreReg = DAG.getStore(Chain, dl, Parm, FIN,
MachinePointerInfo(), Align(PtrByteSize));
SaveStackPos = alignTo(SaveStackPos + SaveVT.getStoreSize(), PtrByteSize);
MemOps.push_back(StoreReg);
}

if (SaveParams && (VA.isMemLoc() || Flags.isByVal()) && !VA.needsCustom()) {
unsigned StoreSize =
Flags.isByVal() ? Flags.getByValSize() : LocVT.getStoreSize();
SaveStackPos = alignTo(SaveStackPos + StoreSize, PtrByteSize);
}

auto HandleMemLoc = [&]() {
const unsigned LocSize = LocVT.getStoreSize();
const unsigned ValSize = ValVT.getStoreSize();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,6 @@ define amdgpu_kernel void @marked_kernel_use_other_sgpr(ptr addrspace(1) %ptr) #
%queue.load = load volatile i8, ptr addrspace(4) %queue.ptr
%implicitarg.load = load volatile i8, ptr addrspace(4) %implicitarg.ptr
%dispatch.load = load volatile i8, ptr addrspace(4) %dispatch.ptr
store volatile i64 %dispatch.id, ptr addrspace(1) %ptr
ret void
}

Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/bitcast-vector-extract.ll
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ define amdgpu_kernel void @store_bitcast_constant_v8i32_to_v16i16(ptr addrspace(
define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source(ptr addrspace(1) %out, i64 %a, i64 %b) #0 {
%undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 999) #1
%bc = bitcast i64 %undef to <2 x i32>
store volatile <2 x i32> %bc, ptr addrspace(1) %out
store <2 x i32> %bc, ptr addrspace(1) %out
ret void
}

Expand All @@ -83,7 +83,7 @@ define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source_extractel
%undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 9999) #1
%bc = bitcast i64 %undef to <2 x i32>
%elt1 = extractelement <2 x i32> %bc, i32 1
store volatile i32 %elt1, ptr addrspace(1) %out
store i32 %elt1, ptr addrspace(1) %out
ret void
}

Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,8 @@ define void @func_use_lds_global() {
ret void
}

; ERR: warning: <unknown>:0:0: in function func_use_lds_global_constexpr_cast void (): local memory global used by non-kernel function
define void @func_use_lds_global_constexpr_cast() {
; ERR: warning: <unknown>:0:0: in function func_use_lds_global_constexpr_cast void (ptr addrspace(1)): local memory global used by non-kernel function
define void @func_use_lds_global_constexpr_cast(ptr addrspace(1) %out) {
; GFX8-SDAG-LABEL: func_use_lds_global_constexpr_cast:
; GFX8-SDAG: ; %bb.0:
; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
Expand Down Expand Up @@ -153,7 +153,7 @@ define void @func_use_lds_global_constexpr_cast() {
; GISEL-NEXT: s_setpc_b64 s[30:31]
; GISEL-NEXT: .LBB1_2:
; GISEL-NEXT: s_endpgm
store volatile i32 ptrtoint (ptr addrspace(3) @lds to i32), ptr addrspace(1) poison, align 4
store i32 ptrtoint (ptr addrspace(3) @lds to i32), ptr addrspace(1) %out, align 4
ret void
}

Expand Down
23 changes: 17 additions & 6 deletions llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9-SDAG %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9-GISEL %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH-SDAG %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH-GISEL %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s
Expand Down Expand Up @@ -126,10 +126,21 @@ define amdgpu_cs void @caller() {
declare amdgpu_gfx void @callee(i32)

define amdgpu_gfx void @workgroup_ids_gfx(ptr addrspace(1) %outx, ptr addrspace(1) %outy, ptr addrspace(1) %outz) {
; GFX9-LABEL: workgroup_ids_gfx:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
; GFX9-SDAG-LABEL: workgroup_ids_gfx:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT: global_store_dword v[0:1], v0, off
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT: global_store_dword v[2:3], v0, off
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT: global_store_dword v[4:5], v0, off
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: workgroup_ids_gfx:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX9ARCH-SDAG-LABEL: workgroup_ids_gfx:
; GFX9ARCH-SDAG: ; %bb.0:
Expand Down
36 changes: 18 additions & 18 deletions llvm/test/CodeGen/AMDGPU/mem-builtins.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,64 +9,64 @@ declare hidden i32 @strnlen(ptr nocapture, i32) #1
declare hidden i32 @strcmp(ptr nocapture, ptr nocapture) #1


; ERROR: error: <unknown>:0:0: in function test_memcmp void (ptr addrspace(1), ptr addrspace(1), ptr): unsupported call to function memcmp
; ERROR: error: <unknown>:0:0: in function test_memcmp void (ptr addrspace(1), ptr addrspace(1), ptr, ptr addrspace(1)): unsupported call to function memcmp

; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, memcmp@rel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, memcmp@rel32@hi+12
define amdgpu_kernel void @test_memcmp(ptr addrspace(1) %x, ptr addrspace(1) %y, ptr nocapture %p) #0 {
define amdgpu_kernel void @test_memcmp(ptr addrspace(1) %x, ptr addrspace(1) %y, ptr nocapture %p, ptr addrspace(1) %out) #0 {
entry:
%cmp = tail call i32 @memcmp(ptr addrspace(1) %x, ptr addrspace(1) %y, i64 2)
store volatile i32 %cmp, ptr addrspace(1) undef
store i32 %cmp, ptr addrspace(1) %out
ret void
}

; ERROR: error: <unknown>:0:0: in function test_memchr void (ptr addrspace(1), i32, i64): unsupported call to function memchr
; ERROR: error: <unknown>:0:0: in function test_memchr void (ptr addrspace(1), i32, i64, ptr addrspace(1)): unsupported call to function memchr

; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, memchr@rel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, memchr@rel32@hi+12
define amdgpu_kernel void @test_memchr(ptr addrspace(1) %src, i32 %char, i64 %len) #0 {
define amdgpu_kernel void @test_memchr(ptr addrspace(1) %src, i32 %char, i64 %len, ptr addrspace(1) %out) #0 {
%res = call ptr addrspace(1) @memchr(ptr addrspace(1) %src, i32 %char, i64 %len)
store volatile ptr addrspace(1) %res, ptr addrspace(1) undef
store ptr addrspace(1) %res, ptr addrspace(1) %out
ret void
}

; ERROR: error: <unknown>:0:0: in function test_strcpy void (ptr, ptr): unsupported call to function strcpy
; ERROR: error: <unknown>:0:0: in function test_strcpy void (ptr, ptr, ptr addrspace(1)): unsupported call to function strcpy

; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, strcpy@rel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, strcpy@rel32@hi+12
define amdgpu_kernel void @test_strcpy(ptr %dst, ptr %src) #0 {
define amdgpu_kernel void @test_strcpy(ptr %dst, ptr %src, ptr addrspace(1) %out) #0 {
%res = call ptr @strcpy(ptr %dst, ptr %src)
store volatile ptr %res, ptr addrspace(1) undef
store ptr %res, ptr addrspace(1) %out
ret void
}

; ERROR: error: <unknown>:0:0: in function test_strcmp void (ptr, ptr): unsupported call to function strcmp
; ERROR: error: <unknown>:0:0: in function test_strcmp void (ptr, ptr, ptr addrspace(1)): unsupported call to function strcmp

; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, strcmp@rel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, strcmp@rel32@hi+12
define amdgpu_kernel void @test_strcmp(ptr %src0, ptr %src1) #0 {
define amdgpu_kernel void @test_strcmp(ptr %src0, ptr %src1, ptr addrspace(1) %out) #0 {
%res = call i32 @strcmp(ptr %src0, ptr %src1)
store volatile i32 %res, ptr addrspace(1) undef
store i32 %res, ptr addrspace(1) %out
ret void
}

; ERROR: error: <unknown>:0:0: in function test_strlen void (ptr): unsupported call to function strlen
; ERROR: error: <unknown>:0:0: in function test_strlen void (ptr, ptr addrspace(1)): unsupported call to function strlen

; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, strlen@rel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, strlen@rel32@hi+12
define amdgpu_kernel void @test_strlen(ptr %src) #0 {
define amdgpu_kernel void @test_strlen(ptr %src, ptr addrspace(1) %out) #0 {
%res = call i32 @strlen(ptr %src)
store volatile i32 %res, ptr addrspace(1) undef
store i32 %res, ptr addrspace(1) %out
ret void
}

; ERROR: error: <unknown>:0:0: in function test_strnlen void (ptr, i32): unsupported call to function strnlen
; ERROR: error: <unknown>:0:0: in function test_strnlen void (ptr, i32, ptr addrspace(1)): unsupported call to function strnlen

; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, strnlen@rel32@lo+4
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, strnlen@rel32@hi+12
define amdgpu_kernel void @test_strnlen(ptr %src, i32 %size) #0 {
define amdgpu_kernel void @test_strnlen(ptr %src, i32 %size, ptr addrspace(1) %out) #0 {
%res = call i32 @strnlen(ptr %src, i32 %size)
store volatile i32 %res, ptr addrspace(1) undef
store i32 %res, ptr addrspace(1) %out
ret void
}

Expand Down
Loading

0 comments on commit 69a15f5

Please sign in to comment.