Skip to content

Commit 31429e7

Browse files
authored
[CodeGen] Emit a more accurate alignment for non-temporal loads/stores (#75675)
Call EmitPointerWithAlignment to compute the alignment based on the underlying lvalue's alignment when it's available.
1 parent dd45be0 commit 31429e7

File tree

2 files changed

+18
-4
lines changed

2 files changed

+18
-4
lines changed

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -232,19 +232,19 @@ static Value *MakeBinaryAtomicValue(
232232

233233
static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
234234
Value *Val = CGF.EmitScalarExpr(E->getArg(0));
235-
Value *Address = CGF.EmitScalarExpr(E->getArg(1));
235+
Address Addr = CGF.EmitPointerWithAlignment(E->getArg(1));
236236

237237
Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
238-
LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getArg(0)->getType());
238+
LValue LV = CGF.MakeAddrLValue(Addr, E->getArg(0)->getType());
239239
LV.setNontemporal(true);
240240
CGF.EmitStoreOfScalar(Val, LV, false);
241241
return nullptr;
242242
}
243243

244244
static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
245-
Value *Address = CGF.EmitScalarExpr(E->getArg(0));
245+
Address Addr = CGF.EmitPointerWithAlignment(E->getArg(0));
246246

247-
LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
247+
LValue LV = CGF.MakeAddrLValue(Addr, E->getType());
248248
LV.setNontemporal(true);
249249
return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
250250
}

clang/test/CodeGen/Nontemporal.cpp

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -46,3 +46,17 @@ void test_all_sizes(void) // CHECK-LABEL: test_all_sizes
4646
vf2 = __builtin_nontemporal_load(&vf1); // CHECK: load <4 x float>{{.*}}align 16, !nontemporal
4747
vc2 = __builtin_nontemporal_load(&vc1); // CHECK: load <8 x i8>{{.*}}align 8, !nontemporal
4848
}
49+
50+
struct S { char c[16]; };
51+
S x;
52+
53+
typedef int v4si __attribute__ ((vector_size(16)));
54+
55+
// CHECK-LABEL: define void @_Z14test_alignmentv()
56+
// CHECK: load <4 x i32>, ptr @x, align 1, !nontemporal
57+
// CHECK: store <4 x i32> %1, ptr @x, align 1, !nontemporal
58+
59+
void test_alignment() {
60+
auto t = __builtin_nontemporal_load((v4si*)x.c);
61+
__builtin_nontemporal_store(t, (v4si*)x.c);
62+
}

0 commit comments

Comments
 (0)