
Commit 667bbd2

[msan] Apply handleVectorReduceIntrinsic to max/min vector instructions (#129819)
Changes the handling of:
- llvm.aarch64.neon.smaxv
- llvm.aarch64.neon.sminv
- llvm.aarch64.neon.umaxv
- llvm.aarch64.neon.uminv
- llvm.vector.reduce.smax
- llvm.vector.reduce.smin
- llvm.vector.reduce.umax
- llvm.vector.reduce.umin
- llvm.vector.reduce.fmax
- llvm.vector.reduce.fmin
from the default strict handling (visitInstruction) to handleVectorReduceIntrinsic.

Also adds a parameter to handleVectorReduceIntrinsic to specify whether the return type must match the elements of the vector.

Updates the tests from #129741, #129810, #129768.
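To make the change concrete, here is a minimal sketch (function and value names are hypothetical, not taken from the patch) of the shadow propagation handleVectorReduceIntrinsic performs for one of the newly covered intrinsics: the result's shadow becomes the OR-reduction of the operand's shadow, instead of the strict check-and-trap that visitInstruction emits for any uninitialized bit.

; Hypothetical uninstrumented input:
define i32 @reduce_umax(<4 x i32> %v) sanitize_memory {
  %r = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %v)
  ret i32 %r
}

; Shadow computation after this patch (sketch): with AllowShadowCast=false the
; OR-reduced shadow already has the result type, so it is stored directly.
;   %s.v = load <4 x i32>, ptr @__msan_param_tls, align 8
;   %s.r = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %s.v)
;   store i32 %s.r, ptr @__msan_retval_tls, align 8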
1 parent 9e87caf commit 667bbd2

File tree: 6 files changed (+314, -531 lines)


llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp

Lines changed: 46 additions & 18 deletions
@@ -3656,14 +3656,18 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
   // Instrument generic vector reduction intrinsics
   // by ORing together all their fields.
   //
-  // The return type does not need to be the same type as the fields
+  // If AllowShadowCast is true, the return type does not need to be the same
+  // type as the fields
   // e.g., declare i32 @llvm.aarch64.neon.uaddv.i32.v16i8(<16 x i8>)
-  void handleVectorReduceIntrinsic(IntrinsicInst &I) {
+  void handleVectorReduceIntrinsic(IntrinsicInst &I, bool AllowShadowCast) {
     assert(I.arg_size() == 1);
 
     IRBuilder<> IRB(&I);
     Value *S = IRB.CreateOrReduce(getShadow(&I, 0));
-    S = CreateShadowCast(IRB, S, getShadowTy(&I));
+    if (AllowShadowCast)
+      S = CreateShadowCast(IRB, S, getShadowTy(&I));
+    else
+      assert(S->getType() == getShadowTy(&I));
     setShadow(&I, S);
     setOriginForNaryOp(I);
   }
@@ -3672,13 +3676,18 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
   // e.g., call float @llvm.vector.reduce.fadd.f32.v2f32(float %a0, <2 x float>
   // %a1)
   // shadow = shadow[a0] | shadow[a1.0] | shadow[a1.1]
+  //
+  // The type of the return value, initial starting value, and elements of the
+  // vector must be identical.
   void handleVectorReduceWithStarterIntrinsic(IntrinsicInst &I) {
     assert(I.arg_size() == 2);
 
     IRBuilder<> IRB(&I);
     Value *Shadow0 = getShadow(&I, 0);
     Value *Shadow1 = IRB.CreateOrReduce(getShadow(&I, 1));
+    assert(Shadow0->getType() == Shadow1->getType());
     Value *S = IRB.CreateOr(Shadow0, Shadow1);
+    assert(S->getType() == getShadowTy(&I));
     setShadow(&I, S);
     setOriginForNaryOp(I);
   }
@@ -4461,21 +4470,17 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     case Intrinsic::vector_reduce_add:
     case Intrinsic::vector_reduce_xor:
     case Intrinsic::vector_reduce_mul:
-    // Add reduction to scalar
-    case Intrinsic::aarch64_neon_faddv:
-    case Intrinsic::aarch64_neon_saddv:
-    case Intrinsic::aarch64_neon_uaddv:
-    // Floating-point min/max (vector)
-    // The f{min,max}"nm"v variants handle NaN differently than f{min,max}v,
-    // but our shadow propagation is the same.
-    case Intrinsic::aarch64_neon_fmaxv:
-    case Intrinsic::aarch64_neon_fminv:
-    case Intrinsic::aarch64_neon_fmaxnmv:
-    case Intrinsic::aarch64_neon_fminnmv:
-    // Sum long across vector
-    case Intrinsic::aarch64_neon_saddlv:
-    case Intrinsic::aarch64_neon_uaddlv:
-      handleVectorReduceIntrinsic(I);
+    // Signed/Unsigned Min/Max
+    // TODO: handling similarly to AND/OR may be more precise.
+    case Intrinsic::vector_reduce_smax:
+    case Intrinsic::vector_reduce_smin:
+    case Intrinsic::vector_reduce_umax:
+    case Intrinsic::vector_reduce_umin:
+    // TODO: this has no false positives, but arguably we should check that all
+    // the bits are initialized.
+    case Intrinsic::vector_reduce_fmax:
+    case Intrinsic::vector_reduce_fmin:
+      handleVectorReduceIntrinsic(I, /*AllowShadowCast=*/false);
       break;
 
     case Intrinsic::vector_reduce_fadd:
@@ -4918,6 +4923,29 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       break;
     }
 
+    // Add reduction to scalar
+    case Intrinsic::aarch64_neon_faddv:
+    case Intrinsic::aarch64_neon_saddv:
+    case Intrinsic::aarch64_neon_uaddv:
+    // Signed/Unsigned min/max (Vector)
+    // TODO: handling similarly to AND/OR may be more precise.
+    case Intrinsic::aarch64_neon_smaxv:
+    case Intrinsic::aarch64_neon_sminv:
+    case Intrinsic::aarch64_neon_umaxv:
+    case Intrinsic::aarch64_neon_uminv:
+    // Floating-point min/max (vector)
+    // The f{min,max}"nm"v variants handle NaN differently than f{min,max}v,
+    // but our shadow propagation is the same.
+    case Intrinsic::aarch64_neon_fmaxv:
+    case Intrinsic::aarch64_neon_fminv:
+    case Intrinsic::aarch64_neon_fmaxnmv:
+    case Intrinsic::aarch64_neon_fminnmv:
+    // Sum long across vector
+    case Intrinsic::aarch64_neon_saddlv:
+    case Intrinsic::aarch64_neon_uaddlv:
+      handleVectorReduceIntrinsic(I, /*AllowShadowCast=*/true);
+      break;
+
     // Saturating extract narrow
     case Intrinsic::aarch64_neon_sqxtn:
     case Intrinsic::aarch64_neon_sqxtun:
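For the NEON reductions above, the scalar result can be wider than the vector elements, so the AllowShadowCast=true path casts the OR-reduced shadow up to the result's shadow type. A sketch of the resulting pattern (value names hypothetical), matching the CHECK lines in the updated test below:

;   %s.v  = load <8 x i8>, ptr @__msan_param_tls, align 8
;   ; OR together the element shadows of the smaxv operand ...
;   %s.or = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> %s.v)
;   ; ... then CreateShadowCast zero-extends it to the i32 result shadow.
;   %s.r  = zext i8 %s.or to i32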

llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-smaxv.ll

Lines changed: 40 additions & 99 deletions
@@ -2,9 +2,6 @@
 ; RUN: opt < %s -passes=msan -S | FileCheck %s
 ;
 ; Forked from llvm/test/CodeGen/AArch64/arm64-uminv.ll
-;
-; Handled suboptimally (visitInstruction):
-; - llvm.aarch64.neon.smaxv
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64--linux-android9001"
@@ -15,16 +12,12 @@ define signext i8 @test_vmaxv_s8(<8 x i8> %a1) #0 {
 ; CHECK-NEXT: [[ENTRY:.*:]]
 ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to i64
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1:![0-9]+]]
-; CHECK: [[BB2]]:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3:[0-9]+]]
-; CHECK-NEXT: unreachable
-; CHECK: [[BB3]]:
+; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> [[TMP0]])
+; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
 ; CHECK-NEXT: [[VMAXV_I:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> [[A1]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = trunc i32 [[TMP2]] to i8
 ; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[VMAXV_I]] to i8
-; CHECK-NEXT: store i8 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i8 [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT: ret i8 [[TMP4]]
 ;
 entry:
@@ -39,16 +32,12 @@ define signext i16 @test_vmaxv_s16(<4 x i16> %a1) #0 {
 ; CHECK-NEXT: [[ENTRY:.*:]]
 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to i64
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]]
-; CHECK: [[BB2]]:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
-; CHECK-NEXT: unreachable
-; CHECK: [[BB3]]:
+; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP0]])
+; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[TMP1]] to i32
 ; CHECK-NEXT: [[VMAXV_I:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> [[A1]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = trunc i32 [[TMP2]] to i16
 ; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[VMAXV_I]] to i16
-; CHECK-NEXT: store i16 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i16 [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT: ret i16 [[TMP4]]
 ;
 entry:
@@ -63,15 +52,9 @@ define i32 @test_vmaxv_s32(<2 x i32> %a1) #0 {
 ; CHECK-NEXT: [[ENTRY:.*:]]
 ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to i64
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]]
-; CHECK: [[BB2]]:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
-; CHECK-NEXT: unreachable
-; CHECK: [[BB3]]:
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[TMP0]])
 ; CHECK-NEXT: [[VMAXV_I:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32> [[A1]])
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 [[TMP1]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT: ret i32 [[VMAXV_I]]
 ;
 entry:
@@ -85,16 +68,12 @@ define signext i8 @test_vmaxvq_s8(<16 x i8> %a1) #0 {
 ; CHECK-NEXT: [[ENTRY:.*:]]
 ; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]]
-; CHECK: [[BB2]]:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
-; CHECK-NEXT: unreachable
-; CHECK: [[BB3]]:
+; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> [[TMP0]])
+; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
 ; CHECK-NEXT: [[VMAXV_I:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> [[A1]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = trunc i32 [[TMP2]] to i8
 ; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[VMAXV_I]] to i8
-; CHECK-NEXT: store i8 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i8 [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT: ret i8 [[TMP4]]
 ;
 entry:
@@ -109,16 +88,12 @@ define signext i16 @test_vmaxvq_s16(<8 x i16> %a1) #0 {
 ; CHECK-NEXT: [[ENTRY:.*:]]
 ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]]
-; CHECK: [[BB2]]:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
-; CHECK-NEXT: unreachable
-; CHECK: [[BB3]]:
+; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP0]])
+; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[TMP1]] to i32
 ; CHECK-NEXT: [[VMAXV_I:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> [[A1]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = trunc i32 [[TMP2]] to i16
 ; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[VMAXV_I]] to i16
-; CHECK-NEXT: store i16 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i16 [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT: ret i16 [[TMP4]]
 ;
 entry:
@@ -133,15 +108,9 @@ define i32 @test_vmaxvq_s32(<4 x i32> %a1) #0 {
 ; CHECK-NEXT: [[ENTRY:.*:]]
 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]]
-; CHECK: [[BB2]]:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
-; CHECK-NEXT: unreachable
-; CHECK: [[BB3]]:
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP0]])
 ; CHECK-NEXT: [[VMAXV_I:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> [[A1]])
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 [[TMP1]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT: ret i32 [[VMAXV_I]]
 ;
 entry:
@@ -156,16 +125,12 @@ define <8 x i8> @test_vmaxv_s8_used_by_laneop(<8 x i8> %a1, <8 x i8> %a2) #0 {
 ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to i64
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
-; CHECK: [[BB3]]:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
-; CHECK-NEXT: unreachable
-; CHECK: [[BB4]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> [[TMP0]])
+; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i32
 ; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> [[A2]])
+; CHECK-NEXT: [[_MSPROP1:%.*]] = trunc i32 [[TMP3]] to i8
 ; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i8
-; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i8> [[TMP1]], i8 0, i32 3
+; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i8> [[TMP1]], i8 [[_MSPROP1]], i32 3
 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i8> [[A1]], i8 [[TMP6]], i32 3
 ; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT: ret <8 x i8> [[TMP7]]
@@ -184,16 +149,12 @@ define <4 x i16> @test_vmaxv_s16_used_by_laneop(<4 x i16> %a1, <4 x i16> %a2) #0
 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to i64
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
-; CHECK: [[BB3]]:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
-; CHECK-NEXT: unreachable
-; CHECK: [[BB4]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP0]])
+; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32
 ; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> [[A2]])
+; CHECK-NEXT: [[_MSPROP1:%.*]] = trunc i32 [[TMP3]] to i16
 ; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i16
-; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i16> [[TMP1]], i16 0, i32 3
+; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[_MSPROP1]], i32 3
 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i16> [[A1]], i16 [[TMP6]], i32 3
 ; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT: ret <4 x i16> [[TMP7]]
@@ -212,15 +173,9 @@ define <2 x i32> @test_vmaxv_s32_used_by_laneop(<2 x i32> %a1, <2 x i32> %a2) #0
 ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to i64
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
-; CHECK: [[BB3]]:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
-; CHECK-NEXT: unreachable
-; CHECK: [[BB4]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[TMP0]])
 ; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32> [[A2]])
-; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <2 x i32> [[TMP1]], i32 0, i32 1
+; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP2]], i32 1
 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[A1]], i32 [[TMP5]], i32 1
 ; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT: ret <2 x i32> [[TMP6]]
@@ -238,16 +193,12 @@ define <16 x i8> @test_vmaxvq_s8_used_by_laneop(<16 x i8> %a1, <16 x i8> %a2) #0
 ; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
 ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
-; CHECK: [[BB3]]:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
-; CHECK-NEXT: unreachable
-; CHECK: [[BB4]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> [[TMP0]])
+; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i32
 ; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> [[A2]])
+; CHECK-NEXT: [[_MSPROP1:%.*]] = trunc i32 [[TMP3]] to i8
 ; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i8
-; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i8> [[TMP1]], i8 0, i32 3
+; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <16 x i8> [[TMP1]], i8 [[_MSPROP1]], i32 3
 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x i8> [[A1]], i8 [[TMP6]], i32 3
 ; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT: ret <16 x i8> [[TMP7]]
@@ -266,16 +217,12 @@ define <8 x i16> @test_vmaxvq_s16_used_by_laneop(<8 x i16> %a1, <8 x i16> %a2) #
 ; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP0]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
-; CHECK: [[BB3]]:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
-; CHECK-NEXT: unreachable
-; CHECK: [[BB4]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP0]])
+; CHECK-NEXT: [[TMP3:%.*]] = zext i16 [[TMP2]] to i32
 ; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> [[A2]])
+; CHECK-NEXT: [[_MSPROP1:%.*]] = trunc i32 [[TMP3]] to i16
 ; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i16
-; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i16> [[TMP1]], i16 0, i32 3
+; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i16> [[TMP1]], i16 [[_MSPROP1]], i32 3
 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i16> [[A1]], i16 [[TMP6]], i32 3
 ; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT: ret <8 x i16> [[TMP7]]
@@ -294,15 +241,9 @@ define <4 x i32> @test_vmaxvq_s32_used_by_laneop(<4 x i32> %a1, <4 x i32> %a2) #
 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
-; CHECK: [[BB3]]:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]]
-; CHECK-NEXT: unreachable
-; CHECK: [[BB4]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP0]])
 ; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> [[A2]])
-; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> [[TMP1]], i32 0, i32 3
+; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[TMP2]], i32 3
 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[A1]], i32 [[TMP5]], i32 3
 ; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT: ret <4 x i32> [[TMP6]]
