@@ -335,90 +335,22 @@ exit:
335
335
ret i32 %final.load
336
336
}
337
337
338
- ; FIXME: shouldn't be vectorized, as the only vector values generated are
339
- ; ephemeral.
340
338
define i32 @ephemeral_load_and_compare_another_load_used_outside (ptr %start , ptr %end ) #0 {
341
339
; CHECK-LABEL: define i32 @ephemeral_load_and_compare_another_load_used_outside(
342
340
; CHECK-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
343
341
; CHECK-NEXT: [[ENTRY:.*]]:
344
- ; CHECK-NEXT: [[END2:%.*]] = ptrtoint ptr [[END]] to i64
345
- ; CHECK-NEXT: [[START1:%.*]] = ptrtoint ptr [[START]] to i64
346
- ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[START1]], [[END2]]
347
- ; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 3
348
- ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
349
- ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 16
350
- ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
351
- ; CHECK: [[VECTOR_PH]]:
352
- ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 16
353
- ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
354
- ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], -8
355
- ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP3]]
356
- ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
357
- ; CHECK: [[VECTOR_BODY]]:
358
- ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
359
- ; CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[END]], align 8
360
- ; CHECK-NEXT: [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP4]], i64 0
361
- ; CHECK-NEXT: [[BROADCAST_SPLAT10:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT9]], <4 x ptr> poison, <4 x i32> zeroinitializer
362
- ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[BROADCAST_SPLAT10]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
363
- ; CHECK-NEXT: [[WIDE_MASKED_GATHER5:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[BROADCAST_SPLAT10]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
364
- ; CHECK-NEXT: [[WIDE_MASKED_GATHER8:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[BROADCAST_SPLAT10]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
365
- ; CHECK-NEXT: [[WIDE_MASKED_GATHER11:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[BROADCAST_SPLAT10]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
366
- ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i32> [[WIDE_MASKED_GATHER]], zeroinitializer
367
- ; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <4 x i32> [[WIDE_MASKED_GATHER5]], zeroinitializer
368
- ; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <4 x i32> [[WIDE_MASKED_GATHER8]], zeroinitializer
369
- ; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <4 x i32> [[WIDE_MASKED_GATHER11]], zeroinitializer
370
- ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0
371
- ; CHECK-NEXT: call void @llvm.assume(i1 [[TMP9]])
372
- ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP5]], i32 1
373
- ; CHECK-NEXT: call void @llvm.assume(i1 [[TMP10]])
374
- ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP5]], i32 2
375
- ; CHECK-NEXT: call void @llvm.assume(i1 [[TMP11]])
376
- ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP5]], i32 3
377
- ; CHECK-NEXT: call void @llvm.assume(i1 [[TMP12]])
378
- ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0
379
- ; CHECK-NEXT: call void @llvm.assume(i1 [[TMP13]])
380
- ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP6]], i32 1
381
- ; CHECK-NEXT: call void @llvm.assume(i1 [[TMP14]])
382
- ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP6]], i32 2
383
- ; CHECK-NEXT: call void @llvm.assume(i1 [[TMP15]])
384
- ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP6]], i32 3
385
- ; CHECK-NEXT: call void @llvm.assume(i1 [[TMP16]])
386
- ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0
387
- ; CHECK-NEXT: call void @llvm.assume(i1 [[TMP17]])
388
- ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP7]], i32 1
389
- ; CHECK-NEXT: call void @llvm.assume(i1 [[TMP18]])
390
- ; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i1> [[TMP7]], i32 2
391
- ; CHECK-NEXT: call void @llvm.assume(i1 [[TMP19]])
392
- ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP7]], i32 3
393
- ; CHECK-NEXT: call void @llvm.assume(i1 [[TMP20]])
394
- ; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP8]], i32 0
395
- ; CHECK-NEXT: call void @llvm.assume(i1 [[TMP21]])
396
- ; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i1> [[TMP8]], i32 1
397
- ; CHECK-NEXT: call void @llvm.assume(i1 [[TMP22]])
398
- ; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i1> [[TMP8]], i32 2
399
- ; CHECK-NEXT: call void @llvm.assume(i1 [[TMP23]])
400
- ; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i1> [[TMP8]], i32 3
401
- ; CHECK-NEXT: call void @llvm.assume(i1 [[TMP24]])
402
- ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
403
- ; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
404
- ; CHECK-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
405
- ; CHECK: [[MIDDLE_BLOCK]]:
406
- ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
407
- ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
408
- ; CHECK: [[SCALAR_PH]]:
409
- ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
410
342
; CHECK-NEXT: br label %[[LOOP:.*]]
411
343
; CHECK: [[LOOP]]:
412
- ; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT2:%.*]], %[[LOOP]] ]
344
+ ; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[IV_NEXT2:%.*]], %[[LOOP]] ]
413
345
; CHECK-NEXT: [[IV_NEXT2]] = getelementptr nusw i8, ptr [[IV]], i64 -8
414
346
; CHECK-NEXT: [[L1:%.*]] = load ptr, ptr [[END]], align 8
415
347
; CHECK-NEXT: [[L2:%.*]] = load i32, ptr [[L1]], align 4
416
348
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[L2]], 0
417
349
; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]])
418
350
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq ptr [[IV]], [[END]]
419
- ; CHECK-NEXT: br i1 [[CMP_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
351
+ ; CHECK-NEXT: br i1 [[CMP_NOT]], label %[[EXIT:.*]], label %[[LOOP]]
420
352
; CHECK: [[EXIT]]:
421
- ; CHECK-NEXT: [[L1_LCSSA:%.*]] = phi ptr [ [[L1]], %[[LOOP]] ], [ [[TMP4]], %[[MIDDLE_BLOCK]] ]
353
+ ; CHECK-NEXT: [[L1_LCSSA:%.*]] = phi ptr [ [[L1]], %[[LOOP]] ]
422
354
; CHECK-NEXT: [[FINAL_LOAD:%.*]] = load i32, ptr [[L1_LCSSA]], align 4
423
355
; CHECK-NEXT: ret i32 [[FINAL_LOAD]]
424
356
;
@@ -448,6 +380,4 @@ attributes #0 = { "target-cpu"="skylake-avx512" }
448
380
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
449
381
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
450
382
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
451
- ; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
452
- ; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
453
383
;.
0 commit comments