@@ -400,33 +400,21 @@ declare void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64>, <vscale x 1
 declare <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr>, i32, <vscale x 1 x i1>, <vscale x 1 x i64>)
 
 
-; TODO: Make the step loop variant to reflect what the loop vectorizer will emit
-; in an EVL tail folding configuration.
-
 define <vscale x 1 x i64> @vp_gather(ptr %a, i32 %len) {
 ; CHECK-LABEL: @vp_gather(
 ; CHECK-NEXT: vector.ph:
 ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
 ; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP0]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_IND_SCALAR1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR1:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 1 x i64> [ [[TMP1]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[ACCUM:%.*]] = phi <vscale x 1 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[ELEMS:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[VEC_IND_SCALAR]]
-; CHECK-NEXT: [[EVL:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[ELEMS]], i32 1, i1 true)
-; CHECK-NEXT: [[ODD:%.*]] = and <vscale x 1 x i64> [[VEC_IND]], splat (i64 1)
-; CHECK-NEXT: [[MASK:%.*]] = icmp ne <vscale x 1 x i64> [[ODD]], zeroinitializer
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR1]], i32 3
-; CHECK-NEXT: [[GATHER:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP2]], i64 16, <vscale x 1 x i1> [[MASK]], i32 [[EVL]])
+; CHECK-NEXT: [[GATHER:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP2]], i64 16, <vscale x 1 x i1> splat (i1 true), i32 42)
 ; CHECK-NEXT: [[ACCUM_NEXT]] = add <vscale x 1 x i64> [[ACCUM]], [[GATHER]]
 ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add nuw i64 [[VEC_IND_SCALAR]], [[TMP0]]
 ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR1]] = add i64 [[VEC_IND_SCALAR1]], [[TMP0]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 1 x i64> [[VEC_IND]], [[DOTSPLAT]]
 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VEC_IND_NEXT_SCALAR]], [[WIDE_TRIP_COUNT]]
 ; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
 ; CHECK: for.cond.cleanup:
@@ -444,15 +432,8 @@ vector.body: ; preds = %vector.body, %vecto
   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
   %vec.ind = phi <vscale x 1 x i64> [ %1, %vector.ph ], [ %vec.ind.next, %vector.body ]
   %accum = phi <vscale x 1 x i64> [ zeroinitializer, %vector.ph ], [ %accum.next, %vector.body ]
-
-  %elems = sub i64 %wide.trip.count, %index
-  %evl = call i32 @llvm.experimental.get.vector.length.i64(i64 %elems, i32 1, i1 true)
-
-  %odd = and <vscale x 1 x i64> %vec.ind, splat (i64 1)
-  %mask = icmp ne <vscale x 1 x i64> %odd, splat (i64 0)
-
   %2 = getelementptr inbounds %struct.foo, ptr %a, <vscale x 1 x i64> %vec.ind, i32 3
-  %gather = call <vscale x 1 x i64> @llvm.vp.gather(<vscale x 1 x ptr> %2, <vscale x 1 x i1> %mask, i32 %evl)
+  %gather = call <vscale x 1 x i64> @llvm.vp.gather(<vscale x 1 x ptr> %2, <vscale x 1 x i1> splat (i1 true), i32 42)
   %accum.next = add <vscale x 1 x i64> %accum, %gather
   %index.next = add nuw i64 %index, %0
   %vec.ind.next = add <vscale x 1 x i64> %vec.ind, %.splat
@@ -463,31 +444,19 @@ for.cond.cleanup: ; preds = %vector.body
   ret <vscale x 1 x i64> %accum.next
 }
 
-; TODO: Make the step loop variant to reflect what the loop vectorizer will emit
-; in an EVL tail folding configuration.
-
 define void @vp_scatter(ptr %a, i32 %len) {
 ; CHECK-LABEL: @vp_scatter(
 ; CHECK-NEXT: vector.ph:
 ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
 ; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP0]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_IND_SCALAR1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR1:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 1 x i64> [ [[TMP1]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[ELEMS:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[VEC_IND_SCALAR]]
-; CHECK-NEXT: [[EVL:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[ELEMS]], i32 1, i1 true)
-; CHECK-NEXT: [[ODD:%.*]] = and <vscale x 1 x i64> [[VEC_IND]], splat (i64 1)
-; CHECK-NEXT: [[MASK:%.*]] = icmp ne <vscale x 1 x i64> [[ODD]], zeroinitializer
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR1]], i32 3
-; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.nxv1i64.p0.i64(<vscale x 1 x i64> zeroinitializer, ptr [[TMP2]], i64 16, <vscale x 1 x i1> [[MASK]], i32 [[EVL]])
+; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.nxv1i64.p0.i64(<vscale x 1 x i64> zeroinitializer, ptr [[TMP2]], i64 16, <vscale x 1 x i1> splat (i1 true), i32 42)
 ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add nuw i64 [[VEC_IND_SCALAR]], [[TMP0]]
 ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR1]] = add i64 [[VEC_IND_SCALAR1]], [[TMP0]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 1 x i64> [[VEC_IND]], [[DOTSPLAT]]
 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VEC_IND_NEXT_SCALAR]], [[WIDE_TRIP_COUNT]]
 ; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
 ; CHECK: for.cond.cleanup:
@@ -504,17 +473,120 @@ vector.ph:
 vector.body: ; preds = %vector.body, %vector.ph
   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
   %vec.ind = phi <vscale x 1 x i64> [ %1, %vector.ph ], [ %vec.ind.next, %vector.body ]
+  %2 = getelementptr inbounds %struct.foo, ptr %a, <vscale x 1 x i64> %vec.ind, i32 3
+  tail call void @llvm.vp.scatter(<vscale x 1 x i64> zeroinitializer, <vscale x 1 x ptr> %2, <vscale x 1 x i1> splat (i1 true), i32 42)
+  %index.next = add nuw i64 %index, %0
+  %vec.ind.next = add <vscale x 1 x i64> %vec.ind, %.splat
+  %3 = icmp ne i64 %index.next, %wide.trip.count
+  br i1 %3, label %for.cond.cleanup, label %vector.body
+
+for.cond.cleanup: ; preds = %vector.body
+  ret void
+}
+
+; Test that reflects what the loop vectorizer will generate for an EVL tail
+; folded loop
+
+define <vscale x 1 x i64> @evl_gather(ptr %a, i32 %len) {
+; CHECK-LABEL: @evl_gather(
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 1 x i64> [ [[TMP1]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[ACCUM:%.*]] = phi <vscale x 1 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[ELEMS:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[INDEX]]
+; CHECK-NEXT: [[EVL:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[ELEMS]], i32 1, i1 true)
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_FOO:%.*]], ptr [[A:%.*]], <vscale x 1 x i64> [[VEC_IND]], i32 3
+; CHECK-NEXT: [[GATHER:%.*]] = call <vscale x 1 x i64> @llvm.vp.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> [[TMP2]], <vscale x 1 x i1> splat (i1 true), i32 [[EVL]])
+; CHECK-NEXT: [[ACCUM_NEXT]] = add <vscale x 1 x i64> [[ACCUM]], [[GATHER]]
+; CHECK-NEXT: [[EVL_ZEXT:%.*]] = zext i32 [[EVL]] to i64
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[EVL_ZEXT]]
+; CHECK-NEXT: [[EVL_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[EVL_ZEXT]], i64 0
+; CHECK-NEXT: [[EVL_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[EVL_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 1 x i64> [[VEC_IND]], [[EVL_SPLAT]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[INDEX_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
+; CHECK: for.cond.cleanup:
+; CHECK-NEXT: ret <vscale x 1 x i64> [[ACCUM_NEXT]]
+;
+vector.ph:
+  %wide.trip.count = zext i32 %len to i64
+  %1 = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
+  br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %vec.ind = phi <vscale x 1 x i64> [ %1, %vector.ph ], [ %vec.ind.next, %vector.body ]
+  %accum = phi <vscale x 1 x i64> [ zeroinitializer, %vector.ph ], [ %accum.next, %vector.body ]
 
   %elems = sub i64 %wide.trip.count, %index
   %evl = call i32 @llvm.experimental.get.vector.length.i64(i64 %elems, i32 1, i1 true)
 
-  %odd = and <vscale x 1 x i64> %vec.ind, splat (i64 1)
-  %mask = icmp ne <vscale x 1 x i64> %odd, splat (i64 0)
+  %2 = getelementptr inbounds %struct.foo, ptr %a, <vscale x 1 x i64> %vec.ind, i32 3
+  %gather = call <vscale x 1 x i64> @llvm.vp.gather(<vscale x 1 x ptr> %2, <vscale x 1 x i1> splat (i1 true), i32 %evl)
+  %accum.next = add <vscale x 1 x i64> %accum, %gather
+
+  %evl.zext = zext i32 %evl to i64
+  %index.next = add nuw i64 %index, %evl.zext
+  %evl.splatinsert = insertelement <vscale x 1 x i64> poison, i64 %evl.zext, i64 0
+  %evl.splat = shufflevector <vscale x 1 x i64> %evl.splatinsert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+  %vec.ind.next = add <vscale x 1 x i64> %vec.ind, %evl.splat
+  %3 = icmp ne i64 %index.next, %wide.trip.count
+  br i1 %3, label %for.cond.cleanup, label %vector.body
+
+for.cond.cleanup: ; preds = %vector.body
+  ret <vscale x 1 x i64> %accum.next
+}
+
+; Test that reflects what the loop vectorizer will generate for an EVL tail
+; folded loop
+
+define void @evl_scatter(ptr %a, i32 %len) {
+; CHECK-LABEL: @evl_scatter(
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
+; CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 1 x i64> [ [[TMP0]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[ELEMS:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[VEC_IND_SCALAR]]
+; CHECK-NEXT: [[EVL:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[ELEMS]], i32 1, i1 true)
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_FOO:%.*]], ptr [[A:%.*]], <vscale x 1 x i64> [[VEC_IND]], i32 3
+; CHECK-NEXT: tail call void @llvm.vp.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> zeroinitializer, <vscale x 1 x ptr> [[TMP1]], <vscale x 1 x i1> splat (i1 true), i32 [[EVL]])
+; CHECK-NEXT: [[EVL_ZEXT:%.*]] = zext i32 [[EVL]] to i64
+; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add nuw i64 [[VEC_IND_SCALAR]], [[EVL_ZEXT]]
+; CHECK-NEXT: [[EVL_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[EVL_ZEXT]], i64 0
+; CHECK-NEXT: [[EVL_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[EVL_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 1 x i64> [[VEC_IND]], [[EVL_SPLAT]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VEC_IND_NEXT_SCALAR]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
+; CHECK: for.cond.cleanup:
+; CHECK-NEXT: ret void
+;
+vector.ph:
+  %wide.trip.count = zext i32 %len to i64
+  %1 = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
+  br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %vec.ind = phi <vscale x 1 x i64> [ %1, %vector.ph ], [ %vec.ind.next, %vector.body ]
+
+  %elems = sub i64 %wide.trip.count, %index
+  %evl = call i32 @llvm.experimental.get.vector.length.i64(i64 %elems, i32 1, i1 true)
 
   %2 = getelementptr inbounds %struct.foo, ptr %a, <vscale x 1 x i64> %vec.ind, i32 3
-  tail call void @llvm.vp.scatter(<vscale x 1 x i64> zeroinitializer, <vscale x 1 x ptr> %2, <vscale x 1 x i1> %mask, i32 %evl)
-  %index.next = add nuw i64 %index, %0
-  %vec.ind.next = add <vscale x 1 x i64> %vec.ind, %.splat
+  tail call void @llvm.vp.scatter(<vscale x 1 x i64> zeroinitializer, <vscale x 1 x ptr> %2, <vscale x 1 x i1> splat (i1 true), i32 %evl)
+
+  %evl.zext = zext i32 %evl to i64
+  %index.next = add nuw i64 %index, %evl.zext
+  %evl.splatinsert = insertelement <vscale x 1 x i64> poison, i64 %evl.zext, i64 0
+  %evl.splat = shufflevector <vscale x 1 x i64> %evl.splatinsert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+  %vec.ind.next = add <vscale x 1 x i64> %vec.ind, %evl.splat
   %3 = icmp ne i64 %index.next, %wide.trip.count
   br i1 %3, label %for.cond.cleanup, label %vector.body
 