Open
Description
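LLVM correctly recognizes a memory-zeroing loop and replaces it with an `llvm.memset` intrinsic. However, it fails to remove the remaining loop, which no longer performs any meaningful computation or side effect: after the transformation it only counts the induction variable down to zero. This leaves unnecessary control flow and a redundant PHI/arithmetic chain in the output, and it prevents further simplification (the value stored to `@p` after the loop is always 0, so the store could use a constant).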
Godbolt: https://godbolt.org/z/fz5aKfW5M
alive2 proof: https://alive2.llvm.org/ce/z/ncvpr6
; Struct type carried over from the original source. It is not referenced by
; name in the functions shown in this report.
; NOTE(review): the 16-byte offset used by the functions presumably matches the
; [1 x i32] field of this struct on a target with 8-byte pointers - confirm.
%struct.MvcCubeStruct = type { ptr, i32, i32, [1 x i32] }
; External i64 global; the functions in this report store the loop counter to it.
@p = external global i64
; Input IR (before optimization). Always returns null.
; When %1 >= 0 it runs a loop that counts %.01 down from %1 to 0. On every
; iteration it stores the zero-extended counter to @p and stores i32 0 into the
; 4-byte slot with index %.01, measured from the address 16 bytes past %0.
define ptr @Dec_ConvertSopToMvc(ptr writeonly captures(none) %0, i32 %1) local_unnamed_addr {
; Guard: the loop is only entered for non-negative %1.
%3 = icmp sgt i32 %1, -1
br i1 %3, label %.lr.ph, label %._crit_edge
.lr.ph: ; preds = %2
; Base address of the region being zeroed: %0 + 16 bytes.
%4 = getelementptr inbounds nuw i8, ptr %0, i64 16
br label %5
5: ; preds = %5, %.lr.ph
; Down-counting induction variable: starts at %1, decremented by 1 per iteration.
%.01 = phi i32 [ %1, %.lr.ph ], [ %8, %5 ]
%6 = zext nneg i32 %.01 to i64
; Side effect 1: publish the current counter value to @p.
store i64 %6, ptr @p, align 8
; Side effect 2: zero the i32 element at index %.01 relative to %4.
%7 = getelementptr [1 x i32], ptr %4, i64 0, i64 %6
store i32 0, ptr %7, align 4
%8 = add nsw i32 %.01, -1
; The exit test uses the current (pre-decrement) value, so the body also runs
; for %.01 == 0 and the last value stored to @p is 0.
%.not = icmp eq i32 %.01, 0
br i1 %.not, label %._crit_edge.loopexit, label %5
._crit_edge.loopexit: ; preds = %5
br label %._crit_edge
._crit_edge: ; preds = %._crit_edge.loopexit, %2
ret ptr null
}
Actual output of `opt -O3`:
; Actual result of opt -O3 on the input above (attribute group #0 is not shown
; in this report). The zeroing stores have been replaced by a single memset and
; the store to @p has been moved to the loop exit, but the counting loop itself
; is still present even though its body no longer touches memory.
define noalias noundef ptr @Dec_ConvertSopToMvc(ptr writeonly captures(none) %0, i32 %1) local_unnamed_addr #0 {
%3 = icmp sgt i32 %1, -1
br i1 %3, label %.lr.ph, label %._crit_edge
.lr.ph: ; preds = %2
%4 = getelementptr inbounds nuw i8, ptr %0, i64 16
; memset length in bytes: (%1 + 1) elements * 4 bytes each (shl by 2).
%5 = add nuw i32 %1, 1
%6 = zext i32 %5 to i64
%7 = shl nuw nsw i64 %6, 2
tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 dereferenceable(1) %4, i8 0, i64 %7, i1 false)
br label %8
8: ; preds = %8, %.lr.ph
; Leftover loop: only the PHI, the decrement and the exit compare remain.
%.01 = phi i32 [ %1, %.lr.ph ], [ %9, %8 ]
%9 = add nsw i32 %.01, -1
%.not = icmp eq i32 %.01, 0
br i1 %.not, label %._crit_edge.loopexit, label %8
._crit_edge.loopexit: ; preds = %8
; This block is reached only when %.01 == 0, so the value stored below is 0.
%10 = zext nneg i32 %.01 to i64
store i64 %10, ptr @p, align 8
br label %._crit_edge
._crit_edge: ; preds = %._crit_edge.loopexit, %2
ret ptr null
}
Expected output:
; Expected result: same guard and memset as the opt -O3 output, but with the
; counting loop removed and the final store to @p folded to the constant 0.
define ptr @tgt(ptr writeonly captures(none) %0, i32 %1) local_unnamed_addr {
%3 = icmp sgt i32 %1, -1
br i1 %3, label %.lr.ph, label %._crit_edge
.lr.ph: ; preds = %2
%4 = getelementptr inbounds nuw i8, ptr %0, i64 16
; memset length in bytes: (%1 + 1) elements * 4 bytes each (shl by 2).
%5 = add nuw i32 %1, 1
%6 = zext i32 %5 to i64
%7 = shl nuw nsw i64 %6, 2
tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 dereferenceable(1) %4, i8 0, i64 %7, i1 false)
; Constant 0 replaces the loop-carried counter value at loop exit.
store i64 0, ptr @p, align 8
br label %._crit_edge
._crit_edge: ; preds = %.lr.ph, %2
ret ptr null
}
The reduced IR is derived from https://people.ece.ubc.ca/eddieh/abc_dox/d2/d2e/decFactor_8c.html#a1c91faf7938583e56797f9d044a6c437