Skip to content

Missed optimization: Redundant loop retained after memset idiom recognized #139538

Open
@GINN-Imp

Description

@GINN-Imp

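LLVM correctly recognizes the memory-zeroing loop and replaces its stores with an llvm.memset intrinsic. However, it fails to remove the now-redundant loop structure, which no longer performs any meaningful computation or side effect. This leaves unnecessary control flow and a redundant PHI/arithmetic chain in the output and prevents further simplification: the only value the loop still feeds is the store to @p in the exit block, and because the loop exits only when the induction variable equals 0, that store could simply write the constant 0.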

Godbolt: https://godbolt.org/z/fz5aKfW5M
alive2 proof: https://alive2.llvm.org/ce/z/ncvpr6

; Not referenced by the function body below, which addresses memory only
; through i8 and [1 x i32] GEPs.
; NOTE(review): the 16-byte offset used below presumably corresponds to the
; trailing [1 x i32] field of this struct on a 64-bit target -- confirm.
%struct.MvcCubeStruct = type { ptr, i32, i32, [1 x i32] }

; External global that the loop below stores to on every iteration.
@p = external global i64

; Reduced reproducer (input IR).
; If %1 is negative, returns null immediately. Otherwise runs a count-down
; loop with %.01 going from %1 to 0; each iteration stores zext(%.01) to @p
; and stores i32 0 to element %.01 of the i32 array starting 16 bytes past %0.
; Always returns null.
define ptr @Dec_ConvertSopToMvc(ptr writeonly captures(none) %0, i32 %1) local_unnamed_addr {
  ; Guard: enter the loop only when %1 >= 0.
  %3 = icmp sgt i32 %1, -1
  br i1 %3, label %.lr.ph, label %._crit_edge

.lr.ph:                                           ; preds = %2
  ; Base address of the region being zeroed: %0 + 16 bytes.
  %4 = getelementptr inbounds nuw i8, ptr %0, i64 16
  br label %5

5:                                                ; preds = %5, %.lr.ph
  ; Induction variable: starts at %1, decremented by 1 each iteration.
  %.01 = phi i32 [ %1, %.lr.ph ], [ %8, %5 ]
  %6 = zext nneg i32 %.01 to i64
  ; @p is overwritten every iteration, so only the last stored value survives.
  store i64 %6, ptr @p, align 8
  %7 = getelementptr [1 x i32], ptr %4, i64 0, i64 %6
  ; Zero the i32 element at index %.01.
  store i32 0, ptr %7, align 4
  %8 = add nsw i32 %.01, -1
  ; The exit test uses the pre-decrement value, so the loop exits right after
  ; the iteration in which %.01 == 0; the last value stored to @p is thus 0.
  %.not = icmp eq i32 %.01, 0
  br i1 %.not, label %._crit_edge.loopexit, label %5

._crit_edge.loopexit:                             ; preds = %5
  br label %._crit_edge

._crit_edge:                                      ; preds = %._crit_edge.loopexit, %2
  ret ptr null
}

Output of opt -O3:

; Output of opt -O3 (comments are reviewer annotations, not tool output).
; The zeroing stores have been replaced by a single memset in %.lr.ph, but the
; count-down loop at %8 is still present even though it contains no memory
; operations.
define noalias noundef ptr @Dec_ConvertSopToMvc(ptr writeonly captures(none) %0, i32 %1) local_unnamed_addr #0 {
  %3 = icmp sgt i32 %1, -1
  br i1 %3, label %.lr.ph, label %._crit_edge

.lr.ph:                                           ; preds = %2
  %4 = getelementptr inbounds nuw i8, ptr %0, i64 16
  ; memset length in bytes: zext(%1 + 1) << 2, i.e. 4 * (%1 + 1).
  %5 = add nuw i32 %1, 1
  %6 = zext i32 %5 to i64
  %7 = shl nuw nsw i64 %6, 2
  tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 dereferenceable(1) %4, i8 0, i64 %7, i1 false)
  br label %8

8:                                                ; preds = %8, %.lr.ph
  ; Leftover loop: only the phi, the decrement and the exit compare remain.
  %.01 = phi i32 [ %1, %.lr.ph ], [ %9, %8 ]
  %9 = add nsw i32 %.01, -1
  %.not = icmp eq i32 %.01, 0
  br i1 %.not, label %._crit_edge.loopexit, label %8

._crit_edge.loopexit:                             ; preds = %8
  ; This block is reached only when %.01 == 0, so %10 is always 0 here.
  %10 = zext nneg i32 %.01 to i64
  store i64 %10, ptr @p, align 8
  br label %._crit_edge

._crit_edge:                                      ; preds = %._crit_edge.loopexit, %2
  ret ptr null
}

Expected output:

; Expected result: the same guard and memset, with the loop removed entirely
; and the store to @p writing the constant 0 directly.
define ptr @tgt(ptr writeonly captures(none) %0, i32 %1) local_unnamed_addr {
  ; Guard: do the work only when %1 >= 0.
  %3 = icmp sgt i32 %1, -1
  br i1 %3, label %.lr.ph, label %._crit_edge

.lr.ph:                                           ; preds = %2
  %4 = getelementptr inbounds nuw i8, ptr %0, i64 16
  ; memset length in bytes: zext(%1 + 1) << 2, i.e. 4 * (%1 + 1).
  %5 = add nuw i32 %1, 1
  %6 = zext i32 %5 to i64
  %7 = shl nuw nsw i64 %6, 2
  tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 dereferenceable(1) %4, i8 0, i64 %7, i1 false)
  ; Constant 0 stored without any loop; no induction variable is needed.
  store i64 0, ptr @p, align 8
  br label %._crit_edge

._crit_edge:                                      ; preds = %.lr.ph, %2
  ret ptr null
}

The reduced IR is derived from https://people.ece.ubc.ca/eddieh/abc_dox/d2/d2e/decFactor_8c.html#a1c91faf7938583e56797f9d044a6c437.

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions