Skip to content

Pick more convenient constants in phi nodes [optimization] #132678

Open
@scottmcm

Description

@scottmcm

I was looking at what's essentially <= on a 4-tuple in Rust, and ended up with the following IR: https://llvm.godbolt.org/z/8zaYxd8qM (That link includes that trunk opt doesn't change it.)

Full IR so there's a non-godbolt copy
; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read, inaccessiblemem: readwrite) uwtable
define noundef zeroext i1 @array_of_tuple_le(ptr noalias nocapture noundef readonly align 2 dereferenceable(8) %a, ptr noalias nocapture noundef readonly align 2 dereferenceable(8) %b) {
start:
  %lhs.i.i.i.i = load i16, ptr %a, align 2, !noundef !24
  %rhs.i.i.i.i = load i16, ptr %b, align 2, !noundef !24
  %_5.i.i.i.i = icmp eq i16 %lhs.i.i.i.i, %rhs.i.i.i.i
  br i1 %_5.i.i.i.i, label %bb6.i.i.i, label %bb8.split.loop.exit12.i

bb6.i.i.i:                                        ; preds = %start
  %_10.i.i.i = getelementptr inbounds nuw i8, ptr %a, i64 2
  %_11.i.i.i = getelementptr inbounds nuw i8, ptr %b, i64 2
  %lhs.i8.i.i.i = load i16, ptr %_10.i.i.i, align 2, !noundef !24
  %rhs.i9.i.i.i = load i16, ptr %_11.i.i.i, align 2, !noundef !24
  %_5.i10.not.i.i.i = icmp eq i16 %lhs.i8.i.i.i, %rhs.i9.i.i.i
  br i1 %_5.i10.not.i.i.i, label %bb1.1.i, label %bb8.split.loop.exit14.i

bb1.1.i:                                          ; preds = %bb6.i.i.i
  %_22.1.i = getelementptr inbounds nuw i8, ptr %a, i64 4
  %_25.1.i = getelementptr inbounds nuw i8, ptr %b, i64 4
  %lhs.i.i.i.1.i = load i16, ptr %_22.1.i, align 2, !noundef !24
  %rhs.i.i.i.1.i = load i16, ptr %_25.1.i, align 2, !noundef !24
  %_5.i.i.i.1.i = icmp eq i16 %lhs.i.i.i.1.i, %rhs.i.i.i.1.i
  br i1 %_5.i.i.i.1.i, label %bb6.i.i.1.i, label %bb8.split.loop.exit12.i

bb6.i.i.1.i:                                      ; preds = %bb1.1.i
  %_10.i.i.1.i = getelementptr inbounds nuw i8, ptr %a, i64 6
  %_11.i.i.1.i = getelementptr inbounds nuw i8, ptr %b, i64 6
  %lhs.i8.i.i.1.i = load i16, ptr %_10.i.i.1.i, align 2, !noundef !24
  %rhs.i9.i.i.1.i = load i16, ptr %_11.i.i.1.i, align 2, !noundef !24
  %_5.i10.not.i.i.1.i = icmp eq i16 %lhs.i8.i.i.1.i, %rhs.i9.i.i.1.i
  br i1 %_5.i10.not.i.i.1.i, label %_ZN4core5slice3cmp13chaining_impl17hcb28312734696fd1E.exit, label %bb8.split.loop.exit14.i

bb8.split.loop.exit12.i:                          ; preds = %bb1.1.i, %start
  %lhs.i.i.i.lcssa.i = phi i16 [ %lhs.i.i.i.i, %start ], [ %lhs.i.i.i.1.i, %bb1.1.i ]
  %rhs.i.i.i.lcssa.i = phi i16 [ %rhs.i.i.i.i, %start ], [ %rhs.i.i.i.1.i, %bb1.1.i ]
  %_6.i.i.i.le.i = icmp sle i16 %lhs.i.i.i.lcssa.i, %rhs.i.i.i.lcssa.i
  %0 = zext i1 %_6.i.i.i.le.i to i8
  br label %_ZN4core5slice3cmp13chaining_impl17hcb28312734696fd1E.exit

bb8.split.loop.exit14.i:                          ; preds = %bb6.i.i.1.i, %bb6.i.i.i
  %lhs.i8.i.i.lcssa.i = phi i16 [ %lhs.i8.i.i.i, %bb6.i.i.i ], [ %lhs.i8.i.i.1.i, %bb6.i.i.1.i ]
  %rhs.i9.i.i.lcssa.i = phi i16 [ %rhs.i9.i.i.i, %bb6.i.i.i ], [ %rhs.i9.i.i.1.i, %bb6.i.i.1.i ]
  %narrow.i.i.le.i = icmp ult i16 %lhs.i8.i.i.lcssa.i, %rhs.i9.i.i.lcssa.i
  %_0.sroa.0.0.i12.i.i.le.i = zext i1 %narrow.i.i.le.i to i8
  br label %_ZN4core5slice3cmp13chaining_impl17hcb28312734696fd1E.exit

_ZN4core5slice3cmp13chaining_impl17hcb28312734696fd1E.exit: ; preds = %bb6.i.i.1.i, %bb8.split.loop.exit12.i, %bb8.split.loop.exit14.i
  %_0.sroa.0.0.i = phi i8 [ %0, %bb8.split.loop.exit12.i ], [ %_0.sroa.0.0.i12.i.i.le.i, %bb8.split.loop.exit14.i ], [ 2, %bb6.i.i.1.i ]
  %.not = icmp eq i8 %_0.sroa.0.0.i, 2
  %1 = trunc i8 %_0.sroa.0.0.i to i1
  %_0.sroa.0.0 = or i1 %.not, %1
  ret i1 %_0.sroa.0.0
}

!24 = !{}

The part I want to draw attention to specifically is the end:

bb8.split.loop.exit12.i:                          ; preds = %bb1.1.i, %start
  … elided …
  %0 = zext i1 %_6.i.i.i.le.i to i8
  br label %_ZN4core5slice3cmp13chaining_impl17hcb28312734696fd1E.exit

bb8.split.loop.exit14.i:                          ; preds = %bb6.i.i.1.i, %bb6.i.i.i
  … elided …
  %_0.sroa.0.0.i12.i.i.le.i = zext i1 %narrow.i.i.le.i to i8
  br label %_ZN4core5slice3cmp13chaining_impl17hcb28312734696fd1E.exit

_ZN4core5slice3cmp13chaining_impl17hcb28312734696fd1E.exit: ; preds = %bb6.i.i.1.i, %bb8.split.loop.exit12.i, %bb8.split.loop.exit14.i
  %_0.sroa.0.0.i = phi i8 [ %0, %bb8.split.loop.exit12.i ], [ %_0.sroa.0.0.i12.i.i.le.i, %bb8.split.loop.exit14.i ], [ 2, %bb6.i.i.1.i ]
  %.not = icmp eq i8 %_0.sroa.0.0.i, 2
  %1 = trunc i8 %_0.sroa.0.0.i to i1
  %_0.sroa.0.0 = or i1 %.not, %1
  ret i1 %_0.sroa.0.0
}

That 2 came from Option<bool>::None in Rust, but it would be really nice if LLVM could notice that it being specifically 2 is not something that makes it to the output at all, and thus it could be changed to something more convenient for the logic.

Specifically, in this case the other two phi inputs are just zexted icmps, and the function is returning i1, so if it's changed to 1 then everything else in that block can be greatly simplified:

_ZN4core5slice3cmp13chaining_impl17hcb28312734696fd1E.exit: ; preds = %bb6.i.i.1.i, %bb8.split.loop.exit12.i, %bb8.split.loop.exit14.i
  %_0.sroa.0.0.i = phi i8 [ %0, %bb8.split.loop.exit12.i ], [ %_0.sroa.0.0.i12.i.i.le.i, %bb8.split.loop.exit14.i ], [ 1, %bb6.i.i.1.i ]
  %1 = trunc nuw i8 %_0.sroa.0.0.i to i1
  ret i1 %1
}

That way there's no need for the eq+or, and the trunc becomes nuw which could collapse into the phi and the predecessors too.

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions