.next().unwrap_unchecked()
on Iterator doesn't optimize as expected #107681
Description
Update: per #107681 (comment), this will be fixed with LLVM 19
(Context: this is a minimized example of the behaviour I hit while working on https://github.com/rust-lang/rust/pull/107634/files#r1096624164)
I tried this code: https://rust.godbolt.org/z/EjWscs1e9
pub unsafe fn demo_std(x: &mut Copied<Iter<'_, u32>>) -> u32 {
x.next().unwrap_unchecked()
}
I expected to see this happen: the code would read and bump the pointer in the iterator without checking the end, something like
example::demo_std:
mov rax, qword ptr [rdi + 8]
lea rcx, [rax + 4]
mov qword ptr [rdi + 8], rcx
mov eax, dword ptr [rax]
ret
Instead, this happened: it still checks the iterator ending condition
example::demo_std:
mov rax, qword ptr [rdi + 8]
cmp rax, qword ptr [rdi]
je .LBB0_1
lea rcx, [rax + 4]
mov qword ptr [rdi + 8], rcx
mov eax, dword ptr [rax]
ret
.LBB0_1:
ret
Dunno where the problem is here -- could be LLVM, could be how the functions are written, could be how MIR handles things...
LLVM after optimizing:
define noundef i32 @_ZN7example8demo_std17hd41113e4f46707e0E(ptr noalias nocapture noundef align 8 dereferenceable(16) %x) unnamed_addr #0 personality ptr @rust_eh_personality {
start:
tail call void @llvm.experimental.noalias.scope.decl(metadata !2)
%0 = getelementptr inbounds { ptr, ptr }, ptr %x, i64 0, i32 1
%self1.i.i = load ptr, ptr %0, align 8, !alias.scope !5, !nonnull !8, !noundef !8
%self2.i.i = load ptr, ptr %x, align 8, !alias.scope !5, !nonnull !8, !noundef !8
%_10.i.i = icmp eq ptr %self1.i.i, %self2.i.i
br i1 %_10.i.i, label %"_ZN104_$LT$core..iter..adapters..copied..Copied$LT$I$GT$$u20$as$u20$core..iter..traits..iterator..Iterator$GT$4next17h1de76f250ae29b09E.exit", label %bb3.i.i
bb3.i.i: ; preds = %start
%1 = getelementptr inbounds i32, ptr %self1.i.i, i64 1
store ptr %1, ptr %0, align 8, !alias.scope !5
%v.i.i = load i32, ptr %self1.i.i, align 4, !alias.scope !9, !noalias !2, !noundef !8
br label %"_ZN104_$LT$core..iter..adapters..copied..Copied$LT$I$GT$$u20$as$u20$core..iter..traits..iterator..Iterator$GT$4next17h1de76f250ae29b09E.exit"
"_ZN104_$LT$core..iter..adapters..copied..Copied$LT$I$GT$$u20$as$u20$core..iter..traits..iterator..Iterator$GT$4next17h1de76f250ae29b09E.exit": ; preds = %start, %bb3.i.i
%.sroa.3.0.i.i = phi i32 [ %v.i.i, %bb3.i.i ], [ undef, %start ]
%2 = xor i1 %_10.i.i, true
tail call void @llvm.assume(i1 %2)
ret i32 %.sroa.3.0.i.i
}
Looks like CSE figured out that it's assuming the same thing as the br
condition in start
, but nothing moved it up to start
even though that block dominates the assume? If it had, then I think the the branch and compare might have properly disappeared?