Description
Bugzilla Link | 40371 |
Version | trunk |
OS | Linux |
Depends On | #34577 #39193 |
Attachments | test.ll |
CC | @chandlerc,@hfinkel,@zhengyang92,@nunoplopes,@RalfJung,@regehr,@sanjoy |
Extended Description
$ cat test.ll
; while (p != q) p++;
; return p;
define i8* @iterate_until_p_becomes_q(i8* %p, i8* %q) {
label:
br label %while.cond
while.cond:
%p.addr.0 = phi i8* [ %p, %label ], [ %incdec.ptr, %while.body ]
%cmp = icmp ne i8* %p.addr.0, %q
br i1 %cmp, label %while.body, label %while.end
while.body:
%incdec.ptr = getelementptr inbounds i8, i8* %p.addr.0, i32 1
br label %while.cond
while.end:
ret i8* %p.addr.0
}
After running `opt test.ll -indvars -S -o -`, the returned value %p.addr.0 is replaced with %q:
define i8* @iterate_until_p_becomes_q(i8* %p, i8* %q) {
label:
br label %while.cond
while.cond: ; preds = %while.body, %label
%p.addr.0 = phi i8* [ %p, %label ], [ %incdec.ptr, %while.body ]
%cmp = icmp ne i8* %p.addr.0, %q
br i1 %cmp, label %while.body, label %while.end
while.body: ; preds = %while.cond
%incdec.ptr = getelementptr inbounds i8, i8* %p.addr.0, i32 1
br label %while.cond
while.end: ; preds = %while.cond
ret i8* %q
}
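This is incorrect because %p and %q may have different provenance: when the loop exits, %p.addr.0 and %q compare equal as addresses, but that does not imply they are based on the same object, so one cannot be substituted for the other. This causes a miscompilation in the following case: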
char p[4]; // 0x100
char q[4]; // 0x104
char *ptr = iterate_until_p_becomes_q(&p[0], &q[0]); // ptr = &p[4]
ptr--;
*ptr = 'a'; // After the optimization, this is UB because ptr becomes &q[-1], which is out of bounds of q.
Interestingly, if %p and %q are declared with a non-i8 pointee type (e.g. i16*), LLVM compiles the function correctly by introducing an uglygep (https://godbolt.org/z/Rk1dWt).