Description
#include <stdio.h>
__attribute__((noinline))
float f(_Float16 a, _Float16 b) {
a += b;
return a;
}
int main(void) {
printf("%f\n", f(32, 65504));
}
float
is capable of representing 65536, but _Float16
is not, and the assignment to a
means that holding the value in a larger type is not permitted: casts and assignments are supposed to discard excess precision and range. This is supposed to print inf
on all targets where _Float16
is supported.
However, on ARM (testing on QEmu), I am seeing:
$ bin/clang -O0 --target=armv7a-linux-gnueabihf test.c && qemu-arm -L /usr/arm-linux-gnueabihf a.out
inf
$ bin/clang -O1 --target=armv7a-linux-gnueabihf test.c && qemu-arm -L /usr/arm-linux-gnueabihf a.out
65536.000000
Same as -O1
for other optimisation levels too.
Clang initially generates:
; Function Attrs: noinline nounwind
define dso_local float @f(half noundef %a, half noundef %b) #0 {
entry:
%a.addr = alloca half, align 2
%b.addr = alloca half, align 2
store half %a, ptr %a.addr, align 2, !tbaa !5
store half %b, ptr %b.addr, align 2, !tbaa !5
%0 = load half, ptr %b.addr, align 2, !tbaa !5
%ext = fpext half %0 to float
%1 = load half, ptr %a.addr, align 2, !tbaa !5
%conv = fpext half %1 to float
%add = fadd float %conv, %ext
%conv1 = fptrunc float %add to half
store half %conv1, ptr %a.addr, align 2, !tbaa !5
%2 = load half, ptr %a.addr, align 2, !tbaa !5
%conv2 = fpext half %2 to float
ret float %conv2
}
That is, it performs the addition in float
, and converts the result to half
. This is a perfectly valid way of doing this.
InstCombine then transforms that as
-*** IR Dump Before InstCombinePass on f ***
+*** IR Dump After InstCombinePass on f ***
; Function Attrs: noinline nounwind
define dso_local float @f(half noundef %a, half noundef %b) local_unnamed_addr #0 {
entry:
- %ext = fpext half %b to float
- %conv = fpext half %a to float
- %add = fadd float %conv, %ext
- %conv1 = fptrunc float %add to half
+ %conv1 = fadd half %a, %b
%conv2 = fpext half %conv1 to float
ret float %conv2
}
Turning it into an fadd
on type half
directly. This is not necessarily a useful transformation when fadd half
is not a legal operation, but it is a valid transformation that preserves the behaviour.
During type legalisation, though, because of it not being a legal operation, it is again converted back to an fadd
on type float
:
@@ -1,13 +1,12 @@
t0: ch,glue = EntryToken
t2: f32,ch = CopyFromReg t0, Register:f32 %0
t5: i32 = bitcast t2
- t6: i16 = truncate t5
- t7: f16 = bitcast t6
+ t18: i32 = and t5, Constant:i32<65535>
+ t16: f32 = fp16_to_fp t18
t4: f32,ch = CopyFromReg t0, Register:f32 %1
t8: i32 = bitcast t4
- t9: i16 = truncate t8
- t10: f16 = bitcast t9
- t11: f16 = fadd t7, t10
- t12: f32 = fp_extend t11
- t14: ch,glue = CopyToReg t0, Register:f32 $s0, t12
+ t21: i32 = and t8, Constant:i32<65535>
+ t19: f32 = fp16_to_fp t21
+ t20: f32 = fadd t16, t19
+ t14: ch,glue = CopyToReg t0, Register:f32 $s0, t20
t15: ch = ARMISD::RET_GLUE t14, Register:f32 $s0, t14:1
However, in this promotion back to float
, the truncation is lost, hence the changed result. There seem to be no relevant flags (beyond general "don't optimise") to control this.
I initially thought this was another instance of #44218 but this seems like a completely different part of the compiler where things go wrong. But attempts to fix #44218 may, depending on how they are done, result in this issue.