llvm · qcolombet · Jun 8, 2024 · Jun 7, 2024 · Jun 7, 2024 · Jun 7, 2024
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -26586,7 +26586,12 @@ SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
     }
   }
 
-  return SDValue();
+  // Sometimes constants manage to survive very late in the pipeline, e.g.,
+  // because they are wrapped inside the <1 x f16> type. Try one last time to
+  // constant-fold them away.
+  SDValue Folded = DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N),
+                                              N->getValueType(0), {N0});
+  return Folded;
 }
 
 SDValue DAGCombiner::visitFP_TO_BF16(SDNode *N) {

diff --git a/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll b/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll
@@ -1489,9 +1489,8 @@ define amdgpu_kernel void @v_no_clamp_add_src_v2f16_f16_src(ptr addrspace(1) %ou
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-NEXT:    s_mov_b64 s[4:5], s[2:3]
 ; SI-NEXT:    buffer_load_ushort v1, v[1:2], s[4:7], 0 addr64
-; SI-NEXT:    v_cvt_f32_f16_e64 v3, s6 clamp
+; SI-NEXT:    v_cvt_f16_f32_e32 v3, 0
 ; SI-NEXT:    s_mov_b64 s[2:3], s[6:7]
-; SI-NEXT:    v_cvt_f16_f32_e32 v3, v3
 ; SI-NEXT:    s_waitcnt vmcnt(0)
 ; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
 ; SI-NEXT:    v_add_f32_e32 v1, 1.0, v1

diff --git a/llvm/test/CodeGen/AMDGPU/select-phi-s16-fp.ll b/llvm/test/CodeGen/AMDGPU/select-phi-s16-fp.ll
@@ -14,9 +14,8 @@ define void @phi_vec1half_to_f32_with_const_folding(ptr addrspace(1) %dst) #0 {
 ; CHECK:       ; %bb.0: ; %entry
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CHECK-NEXT:    s_mov_b32 s4, 0
-; CHECK-NEXT:    v_cvt_f32_f16_e64 v2, s4
 ; CHECK-NEXT:  ; %bb.1: ; %bb
-; CHECK-NEXT:    v_cvt_f16_f32_e64 v2, v2
+; CHECK-NEXT:    v_cvt_f16_f32_e64 v2, s4
 ; CHECK-NEXT:    s_mov_b32 s7, 0xf000
 ; CHECK-NEXT:    s_mov_b32 s6, 0
 ; CHECK-NEXT:    s_mov_b32 s4, s6

diff --git a/llvm/test/CodeGen/ARM/arm-half-promote.ll b/llvm/test/CodeGen/ARM/arm-half-promote.ll
@@ -116,9 +116,7 @@ define fastcc { <8 x half>, <8 x half> } @f3() {
 
 define void @extract_insert(ptr %dst) optnone noinline {
 ; CHECK-LABEL: extract_insert:
-; CHECK: movs r1, #0
-; CHECK: vmov s0, r1
-; CHECK: vcvtb.f32.f16 s0, s0
+; CHECK: vmov.i32 d0, #0x0
 ; CHECK: vcvtb.f16.f32 s0, s0
 ; CHECK: vmov r1, s0
 ; CHECK: strh r1, [r0]