@@ -1049,34 +1049,36 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, float* noc
1049
1049
; CHECK-NEXT: sub sp, #32
1050
1050
; CHECK-NEXT: cmp r3, #8
1051
1051
; CHECK-NEXT: blo.w .LBB16_12
1052
- ; CHECK-NEXT: @ %bb.1: @ %entry
1053
- ; CHECK-NEXT: lsrs.w r12, r3, #2
1052
+ ; CHECK-NEXT: @ %bb.1: @ %if.then
1053
+ ; CHECK-NEXT: movs r7, #0
1054
+ ; CHECK-NEXT: cmp.w r7, r3, lsr #2
1054
1055
; CHECK-NEXT: beq.w .LBB16_12
1055
1056
; CHECK-NEXT: @ %bb.2: @ %while.body.lr.ph
1056
- ; CHECK-NEXT: ldrh r6, [r0]
1057
- ; CHECK-NEXT: movs r4, #1
1058
- ; CHECK-NEXT: ldrd r5, r10, [r0, #4]
1059
- ; CHECK-NEXT: sub.w r3, r6, #8
1060
- ; CHECK-NEXT: add.w r0, r3, r3, lsr #29
1061
- ; CHECK-NEXT: asrs r7, r0, #3
1062
- ; CHECK-NEXT: cmp r7, #1
1057
+ ; CHECK-NEXT: ldrh r4, [r0]
1058
+ ; CHECK-NEXT: lsr.w r10, r3, #2
1059
+ ; CHECK-NEXT: ldrd r5, r12, [r0, #4]
1060
+ ; CHECK-NEXT: movs r3, #1
1061
+ ; CHECK-NEXT: sub.w r7, r4, #8
1062
+ ; CHECK-NEXT: add.w r0, r7, r7, lsr #29
1063
+ ; CHECK-NEXT: asrs r6, r0, #3
1064
+ ; CHECK-NEXT: cmp r6, #1
1063
1065
; CHECK-NEXT: it gt
1064
- ; CHECK-NEXT: asrgt r4 , r0, #3
1065
- ; CHECK-NEXT: add.w r0, r5, r6 , lsl #2
1066
+ ; CHECK-NEXT: asrgt r3 , r0, #3
1067
+ ; CHECK-NEXT: add.w r0, r5, r4 , lsl #2
1066
1068
; CHECK-NEXT: sub.w r9, r0, #4
1067
- ; CHECK-NEXT: rsbs r0, r6 , #0
1068
- ; CHECK-NEXT: str r4 , [sp, #4] @ 4-byte Spill
1069
- ; CHECK-NEXT: and r4, r3 , #7
1069
+ ; CHECK-NEXT: rsbs r0, r4 , #0
1070
+ ; CHECK-NEXT: str r3 , [sp, #4] @ 4-byte Spill
1071
+ ; CHECK-NEXT: and r3, r7 , #7
1070
1072
; CHECK-NEXT: str r0, [sp, #16] @ 4-byte Spill
1071
- ; CHECK-NEXT: add.w r0, r10 , #32
1072
- ; CHECK-NEXT: str r6 , [sp, #20] @ 4-byte Spill
1073
+ ; CHECK-NEXT: add.w r0, r12 , #32
1074
+ ; CHECK-NEXT: str r4 , [sp, #20] @ 4-byte Spill
1073
1075
; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill
1074
- ; CHECK-NEXT: str r4 , [sp, #12] @ 4-byte Spill
1076
+ ; CHECK-NEXT: str r3 , [sp, #12] @ 4-byte Spill
1075
1077
; CHECK-NEXT: b .LBB16_4
1076
1078
; CHECK-NEXT: .LBB16_3: @ %while.end
1077
1079
; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1
1078
1080
; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload
1079
- ; CHECK-NEXT: subs.w r12, r12 , #1
1081
+ ; CHECK-NEXT: subs.w r10, r10 , #1
1080
1082
; CHECK-NEXT: vstrb.8 q0, [r2], #16
1081
1083
; CHECK-NEXT: add.w r0, r5, r0, lsl #2
1082
1084
; CHECK-NEXT: add.w r5, r0, #16
@@ -1085,25 +1087,25 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, float* noc
1085
1087
; CHECK-NEXT: @ =>This Loop Header: Depth=1
1086
1088
; CHECK-NEXT: @ Child Loop BB16_6 Depth 2
1087
1089
; CHECK-NEXT: @ Child Loop BB16_10 Depth 2
1088
- ; CHECK-NEXT: add.w lr, r10, #8
1089
1090
; CHECK-NEXT: vldrw.u32 q0, [r1], #16
1090
- ; CHECK-NEXT: ldrd r3, r7, [r10]
1091
- ; CHECK-NEXT: ldm.w lr, {r0, r4, r6, lr}
1092
- ; CHECK-NEXT: ldrd r11, r8, [r10, #24]
1091
+ ; CHECK-NEXT: ldrd r7, r6, [r12]
1092
+ ; CHECK-NEXT: ldrd r0, r4, [r12, #8]
1093
+ ; CHECK-NEXT: ldrd r3, lr, [r12, #16]
1094
+ ; CHECK-NEXT: ldrd r11, r8, [r12, #24]
1093
1095
; CHECK-NEXT: vstrb.8 q0, [r9], #16
1094
1096
; CHECK-NEXT: vldrw.u32 q0, [r5], #32
1095
1097
; CHECK-NEXT: strd r9, r1, [sp, #24] @ 8-byte Folded Spill
1096
1098
; CHECK-NEXT: vldrw.u32 q1, [r5, #-28]
1097
- ; CHECK-NEXT: vmul.f32 q0, q0, r3
1099
+ ; CHECK-NEXT: vmul.f32 q0, q0, r7
1098
1100
; CHECK-NEXT: vldrw.u32 q6, [r5, #-24]
1099
1101
; CHECK-NEXT: vldrw.u32 q4, [r5, #-20]
1100
- ; CHECK-NEXT: vfma.f32 q0, q1, r7
1102
+ ; CHECK-NEXT: vfma.f32 q0, q1, r6
1101
1103
; CHECK-NEXT: vldrw.u32 q5, [r5, #-16]
1102
1104
; CHECK-NEXT: vfma.f32 q0, q6, r0
1103
1105
; CHECK-NEXT: vldrw.u32 q2, [r5, #-12]
1104
1106
; CHECK-NEXT: vfma.f32 q0, q4, r4
1105
1107
; CHECK-NEXT: vldrw.u32 q3, [r5, #-8]
1106
- ; CHECK-NEXT: vfma.f32 q0, q5, r6
1108
+ ; CHECK-NEXT: vfma.f32 q0, q5, r3
1107
1109
; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
1108
1110
; CHECK-NEXT: vfma.f32 q0, q2, lr
1109
1111
; CHECK-NEXT: vldrw.u32 q1, [r5, #-4]
@@ -1147,26 +1149,26 @@ define void @fir(%struct.arm_fir_instance_f32* nocapture readonly %S, float* noc
1147
1149
; CHECK-NEXT: .LBB16_8: @ %for.end
1148
1150
; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1
1149
1151
; CHECK-NEXT: ldrd r9, r1, [sp, #24] @ 8-byte Folded Reload
1150
- ; CHECK-NEXT: ldr r4 , [sp, #12] @ 4-byte Reload
1151
- ; CHECK-NEXT: cmp.w r4 , #0
1152
+ ; CHECK-NEXT: ldr r3 , [sp, #12] @ 4-byte Reload
1153
+ ; CHECK-NEXT: cmp.w r3 , #0
1152
1154
; CHECK-NEXT: beq .LBB16_3
1153
1155
; CHECK-NEXT: b .LBB16_9
1154
1156
; CHECK-NEXT: .LBB16_9: @ %while.body76.preheader
1155
1157
; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1
1156
- ; CHECK-NEXT: mov r3 , r5
1157
- ; CHECK-NEXT: mov lr, r4
1158
+ ; CHECK-NEXT: mov r6 , r5
1159
+ ; CHECK-NEXT: mov lr, r3
1158
1160
; CHECK-NEXT: .LBB16_10: @ %while.body76
1159
1161
; CHECK-NEXT: @ Parent Loop BB16_4 Depth=1
1160
1162
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
1161
1163
; CHECK-NEXT: ldr r0, [r7], #4
1162
- ; CHECK-NEXT: vldrw.u32 q1, [r3 ], #4
1164
+ ; CHECK-NEXT: vldrw.u32 q1, [r6 ], #4
1163
1165
; CHECK-NEXT: subs.w lr, lr, #1
1164
1166
; CHECK-NEXT: vfma.f32 q0, q1, r0
1165
1167
; CHECK-NEXT: bne .LBB16_10
1166
1168
; CHECK-NEXT: b .LBB16_11
1167
1169
; CHECK-NEXT: .LBB16_11: @ %while.end.loopexit
1168
1170
; CHECK-NEXT: @ in Loop: Header=BB16_4 Depth=1
1169
- ; CHECK-NEXT: add.w r5, r5, r4 , lsl #2
1171
+ ; CHECK-NEXT: add.w r5, r5, r3 , lsl #2
1170
1172
; CHECK-NEXT: b .LBB16_3
1171
1173
; CHECK-NEXT: .LBB16_12: @ %if.end
1172
1174
; CHECK-NEXT: add sp, #32
0 commit comments