Skip to content

Commit dce490e

Browse files
authored
[RISCV] Custom type legalize MVT::i8 BITREVERSE to BREV8. (#142001)
If we're only reversing a single byte, we can use BREV8 directly. If we instead let the generic type legalizer handle the i8 BITREVERSE, we get (srl (bitreverse X), XLen - 8). Op legalization then expands that to (srl (brev8 (bswap X)), XLen - 8), and SimplifyDemandedBits can reduce it to (srl (brev8 (shl X, XLen - 8)), XLen - 8). We could add a DAGCombine that pulls the shl through the brev8 so that it sits next to the srl, which would allow the pair to become (and (brev8 X), 255). However, unless we can prove that the upper XLen - 8 bits are 0 or that they aren't demanded, we can't remove the `and`. By emitting BREV8 directly while we still know the type is i8, we avoid this problem. We already DAGCombine i16 and i32 (bitreverse (bswap X)) to BREV8 early for the same reason. I've added an i7 test case so we can still see the remaining opportunity for improvement on unusual sizes such as i7. Fixes the RISC-V part of #141863.
1 parent 4811c67 commit dce490e

File tree

2 files changed

+185
-4
lines changed

2 files changed

+185
-4
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -390,6 +390,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
390390
// Zbkb can use rev8+brev8 to implement bitreverse.
391391
setOperationAction(ISD::BITREVERSE, XLenVT,
392392
Subtarget.hasStdExtZbkb() ? Custom : Expand);
393+
if (Subtarget.hasStdExtZbkb())
394+
setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
393395
}
394396

395397
if (Subtarget.hasStdExtZbb() ||
@@ -14190,6 +14192,17 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
1419014192
}
1419114193
break;
1419214194
}
14195+
case ISD::BITREVERSE: {
14196+
assert(N->getValueType(0) == MVT::i8 && Subtarget.hasStdExtZbkb() &&
14197+
"Unexpected custom legalisation");
14198+
MVT XLenVT = Subtarget.getXLenVT();
14199+
SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
14200+
SDValue NewRes = DAG.getNode(RISCVISD::BREV8, DL, XLenVT, NewOp);
14201+
// ReplaceNodeResults requires we maintain the same type for the return
14202+
// value.
14203+
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, NewRes));
14204+
break;
14205+
}
1419314206
case RISCVISD::BREV8:
1419414207
case RISCVISD::ORC_B: {
1419514208
MVT VT = N->getSimpleValueType(0);

llvm/test/CodeGen/RISCV/bswap-bitreverse.ll

Lines changed: 172 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,178 @@ define i64 @test_bswap_i64(i64 %a) nounwind {
166166
ret i64 %tmp
167167
}
168168

169+
define i7 @test_bitreverse_i7(i7 %a) nounwind {
170+
; RV32I-LABEL: test_bitreverse_i7:
171+
; RV32I: # %bb.0:
172+
; RV32I-NEXT: srli a1, a0, 8
173+
; RV32I-NEXT: lui a2, 16
174+
; RV32I-NEXT: srli a3, a0, 24
175+
; RV32I-NEXT: addi a2, a2, -256
176+
; RV32I-NEXT: and a1, a1, a2
177+
; RV32I-NEXT: and a2, a0, a2
178+
; RV32I-NEXT: slli a0, a0, 24
179+
; RV32I-NEXT: or a1, a1, a3
180+
; RV32I-NEXT: lui a3, 61681
181+
; RV32I-NEXT: slli a2, a2, 8
182+
; RV32I-NEXT: or a0, a0, a2
183+
; RV32I-NEXT: lui a2, 209715
184+
; RV32I-NEXT: addi a3, a3, -241
185+
; RV32I-NEXT: or a0, a0, a1
186+
; RV32I-NEXT: srli a1, a0, 4
187+
; RV32I-NEXT: and a0, a0, a3
188+
; RV32I-NEXT: and a1, a1, a3
189+
; RV32I-NEXT: lui a3, 344064
190+
; RV32I-NEXT: addi a2, a2, 819
191+
; RV32I-NEXT: slli a0, a0, 4
192+
; RV32I-NEXT: or a0, a1, a0
193+
; RV32I-NEXT: srli a1, a0, 2
194+
; RV32I-NEXT: and a0, a0, a2
195+
; RV32I-NEXT: and a1, a1, a2
196+
; RV32I-NEXT: lui a2, 348160
197+
; RV32I-NEXT: slli a0, a0, 2
198+
; RV32I-NEXT: or a0, a1, a0
199+
; RV32I-NEXT: srli a1, a0, 1
200+
; RV32I-NEXT: and a0, a0, a2
201+
; RV32I-NEXT: and a1, a1, a3
202+
; RV32I-NEXT: slli a0, a0, 1
203+
; RV32I-NEXT: or a0, a1, a0
204+
; RV32I-NEXT: srli a0, a0, 25
205+
; RV32I-NEXT: ret
206+
;
207+
; RV64I-LABEL: test_bitreverse_i7:
208+
; RV64I: # %bb.0:
209+
; RV64I-NEXT: srli a1, a0, 40
210+
; RV64I-NEXT: lui a2, 16
211+
; RV64I-NEXT: srli a3, a0, 56
212+
; RV64I-NEXT: srli a4, a0, 24
213+
; RV64I-NEXT: lui a5, 4080
214+
; RV64I-NEXT: srli a6, a0, 8
215+
; RV64I-NEXT: srliw a7, a0, 24
216+
; RV64I-NEXT: addiw a2, a2, -256
217+
; RV64I-NEXT: and a1, a1, a2
218+
; RV64I-NEXT: or a1, a1, a3
219+
; RV64I-NEXT: lui a3, 61681
220+
; RV64I-NEXT: and a4, a4, a5
221+
; RV64I-NEXT: srliw a6, a6, 24
222+
; RV64I-NEXT: slli a6, a6, 24
223+
; RV64I-NEXT: or a4, a6, a4
224+
; RV64I-NEXT: lui a6, 209715
225+
; RV64I-NEXT: and a5, a0, a5
226+
; RV64I-NEXT: slli a7, a7, 32
227+
; RV64I-NEXT: addiw a3, a3, -241
228+
; RV64I-NEXT: addiw a6, a6, 819
229+
; RV64I-NEXT: slli a5, a5, 24
230+
; RV64I-NEXT: or a5, a5, a7
231+
; RV64I-NEXT: slli a7, a3, 32
232+
; RV64I-NEXT: add a3, a3, a7
233+
; RV64I-NEXT: slli a7, a6, 32
234+
; RV64I-NEXT: add a6, a6, a7
235+
; RV64I-NEXT: or a1, a4, a1
236+
; RV64I-NEXT: and a2, a0, a2
237+
; RV64I-NEXT: slli a0, a0, 56
238+
; RV64I-NEXT: slli a2, a2, 40
239+
; RV64I-NEXT: or a0, a0, a2
240+
; RV64I-NEXT: li a2, 21
241+
; RV64I-NEXT: or a0, a0, a5
242+
; RV64I-NEXT: li a4, 85
243+
; RV64I-NEXT: slli a2, a2, 58
244+
; RV64I-NEXT: slli a4, a4, 56
245+
; RV64I-NEXT: or a0, a0, a1
246+
; RV64I-NEXT: srli a1, a0, 4
247+
; RV64I-NEXT: and a0, a0, a3
248+
; RV64I-NEXT: and a1, a1, a3
249+
; RV64I-NEXT: slli a0, a0, 4
250+
; RV64I-NEXT: or a0, a1, a0
251+
; RV64I-NEXT: srli a1, a0, 2
252+
; RV64I-NEXT: and a0, a0, a6
253+
; RV64I-NEXT: and a1, a1, a6
254+
; RV64I-NEXT: slli a0, a0, 2
255+
; RV64I-NEXT: or a0, a1, a0
256+
; RV64I-NEXT: srli a1, a0, 1
257+
; RV64I-NEXT: and a0, a0, a4
258+
; RV64I-NEXT: and a1, a1, a2
259+
; RV64I-NEXT: slli a0, a0, 1
260+
; RV64I-NEXT: or a0, a1, a0
261+
; RV64I-NEXT: srli a0, a0, 57
262+
; RV64I-NEXT: ret
263+
;
264+
; RV32ZBB-LABEL: test_bitreverse_i7:
265+
; RV32ZBB: # %bb.0:
266+
; RV32ZBB-NEXT: rev8 a0, a0
267+
; RV32ZBB-NEXT: lui a1, 61681
268+
; RV32ZBB-NEXT: srli a2, a0, 4
269+
; RV32ZBB-NEXT: addi a1, a1, -241
270+
; RV32ZBB-NEXT: and a2, a2, a1
271+
; RV32ZBB-NEXT: and a0, a0, a1
272+
; RV32ZBB-NEXT: lui a1, 209715
273+
; RV32ZBB-NEXT: addi a1, a1, 819
274+
; RV32ZBB-NEXT: slli a0, a0, 4
275+
; RV32ZBB-NEXT: or a0, a2, a0
276+
; RV32ZBB-NEXT: srli a2, a0, 2
277+
; RV32ZBB-NEXT: and a0, a0, a1
278+
; RV32ZBB-NEXT: and a1, a2, a1
279+
; RV32ZBB-NEXT: lui a2, 344064
280+
; RV32ZBB-NEXT: slli a0, a0, 2
281+
; RV32ZBB-NEXT: or a0, a1, a0
282+
; RV32ZBB-NEXT: lui a1, 348160
283+
; RV32ZBB-NEXT: and a1, a0, a1
284+
; RV32ZBB-NEXT: srli a0, a0, 1
285+
; RV32ZBB-NEXT: and a0, a0, a2
286+
; RV32ZBB-NEXT: slli a1, a1, 1
287+
; RV32ZBB-NEXT: or a0, a0, a1
288+
; RV32ZBB-NEXT: srli a0, a0, 25
289+
; RV32ZBB-NEXT: ret
290+
;
291+
; RV64ZBB-LABEL: test_bitreverse_i7:
292+
; RV64ZBB: # %bb.0:
293+
; RV64ZBB-NEXT: rev8 a0, a0
294+
; RV64ZBB-NEXT: lui a1, 61681
295+
; RV64ZBB-NEXT: lui a2, 209715
296+
; RV64ZBB-NEXT: addiw a1, a1, -241
297+
; RV64ZBB-NEXT: addiw a2, a2, 819
298+
; RV64ZBB-NEXT: slli a3, a1, 32
299+
; RV64ZBB-NEXT: add a1, a1, a3
300+
; RV64ZBB-NEXT: slli a3, a2, 32
301+
; RV64ZBB-NEXT: add a2, a2, a3
302+
; RV64ZBB-NEXT: srli a3, a0, 4
303+
; RV64ZBB-NEXT: and a3, a3, a1
304+
; RV64ZBB-NEXT: and a0, a0, a1
305+
; RV64ZBB-NEXT: li a1, 21
306+
; RV64ZBB-NEXT: slli a0, a0, 4
307+
; RV64ZBB-NEXT: or a0, a3, a0
308+
; RV64ZBB-NEXT: srli a3, a0, 2
309+
; RV64ZBB-NEXT: and a0, a0, a2
310+
; RV64ZBB-NEXT: and a2, a3, a2
311+
; RV64ZBB-NEXT: li a3, 85
312+
; RV64ZBB-NEXT: slli a1, a1, 58
313+
; RV64ZBB-NEXT: slli a3, a3, 56
314+
; RV64ZBB-NEXT: slli a0, a0, 2
315+
; RV64ZBB-NEXT: or a0, a2, a0
316+
; RV64ZBB-NEXT: srli a2, a0, 1
317+
; RV64ZBB-NEXT: and a0, a0, a3
318+
; RV64ZBB-NEXT: and a1, a2, a1
319+
; RV64ZBB-NEXT: slli a0, a0, 1
320+
; RV64ZBB-NEXT: or a0, a1, a0
321+
; RV64ZBB-NEXT: srli a0, a0, 57
322+
; RV64ZBB-NEXT: ret
323+
;
324+
; RV32ZBKB-LABEL: test_bitreverse_i7:
325+
; RV32ZBKB: # %bb.0:
326+
; RV32ZBKB-NEXT: slli a0, a0, 24
327+
; RV32ZBKB-NEXT: brev8 a0, a0
328+
; RV32ZBKB-NEXT: srli a0, a0, 25
329+
; RV32ZBKB-NEXT: ret
330+
;
331+
; RV64ZBKB-LABEL: test_bitreverse_i7:
332+
; RV64ZBKB: # %bb.0:
333+
; RV64ZBKB-NEXT: slli a0, a0, 56
334+
; RV64ZBKB-NEXT: brev8 a0, a0
335+
; RV64ZBKB-NEXT: srli a0, a0, 57
336+
; RV64ZBKB-NEXT: ret
337+
%tmp = call i7 @llvm.bitreverse.i7(i7 %a)
338+
ret i7 %tmp
339+
}
340+
169341
define i8 @test_bitreverse_i8(i8 %a) nounwind {
170342
; RV32I-LABEL: test_bitreverse_i8:
171343
; RV32I: # %bb.0:
@@ -245,16 +417,12 @@ define i8 @test_bitreverse_i8(i8 %a) nounwind {
245417
;
246418
; RV32ZBKB-LABEL: test_bitreverse_i8:
247419
; RV32ZBKB: # %bb.0:
248-
; RV32ZBKB-NEXT: slli a0, a0, 24
249420
; RV32ZBKB-NEXT: brev8 a0, a0
250-
; RV32ZBKB-NEXT: srli a0, a0, 24
251421
; RV32ZBKB-NEXT: ret
252422
;
253423
; RV64ZBKB-LABEL: test_bitreverse_i8:
254424
; RV64ZBKB: # %bb.0:
255-
; RV64ZBKB-NEXT: slli a0, a0, 56
256425
; RV64ZBKB-NEXT: brev8 a0, a0
257-
; RV64ZBKB-NEXT: srli a0, a0, 56
258426
; RV64ZBKB-NEXT: ret
259427
%tmp = call i8 @llvm.bitreverse.i8(i8 %a)
260428
ret i8 %tmp

0 commit comments

Comments
 (0)