Skip to content

[RISCV] Support instruction sizes up to 176-bits in disassembler. #90371

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Apr 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 37 additions & 5 deletions llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -656,12 +656,44 @@ DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
ArrayRef<uint8_t> Bytes,
uint64_t Address,
raw_ostream &CS) const {
// TODO: This will need modification when supporting instruction set
// extensions with instructions > 32-bits (up to 176 bits wide).
// It's a 16-bit instruction if bits 1:0 are not 0b11.
if ((Bytes[0] & 0b11) != 0b11)
return getInstruction16(MI, Size, Bytes, Address, CS);

// It's a 32 bit instruction if bit 0 and 1 are 1.
if ((Bytes[0] & 0x3) == 0x3)
// It's a 32-bit instruction if bits 1:0 are 0b11 (checked above) and bits 4:2
// are not 0b111.
if ((Bytes[0] & 0b1'1100) != 0b1'1100)
return getInstruction32(MI, Size, Bytes, Address, CS);

return getInstruction16(MI, Size, Bytes, Address, CS);
// 48-bit instructions are encoded as 0bxx011111.
if ((Bytes[0] & 0b11'1111) == 0b01'1111) {
Size = Bytes.size() >= 6 ? 6 : 0;
return MCDisassembler::Fail;
}

// 64-bit instructions are encoded as 0bx0111111.
if ((Bytes[0] & 0b111'1111) == 0b011'1111) {
Size = Bytes.size() >= 8 ? 8 : 0;
return MCDisassembler::Fail;
}

// Remaining cases need to check a second byte.
if (Bytes.size() < 2) {
Size = 0;
return MCDisassembler::Fail;
}

// 80-bit through 176-bit instructions are encoded as 0bxnnnxxxx_x1111111,
// where the number of bits is (80 + (nnn * 16)) for nnn != 0b111.
unsigned nnn = (Bytes[1] >> 4) & 0b111;
if (nnn != 0b111) {
Size = 10 + (nnn * 2);
if (Bytes.size() < Size)
Size = 0;
return MCDisassembler::Fail;
}

// Remaining encodings are reserved for > 176-bit instructions.
Size = 0;
return MCDisassembler::Fail;
}
29 changes: 29 additions & 0 deletions llvm/test/MC/RISCV/large-instructions.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# RUN: llvm-mc -filetype=obj -triple riscv32 < %s \
# RUN: | llvm-objdump -d - | FileCheck %s

# CHECK: 011f 4523 8967 <unknown>
.byte 0x1f, 0x01, 0x23, 0x45, 0x67, 0x89

# CHECK: 4523013f cdab8967 <unknown>
.byte 0x3f, 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd

# CHECK: 007f 4523 8967 cdab feef <unknown>
.byte 0x7f, 0x00, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe

# CHECK: 4523107f cdab8967 badcfeef <unknown>
.byte 0x7f, 0x10, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba

# CHECK: 207f 4523 8967 cdab feef badc 7698 <unknown>
.byte 0x7f, 0x20, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba, 0x98, 0x76

# CHECK: 4523307f cdab8967 badcfeef 32547698 <unknown>
.byte 0x7f, 0x30, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32

# CHECK: 407f 4523 8967 cdab feef badc 7698 3254 1210 <unknown>
.byte 0x7f, 0x40, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10, 0x12

# CHECK: 4523507f cdab8967 badcfeef 32547698 56341210 <unknown>
.byte 0x7f, 0x50, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10, 0x12, 0x34, 0x56

# CHECK: 607f 4523 8967 cdab feef badc 7698 3254 1210 5634 9a78 <unknown>
.byte 0x7f, 0x60, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10, 0x12, 0x34, 0x56, 0x78, 0x9a