-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[AMDGPU] Remove TH_BYPASS from CPol #139887
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
Shoreshen
commented
May 14, 2025
- Remove TH_BYPASS from CPol
- Keep availability for parsing "BYPASS" as TH
- Stop printing "BYPASS" for asm printer
@llvm/pr-subscribers-mc @llvm/pr-subscribers-backend-amdgpu Author: None (Shoreshen) Changes
Patch is 60.09 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/139887.diff 12 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 28370b8670f05..4023ce996e0b3 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -5086,7 +5086,7 @@ bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
(TH == AMDGPU::CPol::TH_NT_HT)))
return PrintError("invalid th value for SMEM instruction");
- if (TH == AMDGPU::CPol::TH_BYPASS) {
+ if (TH == AMDGPU::CPol::TH_WB) { // TH_LU == TH_WB == 3
if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
CPol & AMDGPU::CPol::TH_REAL_BYPASS) ||
(Scope == AMDGPU::CPol::SCOPE_SYS &&
@@ -6774,7 +6774,7 @@ ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
.Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
.Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
.Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
- .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
+ .Case("BYPASS", AMDGPU::CPol::TH_LU)
.Default(0xffffffff);
}
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index a56bca514aff3..b49889403bb88 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -206,9 +206,8 @@ void AMDGPUInstPrinter::printTH(const MCInst *MI, int64_t TH, int64_t Scope,
case AMDGPU::CPol::TH_HT:
O << "HT";
break;
- case AMDGPU::CPol::TH_BYPASS: // or LU or WB
- O << (Scope == AMDGPU::CPol::SCOPE_SYS ? "BYPASS"
- : (IsStore ? "WB" : "LU"));
+ case AMDGPU::CPol::TH_LU: // TH_LU == TH_WB == 3
+ O << (IsStore ? "WB" : "LU");
break;
case AMDGPU::CPol::TH_NT_RT:
O << "NT_RT";
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index 0f603a43fd626..e75e71b064a38 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -379,7 +379,6 @@ enum CPol {
TH_RT_NT = 5, // regular (CU, SE), non-temporal (MALL)
TH_NT_HT = 6, // non-temporal (CU, SE), high-temporal (MALL)
TH_NT_WB = 7, // non-temporal (CU, SE), high-temporal with write-back (MALL)
- TH_BYPASS = 3, // only to be used with scope = 3
TH_RESERVED = 7, // unused value for load insts
diff --git a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-lastuse-metadata.ll b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-lastuse-metadata.ll
index 2b10d469acf5c..6490320817a09 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-lastuse-metadata.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-lastuse-metadata.ll
@@ -106,7 +106,7 @@ define amdgpu_kernel void @buffer_last_use_and_volatile_load(ptr addrspace(7) %i
; GFX12-NEXT: s_mov_b32 s13, s2
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT: s_or_b64 s[8:9], s[8:9], s[12:13]
-; GFX12-NEXT: buffer_load_b32 v0, v0, s[8:11], null offen th:TH_LOAD_BYPASS scope:SCOPE_SYS
+; GFX12-NEXT: buffer_load_b32 v0, v0, s[8:11], null offen th:TH_LOAD_LU scope:SCOPE_SYS
; GFX12-NEXT: s_clause 0x1
; GFX12-NEXT: s_load_b32 s13, s[4:5], 0x30
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x20
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll
index a00af8e5b6582..a1ae26bddca59 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll
@@ -71,7 +71,7 @@ define amdgpu_kernel void @flat_last_use_and_volatile_load(ptr %in, ptr %out) {
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_mov_b32_e32 v0, s2
; GFX12-NEXT: v_mov_b32_e32 v1, s3
-; GFX12-NEXT: flat_load_b32 v2, v[0:1] th:TH_LOAD_BYPASS scope:SCOPE_SYS
+; GFX12-NEXT: flat_load_b32 v2, v[0:1] th:TH_LOAD_LU scope:SCOPE_SYS
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_loadcnt 0x0
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-lastuse.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-lastuse.ll
index 5f952b98041f3..c51532353166f 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-lastuse.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-lastuse.ll
@@ -52,7 +52,7 @@ define amdgpu_kernel void @global_last_use_and_volatile_load(ptr addrspace(1) %i
; GFX12-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: global_load_b32 v1, v0, s[2:3] th:TH_LOAD_BYPASS scope:SCOPE_SYS
+; GFX12-NEXT: global_load_b32 v1, v0, s[2:3] th:TH_LOAD_LU scope:SCOPE_SYS
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_loadcnt 0x0
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-lastuse.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-lastuse.ll
index bc905fa564f8a..d12aa49052d4a 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-lastuse.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-lastuse.ll
@@ -51,7 +51,7 @@ define amdgpu_kernel void @private_last_use_and_volatile_load(ptr addrspace(5) %
; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX12-NEXT: v_mov_b32_e32 v0, 0
; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: scratch_load_b32 v1, off, s2 th:TH_LOAD_BYPASS scope:SCOPE_SYS
+; GFX12-NEXT: scratch_load_b32 v1, off, s2 th:TH_LOAD_LU scope:SCOPE_SYS
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_loadcnt 0x0
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_features.s b/llvm/test/MC/AMDGPU/gfx12_asm_features.s
index ba1e0d6462ac8..6d83934687176 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_features.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_features.s
@@ -76,7 +76,7 @@ buffer_load_b32 v5, off, s[8:11], s3 offset:8388607 scope:SCOPE_DEV th:TH_LOAD_N
// GFX12: buffer_load_b32 v5, off, s[8:11], s3 offset:8388607 th:TH_LOAD_NT_HT scope:SCOPE_DEV ; encoding: [0x03,0x00,0x05,0xc4,0x05,0x10,0xe8,0x00,0x00,0xff,0xff,0x7f]
tbuffer_load_d16_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_UINT] offset:8388607 th:TH_LOAD_BYPASS scope:SCOPE_SYS
-// GFX12: tbuffer_load_d16_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_UINT] offset:8388607 th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x03,0x00,0x22,0xc4,0x04,0xe0,0xbc,0x02,0x00,0xff,0xff,0x7f]
+// GFX12: tbuffer_load_d16_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_UINT] offset:8388607 th:TH_LOAD_LU scope:SCOPE_SYS ; encoding: [0x03,0x00,0x22,0xc4,0x04,0xe0,0xbc,0x02,0x00,0xff,0xff,0x7f]
tbuffer_load_d16_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_UINT] offset:8388607 scope:SCOPE_SYS th:TH_LOAD_BYPASS
-// GFX12: tbuffer_load_d16_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_UINT] offset:8388607 th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x03,0x00,0x22,0xc4,0x04,0xe0,0xbc,0x02,0x00,0xff,0xff,0x7f]
+// GFX12: tbuffer_load_d16_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_UINT] offset:8388607 th:TH_LOAD_LU scope:SCOPE_SYS ; encoding: [0x03,0x00,0x22,0xc4,0x04,0xe0,0xbc,0x02,0x00,0xff,0xff,0x7f]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vbuffer_mtbuf.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vbuffer_mtbuf.txt
index 8d8cfc172ad75..541e302c76935 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vbuffer_mtbuf.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vbuffer_mtbuf.txt
@@ -36,7 +36,7 @@
# GFX12: tbuffer_load_d16_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_SSCALED] offset:8388607 th:TH_LOAD_NT_HT scope:SCOPE_DEV ; encoding: [0x03,0x00,0x22,0xc4,0x04,0xe0,0x68,0x02,0x00,0xff,0xff,0x7f]
0x03,0x00,0x22,0xc4,0x04,0xe0,0x68,0x02,0x00,0xff,0xff,0x7f
-# GFX12: tbuffer_load_d16_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_UINT] offset:8388607 th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x03,0x00,0x22,0xc4,0x04,0xe0,0xbc,0x02,0x00,0xff,0xff,0x7f]
+# GFX12: tbuffer_load_d16_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_UINT] offset:8388607 th:TH_LOAD_LU scope:SCOPE_SYS ; encoding: [0x03,0x00,0x22,0xc4,0x04,0xe0,0xbc,0x02,0x00,0xff,0xff,0x7f]
0x03,0x00,0x22,0xc4,0x04,0xe0,0xbc,0x02,0x00,0xff,0xff,0x7f
# GFX12: tbuffer_load_d16_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_UINT] offset:8388607 scope:SCOPE_SE ; encoding: [0x03,0x00,0x22,0xc4,0x04,0xe0,0x84,0x02,0x00,0xff,0xff,0x7f]
@@ -78,7 +78,7 @@
# GFX12: tbuffer_load_d16_format_xy v4, off, ttmp[4:7], s3 format:[BUF_FMT_16_USCALED] offset:8388607 th:TH_LOAD_NT_HT scope:SCOPE_DEV ; encoding: [0x03,0x40,0x22,0xc4,0x04,0xe0,0xe8,0x04,0x00,0xff,0xff,0x7f]
0x03,0x40,0x22,0xc4,0x04,0xe0,0xe8,0x04,0x00,0xff,0xff,0x7f
-# GFX12: tbuffer_load_d16_format_xy v4, off, ttmp[4:7], s3 format:[BUF_FMT_16_SSCALED] offset:8388607 th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x03,0x40,0x22,0xc4,0x04,0xe0,0x3c,0x05,0x00,0xff,0xff,0x7f]
+# GFX12: tbuffer_load_d16_format_xy v4, off, ttmp[4:7], s3 format:[BUF_FMT_16_SSCALED] offset:8388607 th:TH_LOAD_LU scope:SCOPE_SYS ; encoding: [0x03,0x40,0x22,0xc4,0x04,0xe0,0x3c,0x05,0x00,0xff,0xff,0x7f]
0x03,0x40,0x22,0xc4,0x04,0xe0,0x3c,0x05,0x00,0xff,0xff,0x7f
# GFX12: tbuffer_load_d16_format_xy v4, off, ttmp[4:7], s3 format:[BUF_FMT_16_SSCALED] offset:8388607 scope:SCOPE_SE ; encoding: [0x03,0x40,0x22,0xc4,0x04,0xe0,0x04,0x05,0x00,0xff,0xff,0x7f]
@@ -120,7 +120,7 @@
# GFX12: tbuffer_load_d16_format_xyz v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_8_8_UNORM] offset:8388607 th:TH_LOAD_NT_HT scope:SCOPE_DEV ; encoding: [0x03,0x80,0x22,0xc4,0x04,0xe0,0x68,0x07,0x00,0xff,0xff,0x7f]
0x03,0x80,0x22,0xc4,0x04,0xe0,0x68,0x07,0x00,0xff,0xff,0x7f
-# GFX12: tbuffer_load_d16_format_xyz v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_8_8_SNORM] offset:8388607 th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x03,0x80,0x22,0xc4,0x04,0xe0,0xbc,0x07,0x00,0xff,0xff,0x7f]
+# GFX12: tbuffer_load_d16_format_xyz v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_8_8_SNORM] offset:8388607 th:TH_LOAD_LU scope:SCOPE_SYS ; encoding: [0x03,0x80,0x22,0xc4,0x04,0xe0,0xbc,0x07,0x00,0xff,0xff,0x7f]
0x03,0x80,0x22,0xc4,0x04,0xe0,0xbc,0x07,0x00,0xff,0xff,0x7f
# GFX12: tbuffer_load_d16_format_xyz v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_8_8_SNORM] offset:8388607 scope:SCOPE_SE ; encoding: [0x03,0x80,0x22,0xc4,0x04,0xe0,0x84,0x07,0x00,0xff,0xff,0x7f]
@@ -162,7 +162,7 @@
# GFX12: tbuffer_load_d16_format_xyzw v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_8_8_SINT] offset:8388607 th:TH_LOAD_NT_HT scope:SCOPE_DEV ; encoding: [0x03,0xc0,0x22,0xc4,0x04,0xe0,0xe8,0x09,0x00,0xff,0xff,0x7f]
0x03,0xc0,0x22,0xc4,0x04,0xe0,0xe8,0x09,0x00,0xff,0xff,0x7f
-# GFX12: tbuffer_load_d16_format_xyzw v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_32_UINT] offset:8388607 th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x03,0xc0,0x22,0xc4,0x04,0xe0,0x3c,0x0a,0x00,0xff,0xff,0x7f]
+# GFX12: tbuffer_load_d16_format_xyzw v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_32_UINT] offset:8388607 th:TH_LOAD_LU scope:SCOPE_SYS ; encoding: [0x03,0xc0,0x22,0xc4,0x04,0xe0,0x3c,0x0a,0x00,0xff,0xff,0x7f]
0x03,0xc0,0x22,0xc4,0x04,0xe0,0x3c,0x0a,0x00,0xff,0xff,0x7f
# GFX12: tbuffer_load_d16_format_xyzw v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_32_UINT] offset:8388607 scope:SCOPE_SE ; encoding: [0x03,0xc0,0x22,0xc4,0x04,0xe0,0x04,0x0a,0x00,0xff,0xff,0x7f]
@@ -204,7 +204,7 @@
# GFX12: tbuffer_load_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_16_16_SNORM] offset:8388607 th:TH_LOAD_NT_HT scope:SCOPE_DEV ; encoding: [0x03,0x00,0x20,0xc4,0x04,0xe0,0x68,0x0c,0x00,0xff,0xff,0x7f]
0x03,0x00,0x20,0xc4,0x04,0xe0,0x68,0x0c,0x00,0xff,0xff,0x7f
-# GFX12: tbuffer_load_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_16_16_USCALED] offset:8388607 th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x03,0x00,0x20,0xc4,0x04,0xe0,0xbc,0x0c,0x00,0xff,0xff,0x7f]
+# GFX12: tbuffer_load_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_16_16_USCALED] offset:8388607 th:TH_LOAD_LU scope:SCOPE_SYS ; encoding: [0x03,0x00,0x20,0xc4,0x04,0xe0,0xbc,0x0c,0x00,0xff,0xff,0x7f]
0x03,0x00,0x20,0xc4,0x04,0xe0,0xbc,0x0c,0x00,0xff,0xff,0x7f
# GFX12: tbuffer_load_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_16_16_USCALED] offset:8388607 scope:SCOPE_SE ; encoding: [0x03,0x00,0x20,0xc4,0x04,0xe0,0x84,0x0c,0x00,0xff,0xff,0x7f]
@@ -246,7 +246,7 @@
# GFX12: tbuffer_load_format_xy v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_16_16_FLOAT] offset:8388607 th:TH_LOAD_NT_HT scope:SCOPE_DEV ; encoding: [0x03,0x40,0x20,0xc4,0x04,0xe0,0xe8,0x0e,0x00,0xff,0xff,0x7f]
0x03,0x40,0x20,0xc4,0x04,0xe0,0xe8,0x0e,0x00,0xff,0xff,0x7f
-# GFX12: tbuffer_load_format_xy v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_10_11_11_FLOAT] offset:8388607 th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x03,0x40,0x20,0xc4,0x04,0xe0,0x3c,0x0f,0x00,0xff,0xff,0x7f]
+# GFX12: tbuffer_load_format_xy v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_10_11_11_FLOAT] offset:8388607 th:TH_LOAD_LU scope:SCOPE_SYS ; encoding: [0x03,0x40,0x20,0xc4,0x04,0xe0,0x3c,0x0f,0x00,0xff,0xff,0x7f]
0x03,0x40,0x20,0xc4,0x04,0xe0,0x3c,0x0f,0x00,0xff,0xff,0x7f
# GFX12: tbuffer_load_format_xy v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_10_11_11_FLOAT] offset:8388607 scope:SCOPE_SE ; encoding: [0x03,0x40,0x20,0xc4,0x04,0xe0,0x04,0x0f,0x00,0xff,0xff,0x7f]
@@ -288,7 +288,7 @@
# GFX12: tbuffer_load_format_xyz v[4:6], off, ttmp[4:7], s3 format:[BUF_FMT_10_10_10_2_UINT] offset:8388607 th:TH_LOAD_NT_HT scope:SCOPE_DEV ; encoding: [0x03,0x80,0x20,0xc4,0x04,0xe0,0x68,0x11,0x00,0xff,0xff,0x7f]
0x03,0x80,0x20,0xc4,0x04,0xe0,0x68,0x11,0x00,0xff,0xff,0x7f
-# GFX12: tbuffer_load_format_xyz v[4:6], off, ttmp[4:7], s3 format:[BUF_FMT_10_10_10_2_SINT] offset:8388607 th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x03,0x80,0x20,0xc4,0x04,0xe0,0xbc,0x11,0x00,0xff,0xff,0x7f]
+# GFX12: tbuffer_load_format_xyz v[4:6], off, ttmp[4:7], s3 format:[BUF_FMT_10_10_10_2_SINT] offset:8388607 th:TH_LOAD_LU scope:SCOPE_SYS ; encoding: [0x03,0x80,0x20,0xc4,0x04,0xe0,0xbc,0x11,0x00,0xff,0xff,0x7f]
0x03,0x80,0x20,0xc4,0x04,0xe0,0xbc,0x11,0x00,0xff,0xff,0x7f
# GFX12: tbuffer_load_format_xyz v[4:6], off, ttmp[4:7], s3 format:[BUF_FMT_10_10_10_2_SINT] offset:8388607 scope:SCOPE_SE ; encoding: [0x03,0x80,0x20,0xc4,0x04,0xe0,0x84,0x11,0x00,0xff,0xff,0x7f]
@@ -330,7 +330,7 @@
# GFX12: tbuffer_load_format_xyzw v[4:7], off, ttmp[4:7], s3 format:[BUF_FMT_2_10_10_10_SSCALED] offset:8388607 th:TH_LOAD_NT_HT scope:SCOPE_DEV ; encoding: [0x03,0xc0,0x20,0xc4,0x04,0xe0,0xe8,0x13,0x00,0xff,0xff,0x7f]
0x03,0xc0,0x20,0xc4,0x04,0xe0,0xe8,0x13,0x00,0xff,0xff,0x7f
-# GFX12: tbuffer_load_format_xyzw v[4:7], off, ttmp[4:7], s3 format:[BUF_FMT_2_10_10_10_UINT] offset:8388607 th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x03,0xc0,0x20,0xc4,0x04,0xe0,0x3c,0x14,0x00,0xff,0xff,0x7f]
+# GFX12: tbuffer_load_format_xyzw v[4:7], off, ttmp[4:7], s3 format:[BUF_FMT_2_10_10_10_UINT] offset:8388607 th:TH_LOAD_LU scope:SCOPE_SYS ; encoding: [0x03,0xc0,0x20,0xc4,0x04,0xe0,0x3c,0x14,0x00,0xff,0xff,0x7f]
0x03,0xc0,0x20,0xc4,0x04,0xe0,0x3c,0x14,0x00,0xff,0xff,0x7f
# GFX12: tbuffer_load_format_xyzw v[4:7], off, ttmp[4:7], s3 format:[BUF_FMT_2_10_10_10_UINT] offset:8388607 scope:SCOPE_SE ; encoding: [0x03,0xc0,0x20,0xc4,0x04,0xe0,0x04,0x14,0x00,0xff,0xff,0x7f]
@@ -372,7 +372,7 @@
# GFX12: tbuffer_store_d16_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_8_8_8_USCALED] offset:8388607 th:TH_STORE_NT_HT scope:SCOPE_DEV ; encoding: [0x03,0x00,0x23,0xc4,0x04,0xe0,0x68,0x16,0x00,0xff,0xff,0x7f]
0x03,0x00,0x23,0xc4,0x04,0xe0,0x68,0x16,0x00,0xff,0xff,0x7f
-# GFX12: tbuffer_store_d16_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_8_8_8_SSCALED] offset:8388607 th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x03,0x00,0x23,0xc4,0x04,0xe0,0xbc,0x16,0x00,0xff,0xff,0x7f]
+# GFX12: tbuffer_store_d16_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_8_8_8_SSCALED] offset:8388607 th:TH_STORE_WB scope:SCOPE_SYS ; encoding: [0x03,0x00,0x23,0xc4,0x04,0xe0,0xbc,0x16,0x00,0xff,0xff,0x7f]
0x03,0x00,0x23,0xc4,0x04,0xe0,0xbc,0x16,0x00,0xff,0xff,0x7f
# GFX12: tbuffer_store_d16_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_8_8_8_SSCALED] offset:8388607 scope:SCOPE_SE ; encoding: [0x03,0x00,0x23,0xc4,0x04,0xe0,0x84,0x16,0x00,0xff,0xff,0x7f]
@@ -414,7 +414,7 @@
# GFX12: tbuffer_store_d16_format_xy v4, off, ttmp[4:7], s3 format:[BUF_FMT_32_32_SINT] offset:8388607 th:TH_STORE_NT_HT scope:SCOPE_DEV ; encoding: [0x03,0x40,0x23,0xc4,0x04,0xe0,0xe8,0x18,0x00,0xff,0xff,0x7f]
0x03,0x40,0x23,0xc4,0x04,0xe0,0xe8,0x18,0x00,0xff,0xff,0x7f
-# GFX12: tbuffer_store_d16_format_xy v4, off, ttmp[4:7], s3 format:[BUF_FMT_32_32_FLOAT] offset:8388607 th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x03,0x40,0x23,0xc4,0x04,0xe0,0x3c,0x19,0x00,0xff,0xff,0x7f]
+# GFX12: tbuffer_store_d16_format_xy v4, off, ttmp[4:7], s3 format:[BUF_FMT_32_32_FLOAT] offset:8388607 th:TH_STORE_WB scope:SCOPE_SYS ; encoding: [0x03,0x40,0x23,0xc4,0x04,0xe0,0x3c,0x19,0x00,0xff,0xff,0x7f]
0x03,0x40,0x23,0xc4,0x04,0xe0,0x3c,0x19,0x00,0xff,0xff,0x7f
# GFX12: tbuffer_store_d16_format_xy v4, off, ttmp[4:7], s3 format:[BUF_FMT_32_32_FLOAT] offset:8388607 scope:SCOPE_SE ; encoding: [0x03,0x40,0x23,0xc4,0x04,0xe0,0x04,0x19,0x00,0xff,0xff,0x7f]
@@ -456,7 +456,7 @@
# GFX12: tbuffer_store_d16_format_xyz v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_16_16_16_16_SSCALED] offset:8388607 th:TH_STORE_NT_HT scope:SCOPE_DEV ; encoding: [0x03,0x80,0x23,0xc4,0x04,0xe0,0x68,0x1b,0x00,0xff,0xff,0x7f]
0x03,0x80,0x23,0xc4,0x04,0xe0,0x68,0x1b,0x00,0xff,0xff,0x7f
-# GFX12: tbuffer_store_d16_format_xyz v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_16_16_16_16_UINT] offset:8388607 th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x03,0x80,0x23,0xc4,0x04,0xe0,0xbc,0x1b,0x00,0xff,0xff,0x7f]
+# GFX12: tbuffer_store_d16_format_xyz v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_16_16_16_16_UINT] offset:8388607 th:TH_STORE_WB scope:SCOPE_SYS ; encoding: [0x03,0x80,0x23,0xc4,0x04,0xe0,0xbc,0x1b,0x00,0xff,0xff,0x7f]
0x03,0x80,0x23,0xc4,0x04,0xe0,0xbc,0x1b,0x00,0xff,0xff,0x7f
# GFX12: tbuffer_store_d16_format_xyz v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_16_16_16_16_UINT] offset:8388607 scope:SCOPE_SE ; encoding: [0x03,0x80,0x23,0xc4,0x04,0xe0,0x84,0x1b,0x00,0xff,0xff,0x7f]
@@ -498,7 +498,7 @@
# GFX12: tbuffer_store_d16_format_xyzw v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_32_32_32_SINT] offset:8388607 th:TH_STORE_NT_HT scope:SCOPE_DEV ; encoding: [0x03,0xc0,0x23,0xc4,0x04,0xe0,0xe8,0x1d,0x00,0xff,0xff,0x7f]
0x03,0xc0,0x23,0xc4,0x04,0xe0,0xe8,0x1d,0x00,0xff,0xff,0x7f
-# GFX12: tbuffer_store_d16_format_xyzw v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_32_32_32_FLOAT] offset:8388607 th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x03,0xc0,0x23,0xc4,0x04,0xe0,0x3c,0x1e,0x00,0xff,0xff,0x7f]
+# GFX12: tbuffer_store_d16_format_xyzw v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_32_32_32_FLOAT] offset:8388607 th:TH_STORE_WB scope:SCOPE_SYS ; encoding: [0x03,0xc0,0x23,0xc4,0x04,0xe0,0x3c,0x1e,0x00,0xff,0xff,0x7f]
0x03,0xc0,0x23,0xc4,0x04,0xe0,0x3c,0x1e,0x00,0xff,0xff,0x7f
# GFX12: tbuffer_store_d16_format_xyzw v[4:5], off, ttmp[4:7], s3 format:[BUF_FMT_32_32_32_FLOAT] offset:8388607 scope:SCOPE_SE ; encoding: [0x03,0xc0,0x23,0xc4,0x04,0xe0,0x04,0x1e,0x00,0xff,0xff,0x7f]
@@ -540,7 +540,7 @@
# GFX12: tbuffer_store_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_USCALED] offset:8388607 th:TH_STORE_NT_HT scope:SCOPE_DEV ; encoding: [0x03,0x00,0x21,0xc4,0x04,0xe0,0xe8,0x01,0x00,0xff,0xff,0x7f]
0x03,0x00,0x21,0xc4,0x04,0xe0,0xe8,0x01,0x00,0xff,0xff,0x7f
-# GFX12: tbuffer_store_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_SSCALED] offset:8388607 th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x03,0x00,0x21,0xc4,0x04,0xe0,0x3c,0x02,0x00,0xff,0xff,0x7f]
+# GFX12: tbuffer_store_format_x v4, off, ttmp[4:7], s3 format:[BUF_FMT_8_SSCALED] offset:8388607 th:TH_STORE_WB scope:SCOPE_SYS ; encoding: [0x03,0x00,0x21,0xc4,0x04,0xe0,0x3c,0x02,0x00,0xff,0xff,0x7f]
0x03,0x00,0x21,0xc4,0x04,0xe0,0x3c,0x02,0x00,0xff,0xff,0x7f
# GFX12: tbuffer_store_format_x v4, off, ttmp[4:7], s3 format...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
What's the rationale? |
|
Sadly I don't think the public ISA doc is too useful as a reference for assembler syntax. We generally try to follow SP3 instead, unless there is a good reason to diverge from it. |
Ah, I thought the public docs mattered more. I just looked at SP3's TH field defs, and from what I understand:
TIL then, I was mistaken. I thought BYPASS was an older spelling but it's the spelling of LU/WB for SCOPE_SYS |
I don't know what that is for. @mbrkusanin might know. |
This is how it was described in older versions of docs and how SP3 is still currently displaying th=3. Current version does not mention TH_LOAD_BYPASS or TH_STORE_BYPASS. Either SP3 or the docs should be changed because now we have a mismatch. I do not know which one. |
This needs (internal) confirmation from HW folks. |