-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[AMDGPU][MC] Allow dpp in v_dot2_f32_bf16 for GFX11 and 12 #142451
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU][MC] Allow dpp in v_dot2_f32_bf16 for GFX11 and 12 #142451
Conversation
Allowing the dpp operand in v_dot2_f32_bf16 for GFX11 and 12.
@llvm/pr-subscribers-mc @llvm/pr-subscribers-backend-amdgpu Author: Jun Wang (jwanggit86) ChangesAllowing the dpp operand in v_dot2_f32_bf16 for GFX11 and 12. Full diff: https://github.com/llvm/llvm-project/pull/142451.diff 3 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 06ee41acf41ac..ab30dda8bf23e 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -1878,6 +1878,8 @@ defm V_DOT4_F32_BF8_FP8 : VOP3P_Realtriple<GFX12Gen, 0x25>;
defm V_DOT4_F32_FP8_FP8 : VOP3P_Realtriple<GFX12Gen, 0x26>;
defm V_DOT4_F32_BF8_BF8 : VOP3P_Realtriple<GFX12Gen, 0x27>;
+defm V_DOT2_F32_BF16 : VOP3P_Realtriple<GFX12Gen, 0x1a>;
+
//===----------------------------------------------------------------------===//
// GFX11
//===----------------------------------------------------------------------===//
@@ -1887,7 +1889,7 @@ multiclass VOP3P_Real_gfx11_gfx12<bits<8> op> :
defm V_DOT4_I32_IU8 : VOP3P_Real_gfx11_gfx12<0x16>;
defm V_DOT8_I32_IU4 : VOP3P_Real_gfx11_gfx12<0x18>;
-defm V_DOT2_F32_BF16 : VOP3P_Real_gfx11_gfx12<0x1a>;
+defm V_DOT2_F32_BF16 : VOP3P_Realtriple<GFX11Gen, 0x1a>;
let AssemblerPredicate = isGFX11Plus in {
def : AMDGPUMnemonicAlias<"v_dot4_i32_i8", "v_dot4_i32_iu8">;
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3p.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3p.s
index 829b0ebb8d8ac..49da767b881f9 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3p.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3p.s
@@ -45,6 +45,15 @@ v_dot2_f32_bf16 v5, src_scc, vcc_lo, src_scc neg_lo:[1,0,0] neg_hi:[1,0,0]
v_dot2_f32_bf16 v255, 0xfe0b, vcc_hi, 0.5 neg_lo:[0,1,0] neg_hi:[0,1,0] clamp
// GFX11: [0xff,0xc2,0x1a,0xcc,0xff,0xd6,0xc0,0x5b,0x0b,0xfe,0x00,0x00]
+v_dot2_f32_bf16_e64_dpp v1, v2, v3, v4 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// GFX11: [0x01,0x40,0x1a,0xcc,0xfa,0x06,0x12,0x1c,0x02,0xe4,0x00,0xff]
+
+v_dot2_f32_bf16_e64_dpp v1, v2, v3, v4 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// GFX11: [0x01,0x40,0x1a,0xcc,0xfa,0x06,0x12,0x1c,0x02,0xe4,0x00,0x00]
+
+v_dot2_f32_bf16_e64_dpp v1, v2, v3, v4 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x01,0x40,0x1a,0xcc,0xe9,0x06,0x12,0x1c,0x02,0x77,0x39,0x05]
+
v_dot2_f32_f16 v5, v1, v2, s3
// GFX11: [0x05,0x40,0x13,0xcc,0x01,0x05,0x0e,0x18]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3p.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3p.s
index db9ad3d2a8418..3287251f1183c 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3p.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3p.s
@@ -45,6 +45,15 @@ v_dot2_f32_bf16 v5, src_scc, vcc_lo, src_scc neg_lo:[1,0,0] neg_hi:[1,0,0]
v_dot2_f32_bf16 v255, 0xfe0b, vcc_hi, 0.5 neg_lo:[0,0,0] neg_hi:[0,0,0] clamp
// GFX12: [0xff,0xc0,0x1a,0xcc,0xff,0xd6,0xc0,0x1b,0x0b,0xfe,0x00,0x00]
+v_dot2_f32_bf16_e64_dpp v1, v2, v3, v4 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// GFX11: [0x01,0x40,0x1a,0xcc,0xfa,0x06,0x12,0x1c,0x02,0xe4,0x00,0xff]
+
+v_dot2_f32_bf16_e64_dpp v1, v2, v3, v4 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
+// GFX11: [0x01,0x40,0x1a,0xcc,0xfa,0x06,0x12,0x1c,0x02,0xe4,0x00,0x00]
+
+v_dot2_f32_bf16_e64_dpp v1, v2, v3, v4 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: [0x01,0x40,0x1a,0xcc,0xe9,0x06,0x12,0x1c,0x02,0x77,0x39,0x05]
+
v_dot2_f32_f16 v5, v1, v2, s3
// GFX12: [0x05,0x40,0x13,0xcc,0x01,0x05,0x0e,0x18]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Missing dpp8 and disasm tests.
@@ -1878,6 +1878,8 @@ defm V_DOT4_F32_BF8_FP8 : VOP3P_Realtriple<GFX12Gen, 0x25>; | |||
defm V_DOT4_F32_FP8_FP8 : VOP3P_Realtriple<GFX12Gen, 0x26>; | |||
defm V_DOT4_F32_BF8_BF8 : VOP3P_Realtriple<GFX12Gen, 0x27>; | |||
|
|||
defm V_DOT2_F32_BF16 : VOP3P_Realtriple<GFX12Gen, 0x1a>; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There is VOP3P_Realtriple_gfx11_gfx12 already.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Where is it? I couldn't find it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, I see. Take it from downstream, added 2 weeks ago by Mirko.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Included in latest commit.
@@ -45,6 +45,15 @@ v_dot2_f32_bf16 v5, src_scc, vcc_lo, src_scc neg_lo:[1,0,0] neg_hi:[1,0,0] | |||
v_dot2_f32_bf16 v255, 0xfe0b, vcc_hi, 0.5 neg_lo:[0,1,0] neg_hi:[0,1,0] clamp | |||
// GFX11: [0xff,0xc2,0x1a,0xcc,0xff,0xd6,0xc0,0x5b,0x0b,0xfe,0x00,0x00] | |||
|
|||
v_dot2_f32_bf16_e64_dpp v1, v2, v3, v4 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should go into gfx11_asm_vop3p_dpp16.s.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done.
@@ -45,6 +45,15 @@ v_dot2_f32_bf16 v5, src_scc, vcc_lo, src_scc neg_lo:[1,0,0] neg_hi:[1,0,0] | |||
v_dot2_f32_bf16 v255, 0xfe0b, vcc_hi, 0.5 neg_lo:[0,0,0] neg_hi:[0,0,0] clamp | |||
// GFX12: [0xff,0xc0,0x1a,0xcc,0xff,0xd6,0xc0,0x1b,0x0b,0xfe,0x00,0x00] | |||
|
|||
v_dot2_f32_bf16_e64_dpp v1, v2, v3, v4 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should go into gfx12_asm_vop3p_dpp16.s.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done.
(1) created VOP3P_Realtriple_gfx11_gfx12 (2) updated asm tests (3) created disasm tests.
Added. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
df4fd57
to
1b4c538
Compare
Allowing the dpp operand in v_dot2_f32_bf16 for GFX11 and 12.