-
Notifications
You must be signed in to change notification settings - Fork 13.3k
[AMDGPU] Implement vop3p complex pattern optimization for gisel #130234
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
556f7ff
58464a3
25f7db0
daae1ae
afa6448
04a5d4c
2e587f5
c6c4b3e
a289297
dd106c7
6378180
b0feaff
53370d8
3f178d2
09abc3d
79b8992
136da47
61b4df7
97e6742
d79ac03
fc7c927
9f3a54f
bc51bf4
cafa3d1
a5c5017
47840d7
6fe4147
3b7f377
d7de92f
45ed994
2f83470
d651640
a792c1d
9ac58f9
0d59649
8390425
276e41b
1cb1651
c2eeedd
dc65247
6cd21e6
ee28947
797055d
e544665
0eac2e9
e328d7a
c1680f3
d9dc316
223dc11
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -68,8 +68,7 @@ define float @v_fdot2_neg_c(<2 x half> %a, <2 x half> %b, float %c) { | |
; GFX906-LABEL: v_fdot2_neg_c: | ||
; GFX906: ; %bb.0: | ||
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; GFX906-NEXT: v_xor_b32_e32 v2, 0x80000000, v2 | ||
; GFX906-NEXT: v_dot2_f32_f16 v0, v0, v1, v2 | ||
; GFX906-NEXT: v_dot2_f32_f16 v0, v0, v1, v2 neg_lo:[0,0,1] neg_hi:[0,0,1] | ||
; GFX906-NEXT: s_setpc_b64 s[30:31] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Nit: This isn't related to your patch, but it seems we're missing GFX10PLUS checks for a lot of these testcases. Could you please send a separate patch to fix that? |
||
%neg.c = fneg float %c | ||
%r = call float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %neg.c, i1 false) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -248,8 +248,7 @@ define i32 @v_sdot2_fnegf32_c(<2 x i16> %a, <2 x i16> %b, float %c) { | |
; GFX906-LABEL: v_sdot2_fnegf32_c: | ||
; GFX906: ; %bb.0: | ||
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; GFX906-NEXT: v_xor_b32_e32 v2, 0x80000000, v2 | ||
; GFX906-NEXT: v_dot2_i32_i16 v0, v0, v1, v2 | ||
; GFX906-NEXT: v_dot2_i32_i16 v0, v0, v1, v2 neg_lo:[0,0,1] neg_hi:[0,0,1] | ||
; GFX906-NEXT: s_setpc_b64 s[30:31] | ||
; | ||
; GFX908-LABEL: v_sdot2_fnegf32_c: | ||
|
@@ -263,8 +262,7 @@ define i32 @v_sdot2_fnegf32_c(<2 x i16> %a, <2 x i16> %b, float %c) { | |
; GFX10-LABEL: v_sdot2_fnegf32_c: | ||
; GFX10: ; %bb.0: | ||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; GFX10-NEXT: v_xor_b32_e32 v2, 0x80000000, v2 | ||
; GFX10-NEXT: v_dot2_i32_i16 v0, v0, v1, v2 | ||
; GFX10-NEXT: v_dot2_i32_i16 v0, v0, v1, v2 neg_lo:[0,0,1] neg_hi:[0,0,1] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think we don't support the neg modifiers for integer operands. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We don't. These changes are not right. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. |
||
; GFX10-NEXT: s_setpc_b64 s[30:31] | ||
%neg.c = fneg float %c | ||
%cast.neg.c = bitcast float %neg.c to i32 | ||
|
@@ -276,8 +274,7 @@ define i32 @v_sdot2_fnegv2f16_c(<2 x i16> %a, <2 x i16> %b, <2 x half> %c) { | |
; GFX906-LABEL: v_sdot2_fnegv2f16_c: | ||
; GFX906: ; %bb.0: | ||
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; GFX906-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 | ||
; GFX906-NEXT: v_dot2_i32_i16 v0, v0, v1, v2 | ||
; GFX906-NEXT: v_dot2_i32_i16 v0, v0, v1, v2 neg_lo:[0,0,1] neg_hi:[0,0,1] | ||
; GFX906-NEXT: s_setpc_b64 s[30:31] | ||
; | ||
; GFX908-LABEL: v_sdot2_fnegv2f16_c: | ||
|
@@ -291,8 +288,7 @@ define i32 @v_sdot2_fnegv2f16_c(<2 x i16> %a, <2 x i16> %b, <2 x half> %c) { | |
; GFX10-LABEL: v_sdot2_fnegv2f16_c: | ||
; GFX10: ; %bb.0: | ||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; GFX10-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 | ||
; GFX10-NEXT: v_dot2_i32_i16 v0, v0, v1, v2 | ||
; GFX10-NEXT: v_dot2_i32_i16 v0, v0, v1, v2 neg_lo:[0,0,1] neg_hi:[0,0,1] | ||
; GFX10-NEXT: s_setpc_b64 s[30:31] | ||
%neg.c = fneg <2 x half> %c | ||
%cast.neg.c = bitcast <2 x half> %neg.c to i32 | ||
|
@@ -304,8 +300,7 @@ define i32 @v_sdot2_shuffle10_a(<2 x i16> %a, <2 x i16> %b, i32 %c) { | |
; GFX906-LABEL: v_sdot2_shuffle10_a: | ||
; GFX906: ; %bb.0: | ||
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; GFX906-NEXT: v_alignbit_b32 v0, v0, v0, 16 | ||
; GFX906-NEXT: v_dot2_i32_i16 v0, v0, v1, v2 | ||
; GFX906-NEXT: v_dot2_i32_i16 v0, v0, v1, v2 op_sel:[1,0,0] op_sel_hi:[0,1,1] | ||
; GFX906-NEXT: s_setpc_b64 s[30:31] | ||
; | ||
; GFX908-LABEL: v_sdot2_shuffle10_a: | ||
|
@@ -319,8 +314,7 @@ define i32 @v_sdot2_shuffle10_a(<2 x i16> %a, <2 x i16> %b, i32 %c) { | |
; GFX10-LABEL: v_sdot2_shuffle10_a: | ||
; GFX10: ; %bb.0: | ||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; GFX10-NEXT: v_alignbit_b32 v0, v0, v0, 16 | ||
; GFX10-NEXT: v_dot2_i32_i16 v0, v0, v1, v2 | ||
; GFX10-NEXT: v_dot2_i32_i16 v0, v0, v1, v2 op_sel:[1,0,0] op_sel_hi:[0,1,1] | ||
; GFX10-NEXT: s_setpc_b64 s[30:31] | ||
%shuf.a = shufflevector <2 x i16> %a, <2 x i16> undef, <2 x i32> <i32 1, i32 0> | ||
%r = call i32 @llvm.amdgcn.sdot2(<2 x i16> %shuf.a, <2 x i16> %b, i32 %c, i1 false) | ||
|
@@ -331,8 +325,7 @@ define i32 @v_sdot2_shuffle10_b(<2 x i16> %a, <2 x i16> %b, i32 %c) { | |
; GFX906-LABEL: v_sdot2_shuffle10_b: | ||
; GFX906: ; %bb.0: | ||
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; GFX906-NEXT: v_alignbit_b32 v1, v1, v1, 16 | ||
; GFX906-NEXT: v_dot2_i32_i16 v0, v0, v1, v2 | ||
; GFX906-NEXT: v_dot2_i32_i16 v0, v0, v1, v2 op_sel:[0,1,0] op_sel_hi:[1,0,1] | ||
; GFX906-NEXT: s_setpc_b64 s[30:31] | ||
; | ||
; GFX908-LABEL: v_sdot2_shuffle10_b: | ||
|
@@ -346,8 +339,7 @@ define i32 @v_sdot2_shuffle10_b(<2 x i16> %a, <2 x i16> %b, i32 %c) { | |
; GFX10-LABEL: v_sdot2_shuffle10_b: | ||
; GFX10: ; %bb.0: | ||
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; GFX10-NEXT: v_alignbit_b32 v1, v1, v1, 16 | ||
; GFX10-NEXT: v_dot2_i32_i16 v0, v0, v1, v2 | ||
; GFX10-NEXT: v_dot2_i32_i16 v0, v0, v1, v2 op_sel:[0,1,0] op_sel_hi:[1,0,1] | ||
; GFX10-NEXT: s_setpc_b64 s[30:31] | ||
%shuf.b = shufflevector <2 x i16> %b, <2 x i16> undef, <2 x i32> <i32 1, i32 0> | ||
%r = call i32 @llvm.amdgcn.sdot2(<2 x i16> %a, <2 x i16> %shuf.b, i32 %c, i1 false) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why do we negate both halves? The IR is only doing fneg on a float, not on <2 x half>.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hi @rovka, to fix neg of float instead of <2 x half>:
v_fmul_v2f16_partial_neg
) for neg float. However, this case does not take effect, since every LLT that is not <2 x Scalar Type> is blocked for safety (here, c is a float).