@@ -836,113 +836,65 @@ define amdgpu_kernel void @s_and_u64_sext_with_sregs(ptr addrspace(1) %out, ptr
836
836
}
837
837
838
838
; Bitwise AND of two <2 x i128> vector arguments, returned by value.
; Every check block below expects the operation to be lowered to eight
; independent 32-bit v_and_b32 instructions, result dword vN = vN & v(N+8)
; for N = 0..7, followed by the s_setpc_b64 return.
; The GFX12 expectations differ from the others only in the waits emitted at
; function entry (separate s_wait_* instructions instead of one s_waitcnt).
define <2 x i128 > @v_and_v2i128 (<2 x i128 > %a , <2 x i128 > %b ) {
839
- ; GFX7-LABEL: v_and_v2i128:
840
- ; GFX7: ; %bb.0:
841
- ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
842
- ; GFX7-NEXT: v_and_b32_e32 v0, v0, v8
843
- ; GFX7-NEXT: v_and_b32_e32 v1, v1, v9
844
- ; GFX7-NEXT: v_and_b32_e32 v2, v2, v10
845
- ; GFX7-NEXT: v_and_b32_e32 v3, v3, v11
846
- ; GFX7-NEXT: v_and_b32_e32 v4, v4, v12
847
- ; GFX7-NEXT: v_and_b32_e32 v5, v5, v13
848
- ; GFX7-NEXT: v_and_b32_e32 v6, v6, v14
849
- ; GFX7-NEXT: v_and_b32_e32 v7, v7, v15
850
- ; GFX7-NEXT: s_setpc_b64 s[30:31]
851
- ;
852
- ; GFX9-LABEL: v_and_v2i128:
853
- ; GFX9: ; %bb.0:
854
- ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
855
- ; GFX9-NEXT: v_and_b32_e32 v0, v0, v8
856
- ; GFX9-NEXT: v_and_b32_e32 v1, v1, v9
857
- ; GFX9-NEXT: v_and_b32_e32 v2, v2, v10
858
- ; GFX9-NEXT: v_and_b32_e32 v3, v3, v11
859
- ; GFX9-NEXT: v_and_b32_e32 v4, v4, v12
860
- ; GFX9-NEXT: v_and_b32_e32 v5, v5, v13
861
- ; GFX9-NEXT: v_and_b32_e32 v6, v6, v14
862
- ; GFX9-NEXT: v_and_b32_e32 v7, v7, v15
863
- ; GFX9-NEXT: s_setpc_b64 s[30:31]
864
- ;
865
- ; GFX8-LABEL: v_and_v2i128:
866
- ; GFX8: ; %bb.0:
867
- ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
868
- ; GFX8-NEXT: v_and_b32_e32 v0, v0, v8
869
- ; GFX8-NEXT: v_and_b32_e32 v1, v1, v9
870
- ; GFX8-NEXT: v_and_b32_e32 v2, v2, v10
871
- ; GFX8-NEXT: v_and_b32_e32 v3, v3, v11
872
- ; GFX8-NEXT: v_and_b32_e32 v4, v4, v12
873
- ; GFX8-NEXT: v_and_b32_e32 v5, v5, v13
874
- ; GFX8-NEXT: v_and_b32_e32 v6, v6, v14
875
- ; GFX8-NEXT: v_and_b32_e32 v7, v7, v15
876
- ; GFX8-NEXT: s_setpc_b64 s[30:31]
877
- ;
878
- ; GFX10-LABEL: v_and_v2i128:
879
- ; GFX10: ; %bb.0:
880
- ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
881
- ; GFX10-NEXT: v_and_b32_e32 v0, v0, v8
882
- ; GFX10-NEXT: v_and_b32_e32 v1, v1, v9
883
- ; GFX10-NEXT: v_and_b32_e32 v2, v2, v10
884
- ; GFX10-NEXT: v_and_b32_e32 v3, v3, v11
885
- ; GFX10-NEXT: v_and_b32_e32 v4, v4, v12
886
- ; GFX10-NEXT: v_and_b32_e32 v5, v5, v13
887
- ; GFX10-NEXT: v_and_b32_e32 v6, v6, v14
888
- ; GFX10-NEXT: v_and_b32_e32 v7, v7, v15
889
- ; GFX10-NEXT: s_setpc_b64 s[30:31]
839
+ ; GCN-LABEL: v_and_v2i128:
840
+ ; GCN: ; %bb.0:
841
+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
842
+ ; GCN-NEXT: v_and_b32_e32 v0, v0, v8
843
+ ; GCN-NEXT: v_and_b32_e32 v1, v1, v9
844
+ ; GCN-NEXT: v_and_b32_e32 v2, v2, v10
845
+ ; GCN-NEXT: v_and_b32_e32 v3, v3, v11
846
+ ; GCN-NEXT: v_and_b32_e32 v4, v4, v12
847
+ ; GCN-NEXT: v_and_b32_e32 v5, v5, v13
848
+ ; GCN-NEXT: v_and_b32_e32 v6, v6, v14
849
+ ; GCN-NEXT: v_and_b32_e32 v7, v7, v15
850
+ ; GCN-NEXT: s_setpc_b64 s[30:31]
890
851
;
891
- ; GFX11-LABEL: v_and_v2i128:
892
- ; GFX11: ; %bb.0:
893
- ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
894
- ; GFX11-NEXT: v_and_b32_e32 v0, v0, v8
895
- ; GFX11-NEXT: v_and_b32_e32 v1, v1, v9
896
- ; GFX11-NEXT: v_and_b32_e32 v2, v2, v10
897
- ; GFX11-NEXT: v_and_b32_e32 v3, v3, v11
898
- ; GFX11-NEXT: v_and_b32_e32 v4, v4, v12
899
- ; GFX11-NEXT: v_and_b32_e32 v5, v5, v13
900
- ; GFX11-NEXT: v_and_b32_e32 v6, v6, v14
901
- ; GFX11-NEXT: v_and_b32_e32 v7, v7, v15
902
- ; GFX11-NEXT: s_setpc_b64 s[30:31]
852
+ ; GFX10PLUS-LABEL: v_and_v2i128:
853
+ ; GFX10PLUS: ; %bb.0:
854
+ ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
855
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v8
856
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v1, v1, v9
857
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v2, v2, v10
858
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v3, v3, v11
859
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v4, v4, v12
860
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v5, v5, v13
861
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v6, v6, v14
862
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v7, v7, v15
863
+ ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
864
+ ;
865
+ ; GFX12-LABEL: v_and_v2i128:
866
+ ; GFX12: ; %bb.0:
867
+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
868
+ ; GFX12-NEXT: s_wait_expcnt 0x0
869
+ ; GFX12-NEXT: s_wait_samplecnt 0x0
870
+ ; GFX12-NEXT: s_wait_bvhcnt 0x0
871
+ ; GFX12-NEXT: s_wait_kmcnt 0x0
872
+ ; GFX12-NEXT: v_and_b32_e32 v0, v0, v8
873
+ ; GFX12-NEXT: v_and_b32_e32 v1, v1, v9
874
+ ; GFX12-NEXT: v_and_b32_e32 v2, v2, v10
875
+ ; GFX12-NEXT: v_and_b32_e32 v3, v3, v11
876
+ ; GFX12-NEXT: v_and_b32_e32 v4, v4, v12
877
+ ; GFX12-NEXT: v_and_b32_e32 v5, v5, v13
878
+ ; GFX12-NEXT: v_and_b32_e32 v6, v6, v14
879
+ ; GFX12-NEXT: v_and_b32_e32 v7, v7, v15
880
+ ; GFX12-NEXT: s_setpc_b64 s[30:31]
903
881
%and = and <2 x i128 > %a , %b
904
882
ret <2 x i128 > %and
905
883
}
906
884
907
885
define <2 x i128 > @v_and_v2i128_inline_imm (<2 x i128 > %a ) {
908
- ; GFX7-LABEL: v_and_v2i128_inline_imm:
909
- ; GFX7: ; %bb.0:
910
- ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
911
- ; GFX7-NEXT: v_and_b32_e32 v0, 64, v0
912
- ; GFX7-NEXT: v_and_b32_e32 v4, 64, v4
913
- ; GFX7-NEXT: v_mov_b32_e32 v1, 0
914
- ; GFX7-NEXT: v_mov_b32_e32 v2, 0
915
- ; GFX7-NEXT: v_mov_b32_e32 v3, 0
916
- ; GFX7-NEXT: v_mov_b32_e32 v5, 0
917
- ; GFX7-NEXT: v_mov_b32_e32 v6, 0
918
- ; GFX7-NEXT: v_mov_b32_e32 v7, 0
919
- ; GFX7-NEXT: s_setpc_b64 s[30:31]
920
- ;
921
- ; GFX9-LABEL: v_and_v2i128_inline_imm:
922
- ; GFX9: ; %bb.0:
923
- ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
924
- ; GFX9-NEXT: v_and_b32_e32 v0, 64, v0
925
- ; GFX9-NEXT: v_and_b32_e32 v4, 64, v4
926
- ; GFX9-NEXT: v_mov_b32_e32 v1, 0
927
- ; GFX9-NEXT: v_mov_b32_e32 v2, 0
928
- ; GFX9-NEXT: v_mov_b32_e32 v3, 0
929
- ; GFX9-NEXT: v_mov_b32_e32 v5, 0
930
- ; GFX9-NEXT: v_mov_b32_e32 v6, 0
931
- ; GFX9-NEXT: v_mov_b32_e32 v7, 0
932
- ; GFX9-NEXT: s_setpc_b64 s[30:31]
933
- ;
934
- ; GFX8-LABEL: v_and_v2i128_inline_imm:
935
- ; GFX8: ; %bb.0:
936
- ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
937
- ; GFX8-NEXT: v_and_b32_e32 v0, 64, v0
938
- ; GFX8-NEXT: v_and_b32_e32 v4, 64, v4
939
- ; GFX8-NEXT: v_mov_b32_e32 v1, 0
940
- ; GFX8-NEXT: v_mov_b32_e32 v2, 0
941
- ; GFX8-NEXT: v_mov_b32_e32 v3, 0
942
- ; GFX8-NEXT: v_mov_b32_e32 v5, 0
943
- ; GFX8-NEXT: v_mov_b32_e32 v6, 0
944
- ; GFX8-NEXT: v_mov_b32_e32 v7, 0
945
- ; GFX8-NEXT: s_setpc_b64 s[30:31]
886
+ ; GCN-LABEL: v_and_v2i128_inline_imm:
887
+ ; GCN: ; %bb.0:
888
+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
889
+ ; GCN-NEXT: v_and_b32_e32 v0, 64, v0
890
+ ; GCN-NEXT: v_and_b32_e32 v4, 64, v4
891
+ ; GCN-NEXT: v_mov_b32_e32 v1, 0
892
+ ; GCN-NEXT: v_mov_b32_e32 v2, 0
893
+ ; GCN-NEXT: v_mov_b32_e32 v3, 0
894
+ ; GCN-NEXT: v_mov_b32_e32 v5, 0
895
+ ; GCN-NEXT: v_mov_b32_e32 v6, 0
896
+ ; GCN-NEXT: v_mov_b32_e32 v7, 0
897
+ ; GCN-NEXT: s_setpc_b64 s[30:31]
946
898
;
947
899
; GFX10-LABEL: v_and_v2i128_inline_imm:
948
900
; GFX10: ; %bb.0:
@@ -965,9 +917,217 @@ define <2 x i128> @v_and_v2i128_inline_imm(<2 x i128> %a) {
965
917
; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v5, 0
966
918
; GFX11-NEXT: v_dual_mov_b32 v6, 0 :: v_dual_mov_b32 v7, 0
967
919
; GFX11-NEXT: s_setpc_b64 s[30:31]
920
+ ;
921
+ ; GFX12-LABEL: v_and_v2i128_inline_imm:
922
+ ; GFX12: ; %bb.0:
923
+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
924
+ ; GFX12-NEXT: s_wait_expcnt 0x0
925
+ ; GFX12-NEXT: s_wait_samplecnt 0x0
926
+ ; GFX12-NEXT: s_wait_bvhcnt 0x0
927
+ ; GFX12-NEXT: s_wait_kmcnt 0x0
928
+ ; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 64, v0
929
+ ; GFX12-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_and_b32 v4, 64, v4
930
+ ; GFX12-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v5, 0
931
+ ; GFX12-NEXT: v_dual_mov_b32 v6, 0 :: v_dual_mov_b32 v7, 0
932
+ ; GFX12-NEXT: s_setpc_b64 s[30:31]
968
933
%and = and <2 x i128 > %a , <i128 64 , i128 64 >
969
934
ret <2 x i128 > %and
970
935
}
936
+
937
; Bitwise AND of two <3 x i128> vector arguments, returned by value.
; All three check blocks expect twelve 32-bit v_and_b32 instructions,
; result dword vN = vN & v(N+12) for N = 0..11, with nothing between the
; entry waits and the s_setpc_b64 return other than those ANDs.
+ define <3 x i128 > @v_and_v3i128 (<3 x i128 > %a , <3 x i128 > %b ) {
938
+ ; GCN-LABEL: v_and_v3i128:
939
+ ; GCN: ; %bb.0:
940
+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
941
+ ; GCN-NEXT: v_and_b32_e32 v0, v0, v12
942
+ ; GCN-NEXT: v_and_b32_e32 v1, v1, v13
943
+ ; GCN-NEXT: v_and_b32_e32 v2, v2, v14
944
+ ; GCN-NEXT: v_and_b32_e32 v3, v3, v15
945
+ ; GCN-NEXT: v_and_b32_e32 v4, v4, v16
946
+ ; GCN-NEXT: v_and_b32_e32 v5, v5, v17
947
+ ; GCN-NEXT: v_and_b32_e32 v6, v6, v18
948
+ ; GCN-NEXT: v_and_b32_e32 v7, v7, v19
949
+ ; GCN-NEXT: v_and_b32_e32 v8, v8, v20
950
+ ; GCN-NEXT: v_and_b32_e32 v9, v9, v21
951
+ ; GCN-NEXT: v_and_b32_e32 v10, v10, v22
952
+ ; GCN-NEXT: v_and_b32_e32 v11, v11, v23
953
+ ; GCN-NEXT: s_setpc_b64 s[30:31]
954
+ ;
955
+ ; GFX10PLUS-LABEL: v_and_v3i128:
956
+ ; GFX10PLUS: ; %bb.0:
957
+ ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
958
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v12
959
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v1, v1, v13
960
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v2, v2, v14
961
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v3, v3, v15
962
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v4, v4, v16
963
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v5, v5, v17
964
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v6, v6, v18
965
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v7, v7, v19
966
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v8, v8, v20
967
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v9, v9, v21
968
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v10, v10, v22
969
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v11, v11, v23
970
+ ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
971
+ ;
972
+ ; GFX12-LABEL: v_and_v3i128:
973
+ ; GFX12: ; %bb.0:
974
+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
975
+ ; GFX12-NEXT: s_wait_expcnt 0x0
976
+ ; GFX12-NEXT: s_wait_samplecnt 0x0
977
+ ; GFX12-NEXT: s_wait_bvhcnt 0x0
978
+ ; GFX12-NEXT: s_wait_kmcnt 0x0
979
+ ; GFX12-NEXT: v_and_b32_e32 v0, v0, v12
980
+ ; GFX12-NEXT: v_and_b32_e32 v1, v1, v13
981
+ ; GFX12-NEXT: v_and_b32_e32 v2, v2, v14
982
+ ; GFX12-NEXT: v_and_b32_e32 v3, v3, v15
983
+ ; GFX12-NEXT: v_and_b32_e32 v4, v4, v16
984
+ ; GFX12-NEXT: v_and_b32_e32 v5, v5, v17
985
+ ; GFX12-NEXT: v_and_b32_e32 v6, v6, v18
986
+ ; GFX12-NEXT: v_and_b32_e32 v7, v7, v19
987
+ ; GFX12-NEXT: v_and_b32_e32 v8, v8, v20
988
+ ; GFX12-NEXT: v_and_b32_e32 v9, v9, v21
989
+ ; GFX12-NEXT: v_and_b32_e32 v10, v10, v22
990
+ ; GFX12-NEXT: v_and_b32_e32 v11, v11, v23
991
+ ; GFX12-NEXT: s_setpc_b64 s[30:31]
992
+ %and = and <3 x i128 > %a , %b
993
+ ret <3 x i128 > %and
994
+ }
995
+
996
; Bitwise AND of two single-element <1 x i128> vector arguments, returned by
; value. All three check blocks expect four 32-bit v_and_b32 instructions,
; result dword vN = vN & v(N+4) for N = 0..3, followed by the return.
+ define <1 x i128 > @v_and_v1i128 (<1 x i128 > %a , <1 x i128 > %b ) {
997
+ ; GCN-LABEL: v_and_v1i128:
998
+ ; GCN: ; %bb.0:
999
+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1000
+ ; GCN-NEXT: v_and_b32_e32 v0, v0, v4
1001
+ ; GCN-NEXT: v_and_b32_e32 v1, v1, v5
1002
+ ; GCN-NEXT: v_and_b32_e32 v2, v2, v6
1003
+ ; GCN-NEXT: v_and_b32_e32 v3, v3, v7
1004
+ ; GCN-NEXT: s_setpc_b64 s[30:31]
1005
+ ;
1006
+ ; GFX10PLUS-LABEL: v_and_v1i128:
1007
+ ; GFX10PLUS: ; %bb.0:
1008
+ ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1009
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v4
1010
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v1, v1, v5
1011
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v2, v2, v6
1012
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v3, v3, v7
1013
+ ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1014
+ ;
1015
+ ; GFX12-LABEL: v_and_v1i128:
1016
+ ; GFX12: ; %bb.0:
1017
+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1018
+ ; GFX12-NEXT: s_wait_expcnt 0x0
1019
+ ; GFX12-NEXT: s_wait_samplecnt 0x0
1020
+ ; GFX12-NEXT: s_wait_bvhcnt 0x0
1021
+ ; GFX12-NEXT: s_wait_kmcnt 0x0
1022
+ ; GFX12-NEXT: v_and_b32_e32 v0, v0, v4
1023
+ ; GFX12-NEXT: v_and_b32_e32 v1, v1, v5
1024
+ ; GFX12-NEXT: v_and_b32_e32 v2, v2, v6
1025
+ ; GFX12-NEXT: v_and_b32_e32 v3, v3, v7
1026
+ ; GFX12-NEXT: s_setpc_b64 s[30:31]
1027
+ %and = and <1 x i128 > %a , %b
1028
+ ret <1 x i128 > %and
1029
+ }
1030
+
1031
; Bitwise AND of two <2 x i256> vector arguments, returned by value.
; The checks expect sixteen 32-bit v_and_b32 instructions. Fifteen take both
; operands from registers (vN = vN & v(N+16) for N = 0..14). The second
; operand of the last one (v15) is loaded from memory addressed through s32:
; buffer_load_dword in the GCN and GFX10 checks, scratch_load_b32 in the
; GFX11 and GFX12 checks. A wait on that load precedes the final AND.
; In the GCN checks the load targets v16 and is issued right after the AND
; that consumes v16; the other targets issue the load first, into v31.
; NOTE(review): presumably that load fetches the last dword of %b because it
; is passed on the stack rather than in a VGPR - confirm against the AMDGPU
; calling convention.
+ define <2 x i256 > @v_and_v2i256 (<2 x i256 > %a , <2 x i256 > %b ) {
1032
+ ; GCN-LABEL: v_and_v2i256:
1033
+ ; GCN: ; %bb.0:
1034
+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1035
+ ; GCN-NEXT: v_and_b32_e32 v0, v0, v16
1036
+ ; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s32
1037
+ ; GCN-NEXT: v_and_b32_e32 v1, v1, v17
1038
+ ; GCN-NEXT: v_and_b32_e32 v2, v2, v18
1039
+ ; GCN-NEXT: v_and_b32_e32 v3, v3, v19
1040
+ ; GCN-NEXT: v_and_b32_e32 v4, v4, v20
1041
+ ; GCN-NEXT: v_and_b32_e32 v5, v5, v21
1042
+ ; GCN-NEXT: v_and_b32_e32 v6, v6, v22
1043
+ ; GCN-NEXT: v_and_b32_e32 v7, v7, v23
1044
+ ; GCN-NEXT: v_and_b32_e32 v8, v8, v24
1045
+ ; GCN-NEXT: v_and_b32_e32 v9, v9, v25
1046
+ ; GCN-NEXT: v_and_b32_e32 v10, v10, v26
1047
+ ; GCN-NEXT: v_and_b32_e32 v11, v11, v27
1048
+ ; GCN-NEXT: v_and_b32_e32 v12, v12, v28
1049
+ ; GCN-NEXT: v_and_b32_e32 v13, v13, v29
1050
+ ; GCN-NEXT: v_and_b32_e32 v14, v14, v30
1051
+ ; GCN-NEXT: s_waitcnt vmcnt(0)
1052
+ ; GCN-NEXT: v_and_b32_e32 v15, v15, v16
1053
+ ; GCN-NEXT: s_setpc_b64 s[30:31]
1054
+ ;
1055
+ ; GFX10-LABEL: v_and_v2i256:
1056
+ ; GFX10: ; %bb.0:
1057
+ ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1058
+ ; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32
1059
+ ; GFX10-NEXT: v_and_b32_e32 v0, v0, v16
1060
+ ; GFX10-NEXT: v_and_b32_e32 v1, v1, v17
1061
+ ; GFX10-NEXT: v_and_b32_e32 v2, v2, v18
1062
+ ; GFX10-NEXT: v_and_b32_e32 v3, v3, v19
1063
+ ; GFX10-NEXT: v_and_b32_e32 v4, v4, v20
1064
+ ; GFX10-NEXT: v_and_b32_e32 v5, v5, v21
1065
+ ; GFX10-NEXT: v_and_b32_e32 v6, v6, v22
1066
+ ; GFX10-NEXT: v_and_b32_e32 v7, v7, v23
1067
+ ; GFX10-NEXT: v_and_b32_e32 v8, v8, v24
1068
+ ; GFX10-NEXT: v_and_b32_e32 v9, v9, v25
1069
+ ; GFX10-NEXT: v_and_b32_e32 v10, v10, v26
1070
+ ; GFX10-NEXT: v_and_b32_e32 v11, v11, v27
1071
+ ; GFX10-NEXT: v_and_b32_e32 v12, v12, v28
1072
+ ; GFX10-NEXT: v_and_b32_e32 v13, v13, v29
1073
+ ; GFX10-NEXT: v_and_b32_e32 v14, v14, v30
1074
+ ; GFX10-NEXT: s_waitcnt vmcnt(0)
1075
+ ; GFX10-NEXT: v_and_b32_e32 v15, v15, v31
1076
+ ; GFX10-NEXT: s_setpc_b64 s[30:31]
1077
+ ;
1078
+ ; GFX11-LABEL: v_and_v2i256:
1079
+ ; GFX11: ; %bb.0:
1080
+ ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1081
+ ; GFX11-NEXT: scratch_load_b32 v31, off, s32
1082
+ ; GFX11-NEXT: v_and_b32_e32 v0, v0, v16
1083
+ ; GFX11-NEXT: v_and_b32_e32 v1, v1, v17
1084
+ ; GFX11-NEXT: v_and_b32_e32 v2, v2, v18
1085
+ ; GFX11-NEXT: v_and_b32_e32 v3, v3, v19
1086
+ ; GFX11-NEXT: v_and_b32_e32 v4, v4, v20
1087
+ ; GFX11-NEXT: v_and_b32_e32 v5, v5, v21
1088
+ ; GFX11-NEXT: v_and_b32_e32 v6, v6, v22
1089
+ ; GFX11-NEXT: v_and_b32_e32 v7, v7, v23
1090
+ ; GFX11-NEXT: v_and_b32_e32 v8, v8, v24
1091
+ ; GFX11-NEXT: v_and_b32_e32 v9, v9, v25
1092
+ ; GFX11-NEXT: v_and_b32_e32 v10, v10, v26
1093
+ ; GFX11-NEXT: v_and_b32_e32 v11, v11, v27
1094
+ ; GFX11-NEXT: v_and_b32_e32 v12, v12, v28
1095
+ ; GFX11-NEXT: v_and_b32_e32 v13, v13, v29
1096
+ ; GFX11-NEXT: v_and_b32_e32 v14, v14, v30
1097
+ ; GFX11-NEXT: s_waitcnt vmcnt(0)
1098
+ ; GFX11-NEXT: v_and_b32_e32 v15, v15, v31
1099
+ ; GFX11-NEXT: s_setpc_b64 s[30:31]
1100
+ ;
1101
+ ; GFX12-LABEL: v_and_v2i256:
1102
+ ; GFX12: ; %bb.0:
1103
+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1104
+ ; GFX12-NEXT: s_wait_expcnt 0x0
1105
+ ; GFX12-NEXT: s_wait_samplecnt 0x0
1106
+ ; GFX12-NEXT: s_wait_bvhcnt 0x0
1107
+ ; GFX12-NEXT: s_wait_kmcnt 0x0
1108
+ ; GFX12-NEXT: scratch_load_b32 v31, off, s32
1109
+ ; GFX12-NEXT: v_and_b32_e32 v0, v0, v16
1110
+ ; GFX12-NEXT: v_and_b32_e32 v1, v1, v17
1111
+ ; GFX12-NEXT: v_and_b32_e32 v2, v2, v18
1112
+ ; GFX12-NEXT: v_and_b32_e32 v3, v3, v19
1113
+ ; GFX12-NEXT: v_and_b32_e32 v4, v4, v20
1114
+ ; GFX12-NEXT: v_and_b32_e32 v5, v5, v21
1115
+ ; GFX12-NEXT: v_and_b32_e32 v6, v6, v22
1116
+ ; GFX12-NEXT: v_and_b32_e32 v7, v7, v23
1117
+ ; GFX12-NEXT: v_and_b32_e32 v8, v8, v24
1118
+ ; GFX12-NEXT: v_and_b32_e32 v9, v9, v25
1119
+ ; GFX12-NEXT: v_and_b32_e32 v10, v10, v26
1120
+ ; GFX12-NEXT: v_and_b32_e32 v11, v11, v27
1121
+ ; GFX12-NEXT: v_and_b32_e32 v12, v12, v28
1122
+ ; GFX12-NEXT: v_and_b32_e32 v13, v13, v29
1123
+ ; GFX12-NEXT: v_and_b32_e32 v14, v14, v30
1124
+ ; GFX12-NEXT: s_wait_loadcnt 0x0
1125
+ ; GFX12-NEXT: v_and_b32_e32 v15, v15, v31
1126
+ ; GFX12-NEXT: s_setpc_b64 s[30:31]
1127
+ %and = and <2 x i256 > %a , %b
1128
+ ret <2 x i256 > %and
1129
+ }
1130
+
971
1131
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
972
1132
; GFX11-FAKE16: {{.*}}
973
1133
; GFX11-TRUE16: {{.*}}
0 commit comments