-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[NFC][AMDGPU] Align all gfx1250 VOP1 MC tests with downstream #149567
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[NFC][AMDGPU] Align all gfx1250 VOP1 MC tests with downstream #149567
Conversation
This PR adds all VOP1 tests that haven't yet been upstreamed by copying the relevant test files directly from downstream. Afterward, the auto-generation script is run with the `--unique` option to deduplicate any redundant tests that may have been introduced during the downstream merge. Co-authored-by: Mekhanoshin, Stanislav <Stanislav.Mekhanoshin@amd.com>
|
@llvm/pr-subscribers-mc @llvm/pr-subscribers-backend-amdgpu Author: Shilei Tian (shiltian) Changes: This PR adds all VOP1 tests that haven't yet been upstreamed by copying the relevant test files directly from downstream. Afterward, the auto-generation script is run with the `--unique` option to deduplicate any redundant tests that may have been introduced during the downstream merge. Co-authored-by: Mekhanoshin, Stanislav <Stanislav.Mekhanoshin@amd.com> Patch is 671.16 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/149567.diff 9 Files Affected:
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
index 3ddbc365224db..a313741ffe22d 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s
@@ -664,6 +664,45 @@ v_cvt_f32_fp8_e32 v1, 3
v_cvt_f32_fp8_e32 v1, v3
// GFX1250: v_cvt_f32_fp8_e32 v1, v3 ; encoding: [0x03,0xd9,0x02,0x7e]
+v_cvt_pk_f32_bf8_e32 v[2:3], s3
+// GFX1250: v_cvt_pk_f32_bf8_e32 v[2:3], s3 ; encoding: [0x03,0xde,0x04,0x7e]
+
+v_cvt_pk_f32_bf8_e32 v[4:5], s5
+// GFX1250: v_cvt_pk_f32_bf8_e32 v[4:5], s5 ; encoding: [0x05,0xde,0x08,0x7e]
+
+v_cvt_pk_f32_bf8_e32 v[2:3], 3
+// GFX1250: v_cvt_pk_f32_bf8_e32 v[2:3], 3 ; encoding: [0x83,0xde,0x04,0x7e]
+
+v_cvt_pk_f32_bf8_e32 v[4:5], 3
+// GFX1250: v_cvt_pk_f32_bf8_e32 v[4:5], 3 ; encoding: [0x83,0xde,0x08,0x7e]
+
+v_cvt_pk_f32_bf8_e32 v[2:3], v3
+// GFX1250: v_cvt_pk_f32_bf8_e32 v[2:3], v3 ; encoding: [0x03,0xdf,0x04,0x7e]
+
+v_cvt_pk_f32_bf8_e32 v[4:5], v3
+// GFX1250: v_cvt_pk_f32_bf8_e32 v[4:5], v3 ; encoding: [0x03,0xdf,0x08,0x7e]
+
+v_cvt_pk_f32_bf8_e32 v[4:5], v127.h
+// GFX1250: v_cvt_pk_f32_bf8_e32 v[4:5], v127.h ; encoding: [0xff,0xdf,0x08,0x7e]
+
+v_cvt_pk_f32_bf8_e32 v[4:5], v127.l
+// GFX1250: v_cvt_pk_f32_bf8_e32 v[4:5], v127.l ; encoding: [0x7f,0xdf,0x08,0x7e]
+
+v_cvt_pk_f32_fp8_e32 v[2:3], s3
+// GFX1250: v_cvt_pk_f32_fp8_e32 v[2:3], s3 ; encoding: [0x03,0xdc,0x04,0x7e]
+
+v_cvt_pk_f32_fp8_e32 v[2:3], 3
+// GFX1250: v_cvt_pk_f32_fp8_e32 v[2:3], 3 ; encoding: [0x83,0xdc,0x04,0x7e]
+
+v_cvt_pk_f32_fp8_e32 v[2:3], v3
+// GFX1250: v_cvt_pk_f32_fp8_e32 v[2:3], v3 ; encoding: [0x03,0xdd,0x04,0x7e]
+
+v_cvt_pk_f32_fp8_e32 v[4:5], v127.h
+// GFX1250: v_cvt_pk_f32_fp8_e32 v[4:5], v127.h ; encoding: [0xff,0xdd,0x08,0x7e]
+
+v_cvt_pk_f32_fp8_e32 v[4:5], v127.l
+// GFX1250: v_cvt_pk_f32_fp8_e32 v[4:5], v127.l ; encoding: [0x7f,0xdd,0x08,0x7e]
+
v_sat_pk4_i4_i8 v1, v2
// GFX1250: v_sat_pk4_i4_i8_e32 v1, v2 ; encoding: [0x02,0xe7,0x02,0x7e]
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s
index aa4e49d85f1ff..0931523bbf40c 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s
@@ -1,167 +1,3659 @@
// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5
// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s
-v_tanh_bf16_e64 v5, v1
-// GFX1250: v_tanh_bf16_e64 v5, v1 ; encoding: [0x05,0x00,0xca,0xd5,0x01,0x01,0x00,0x00]
+v_bfrev_b32_e64 v5, v1
+// GFX1250: v_bfrev_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xb8,0xd5,0x01,0x01,0x00,0x00]
+
+v_bfrev_b32_e64 v5, v255
+// GFX1250: v_bfrev_b32_e64 v5, v255 ; encoding: [0x05,0x00,0xb8,0xd5,0xff,0x01,0x00,0x00]
+
+v_bfrev_b32_e64 v5, s1
+// GFX1250: v_bfrev_b32_e64 v5, s1 ; encoding: [0x05,0x00,0xb8,0xd5,0x01,0x00,0x00,0x00]
+
+v_bfrev_b32_e64 v5, s105
+// GFX1250: v_bfrev_b32_e64 v5, s105 ; encoding: [0x05,0x00,0xb8,0xd5,0x69,0x00,0x00,0x00]
+
+v_bfrev_b32_e64 v5, vcc_lo
+// GFX1250: v_bfrev_b32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xb8,0xd5,0x6a,0x00,0x00,0x00]
+
+v_bfrev_b32_e64 v5, vcc_hi
+// GFX1250: v_bfrev_b32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xb8,0xd5,0x6b,0x00,0x00,0x00]
+
+v_bfrev_b32_e64 v5, ttmp15
+// GFX1250: v_bfrev_b32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xb8,0xd5,0x7b,0x00,0x00,0x00]
+
+v_bfrev_b32_e64 v5, m0
+// GFX1250: v_bfrev_b32_e64 v5, m0 ; encoding: [0x05,0x00,0xb8,0xd5,0x7d,0x00,0x00,0x00]
+
+v_bfrev_b32_e64 v5, exec_lo
+// GFX1250: v_bfrev_b32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xb8,0xd5,0x7e,0x00,0x00,0x00]
+
+v_bfrev_b32_e64 v5, exec_hi
+// GFX1250: v_bfrev_b32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xb8,0xd5,0x7f,0x00,0x00,0x00]
+
+v_bfrev_b32_e64 v5, null
+// GFX1250: v_bfrev_b32_e64 v5, null ; encoding: [0x05,0x00,0xb8,0xd5,0x7c,0x00,0x00,0x00]
+
+v_bfrev_b32_e64 v5, -1
+// GFX1250: v_bfrev_b32_e64 v5, -1 ; encoding: [0x05,0x00,0xb8,0xd5,0xc1,0x00,0x00,0x00]
+
+v_bfrev_b32_e64 v5, 0.5
+// GFX1250: v_bfrev_b32_e64 v5, 0.5 ; encoding: [0x05,0x00,0xb8,0xd5,0xf0,0x00,0x00,0x00]
+
+v_bfrev_b32_e64 v5, src_scc
+// GFX1250: v_bfrev_b32_e64 v5, src_scc ; encoding: [0x05,0x00,0xb8,0xd5,0xfd,0x00,0x00,0x00]
+
+v_bfrev_b32_e64 v255, 0xaf123456
+// GFX1250: v_bfrev_b32_e64 v255, 0xaf123456 ; encoding: [0xff,0x00,0xb8,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_ceil_f16_e64 v5, v1
+// GFX1250: v_ceil_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xdc,0xd5,0x01,0x01,0x00,0x00]
+
+v_ceil_f16_e64 v5, v255
+// GFX1250: v_ceil_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xdc,0xd5,0xff,0x01,0x00,0x00]
+
+v_ceil_f16_e64 v5, s1
+// GFX1250: v_ceil_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xdc,0xd5,0x01,0x00,0x00,0x00]
+
+v_ceil_f16_e64 v5, s105
+// GFX1250: v_ceil_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xdc,0xd5,0x69,0x00,0x00,0x00]
+
+v_ceil_f16_e64 v5, vcc_lo
+// GFX1250: v_ceil_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xdc,0xd5,0x6a,0x00,0x00,0x00]
+
+v_ceil_f16_e64 v5, vcc_hi
+// GFX1250: v_ceil_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xdc,0xd5,0x6b,0x00,0x00,0x00]
+
+v_ceil_f16_e64 v5, ttmp15
+// GFX1250: v_ceil_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xdc,0xd5,0x7b,0x00,0x00,0x00]
+
+v_ceil_f16_e64 v5, m0
+// GFX1250: v_ceil_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xdc,0xd5,0x7d,0x00,0x00,0x00]
+
+v_ceil_f16_e64 v5, exec_lo
+// GFX1250: v_ceil_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xdc,0xd5,0x7e,0x00,0x00,0x00]
+
+v_ceil_f16_e64 v5, exec_hi
+// GFX1250: v_ceil_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xdc,0xd5,0x7f,0x00,0x00,0x00]
+
+v_ceil_f16_e64 v5, null
+// GFX1250: v_ceil_f16_e64 v5, null ; encoding: [0x05,0x00,0xdc,0xd5,0x7c,0x00,0x00,0x00]
+
+v_ceil_f16_e64 v5, -1
+// GFX1250: v_ceil_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xdc,0xd5,0xc1,0x00,0x00,0x00]
+
+v_ceil_f16_e64 v5, 0.5 mul:2
+// GFX1250: v_ceil_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xdc,0xd5,0xf0,0x00,0x00,0x08]
+
+v_ceil_f16_e64 v5, src_scc mul:4
+// GFX1250: v_ceil_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xdc,0xd5,0xfd,0x00,0x00,0x10]
+
+v_ceil_f16_e64 v255, -|0xfe0b| clamp div:2
+// GFX1250: v_ceil_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xdc,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00]
+
+v_ceil_f32_e64 v5, v1
+// GFX1250: v_ceil_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xa2,0xd5,0x01,0x01,0x00,0x00]
+
+v_ceil_f32_e64 v5, v255
+// GFX1250: v_ceil_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xa2,0xd5,0xff,0x01,0x00,0x00]
+
+v_ceil_f32_e64 v5, s1
+// GFX1250: v_ceil_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xa2,0xd5,0x01,0x00,0x00,0x00]
+
+v_ceil_f32_e64 v5, s105
+// GFX1250: v_ceil_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xa2,0xd5,0x69,0x00,0x00,0x00]
+
+v_ceil_f32_e64 v5, vcc_lo
+// GFX1250: v_ceil_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xa2,0xd5,0x6a,0x00,0x00,0x00]
+
+v_ceil_f32_e64 v5, vcc_hi
+// GFX1250: v_ceil_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xa2,0xd5,0x6b,0x00,0x00,0x00]
+
+v_ceil_f32_e64 v5, ttmp15
+// GFX1250: v_ceil_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xa2,0xd5,0x7b,0x00,0x00,0x00]
+
+v_ceil_f32_e64 v5, m0
+// GFX1250: v_ceil_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xa2,0xd5,0x7d,0x00,0x00,0x00]
+
+v_ceil_f32_e64 v5, exec_lo
+// GFX1250: v_ceil_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xa2,0xd5,0x7e,0x00,0x00,0x00]
+
+v_ceil_f32_e64 v5, exec_hi
+// GFX1250: v_ceil_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xa2,0xd5,0x7f,0x00,0x00,0x00]
+
+v_ceil_f32_e64 v5, null
+// GFX1250: v_ceil_f32_e64 v5, null ; encoding: [0x05,0x00,0xa2,0xd5,0x7c,0x00,0x00,0x00]
+
+v_ceil_f32_e64 v5, -1
+// GFX1250: v_ceil_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xa2,0xd5,0xc1,0x00,0x00,0x00]
+
+v_ceil_f32_e64 v5, 0.5 mul:2
+// GFX1250: v_ceil_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xa2,0xd5,0xf0,0x00,0x00,0x08]
+
+v_ceil_f32_e64 v5, src_scc mul:4
+// GFX1250: v_ceil_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xa2,0xd5,0xfd,0x00,0x00,0x10]
+
+v_ceil_f32_e64 v255, -|0xaf123456| clamp div:2
+// GFX1250: v_ceil_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xa2,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf]
+
+v_ceil_f64_e64 v[6:7], v[2:3]
+// GFX1250: v_ceil_f64_e64 v[6:7], v[2:3] ; encoding: [0x06,0x00,0x98,0xd5,0x02,0x01,0x00,0x00]
+
+v_ceil_f64_e64 v[6:7], v[254:255]
+// GFX1250: v_ceil_f64_e64 v[6:7], v[254:255] ; encoding: [0x06,0x00,0x98,0xd5,0xfe,0x01,0x00,0x00]
+
+v_ceil_f64_e64 v[6:7], s[2:3]
+// GFX1250: v_ceil_f64_e64 v[6:7], s[2:3] ; encoding: [0x06,0x00,0x98,0xd5,0x02,0x00,0x00,0x00]
+
+v_ceil_f64_e64 v[6:7], s[104:105]
+// GFX1250: v_ceil_f64_e64 v[6:7], s[104:105] ; encoding: [0x06,0x00,0x98,0xd5,0x68,0x00,0x00,0x00]
+
+v_ceil_f64_e64 v[6:7], vcc
+// GFX1250: v_ceil_f64_e64 v[6:7], vcc ; encoding: [0x06,0x00,0x98,0xd5,0x6a,0x00,0x00,0x00]
+
+v_ceil_f64_e64 v[6:7], ttmp[14:15]
+// GFX1250: v_ceil_f64_e64 v[6:7], ttmp[14:15] ; encoding: [0x06,0x00,0x98,0xd5,0x7a,0x00,0x00,0x00]
+
+v_ceil_f64_e64 v[6:7], exec
+// GFX1250: v_ceil_f64_e64 v[6:7], exec ; encoding: [0x06,0x00,0x98,0xd5,0x7e,0x00,0x00,0x00]
+
+v_ceil_f64_e64 v[6:7], null
+// GFX1250: v_ceil_f64_e64 v[6:7], null ; encoding: [0x06,0x00,0x98,0xd5,0x7c,0x00,0x00,0x00]
+
+v_ceil_f64_e64 v[6:7], -1
+// GFX1250: v_ceil_f64_e64 v[6:7], -1 ; encoding: [0x06,0x00,0x98,0xd5,0xc1,0x00,0x00,0x00]
+
+v_ceil_f64_e64 v[6:7], 0.5 mul:2
+// GFX1250: v_ceil_f64_e64 v[6:7], 0.5 mul:2 ; encoding: [0x06,0x00,0x98,0xd5,0xf0,0x00,0x00,0x08]
+
+v_ceil_f64_e64 v[6:7], -|src_scc| mul:4
+// GFX1250: v_ceil_f64_e64 v[6:7], -|src_scc| mul:4 ; encoding: [0x06,0x01,0x98,0xd5,0xfd,0x00,0x00,0x30]
+
+v_ceil_f64_e64 v[254:255], 0xaf123456 clamp div:2
+// GFX1250: v_ceil_f64_e64 v[254:255], 0xaf123456 clamp div:2 ; encoding: [0xfe,0x80,0x98,0xd5,0xff,0x00,0x00,0x18,0x56,0x34,0x12,0xaf]
+
+v_cls_i32_e64 v5, v1
+// GFX1250: v_cls_i32_e64 v5, v1 ; encoding: [0x05,0x00,0xbb,0xd5,0x01,0x01,0x00,0x00]
+
+v_cls_i32_e64 v5, v255
+// GFX1250: v_cls_i32_e64 v5, v255 ; encoding: [0x05,0x00,0xbb,0xd5,0xff,0x01,0x00,0x00]
+
+v_cls_i32_e64 v5, s1
+// GFX1250: v_cls_i32_e64 v5, s1 ; encoding: [0x05,0x00,0xbb,0xd5,0x01,0x00,0x00,0x00]
+
+v_cls_i32_e64 v5, s105
+// GFX1250: v_cls_i32_e64 v5, s105 ; encoding: [0x05,0x00,0xbb,0xd5,0x69,0x00,0x00,0x00]
+
+v_cls_i32_e64 v5, vcc_lo
+// GFX1250: v_cls_i32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xbb,0xd5,0x6a,0x00,0x00,0x00]
+
+v_cls_i32_e64 v5, vcc_hi
+// GFX1250: v_cls_i32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xbb,0xd5,0x6b,0x00,0x00,0x00]
+
+v_cls_i32_e64 v5, ttmp15
+// GFX1250: v_cls_i32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xbb,0xd5,0x7b,0x00,0x00,0x00]
+
+v_cls_i32_e64 v5, m0
+// GFX1250: v_cls_i32_e64 v5, m0 ; encoding: [0x05,0x00,0xbb,0xd5,0x7d,0x00,0x00,0x00]
+
+v_cls_i32_e64 v5, exec_lo
+// GFX1250: v_cls_i32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xbb,0xd5,0x7e,0x00,0x00,0x00]
+
+v_cls_i32_e64 v5, exec_hi
+// GFX1250: v_cls_i32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xbb,0xd5,0x7f,0x00,0x00,0x00]
+
+v_cls_i32_e64 v5, null
+// GFX1250: v_cls_i32_e64 v5, null ; encoding: [0x05,0x00,0xbb,0xd5,0x7c,0x00,0x00,0x00]
+
+v_cls_i32_e64 v5, -1
+// GFX1250: v_cls_i32_e64 v5, -1 ; encoding: [0x05,0x00,0xbb,0xd5,0xc1,0x00,0x00,0x00]
+
+v_cls_i32_e64 v5, 0.5
+// GFX1250: v_cls_i32_e64 v5, 0.5 ; encoding: [0x05,0x00,0xbb,0xd5,0xf0,0x00,0x00,0x00]
+
+v_cls_i32_e64 v5, src_scc
+// GFX1250: v_cls_i32_e64 v5, src_scc ; encoding: [0x05,0x00,0xbb,0xd5,0xfd,0x00,0x00,0x00]
+
+v_cls_i32_e64 v255, 0xaf123456
+// GFX1250: v_cls_i32_e64 v255, 0xaf123456 ; encoding: [0xff,0x00,0xbb,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_clz_i32_u32_e64 v5, v1
+// GFX1250: v_clz_i32_u32_e64 v5, v1 ; encoding: [0x05,0x00,0xb9,0xd5,0x01,0x01,0x00,0x00]
+
+v_clz_i32_u32_e64 v5, v255
+// GFX1250: v_clz_i32_u32_e64 v5, v255 ; encoding: [0x05,0x00,0xb9,0xd5,0xff,0x01,0x00,0x00]
+
+v_clz_i32_u32_e64 v5, s1
+// GFX1250: v_clz_i32_u32_e64 v5, s1 ; encoding: [0x05,0x00,0xb9,0xd5,0x01,0x00,0x00,0x00]
+
+v_clz_i32_u32_e64 v5, s105
+// GFX1250: v_clz_i32_u32_e64 v5, s105 ; encoding: [0x05,0x00,0xb9,0xd5,0x69,0x00,0x00,0x00]
+
+v_clz_i32_u32_e64 v5, vcc_lo
+// GFX1250: v_clz_i32_u32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xb9,0xd5,0x6a,0x00,0x00,0x00]
+
+v_clz_i32_u32_e64 v5, vcc_hi
+// GFX1250: v_clz_i32_u32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xb9,0xd5,0x6b,0x00,0x00,0x00]
+
+v_clz_i32_u32_e64 v5, ttmp15
+// GFX1250: v_clz_i32_u32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xb9,0xd5,0x7b,0x00,0x00,0x00]
+
+v_clz_i32_u32_e64 v5, m0
+// GFX1250: v_clz_i32_u32_e64 v5, m0 ; encoding: [0x05,0x00,0xb9,0xd5,0x7d,0x00,0x00,0x00]
+
+v_clz_i32_u32_e64 v5, exec_lo
+// GFX1250: v_clz_i32_u32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xb9,0xd5,0x7e,0x00,0x00,0x00]
+
+v_clz_i32_u32_e64 v5, exec_hi
+// GFX1250: v_clz_i32_u32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xb9,0xd5,0x7f,0x00,0x00,0x00]
+
+v_clz_i32_u32_e64 v5, null
+// GFX1250: v_clz_i32_u32_e64 v5, null ; encoding: [0x05,0x00,0xb9,0xd5,0x7c,0x00,0x00,0x00]
+
+v_clz_i32_u32_e64 v5, -1
+// GFX1250: v_clz_i32_u32_e64 v5, -1 ; encoding: [0x05,0x00,0xb9,0xd5,0xc1,0x00,0x00,0x00]
+
+v_clz_i32_u32_e64 v5, 0.5
+// GFX1250: v_clz_i32_u32_e64 v5, 0.5 ; encoding: [0x05,0x00,0xb9,0xd5,0xf0,0x00,0x00,0x00]
+
+v_clz_i32_u32_e64 v5, src_scc
+// GFX1250: v_clz_i32_u32_e64 v5, src_scc ; encoding: [0x05,0x00,0xb9,0xd5,0xfd,0x00,0x00,0x00]
+
+v_clz_i32_u32_e64 v255, 0xaf123456
+// GFX1250: v_clz_i32_u32_e64 v255, 0xaf123456 ; encoding: [0xff,0x00,0xb9,0xd5,0xff,0x00,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cos_f16_e64 v5, v1
+// GFX1250: v_cos_f16_e64 v5, v1 ; encoding: [0x05,0x00,0xe1,0xd5,0x01,0x01,0x00,0x00]
+
+v_cos_f16_e64 v5, v255
+// GFX1250: v_cos_f16_e64 v5, v255 ; encoding: [0x05,0x00,0xe1,0xd5,0xff,0x01,0x00,0x00]
+
+v_cos_f16_e64 v5, s1
+// GFX1250: v_cos_f16_e64 v5, s1 ; encoding: [0x05,0x00,0xe1,0xd5,0x01,0x00,0x00,0x00]
+
+v_cos_f16_e64 v5, s105
+// GFX1250: v_cos_f16_e64 v5, s105 ; encoding: [0x05,0x00,0xe1,0xd5,0x69,0x00,0x00,0x00]
+
+v_cos_f16_e64 v5, vcc_lo
+// GFX1250: v_cos_f16_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xe1,0xd5,0x6a,0x00,0x00,0x00]
+
+v_cos_f16_e64 v5, vcc_hi
+// GFX1250: v_cos_f16_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xe1,0xd5,0x6b,0x00,0x00,0x00]
+
+v_cos_f16_e64 v5, ttmp15
+// GFX1250: v_cos_f16_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xe1,0xd5,0x7b,0x00,0x00,0x00]
+
+v_cos_f16_e64 v5, m0
+// GFX1250: v_cos_f16_e64 v5, m0 ; encoding: [0x05,0x00,0xe1,0xd5,0x7d,0x00,0x00,0x00]
+
+v_cos_f16_e64 v5, exec_lo
+// GFX1250: v_cos_f16_e64 v5, exec_lo ; encoding: [0x05,0x00,0xe1,0xd5,0x7e,0x00,0x00,0x00]
+
+v_cos_f16_e64 v5, exec_hi
+// GFX1250: v_cos_f16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xe1,0xd5,0x7f,0x00,0x00,0x00]
+
+v_cos_f16_e64 v5, null
+// GFX1250: v_cos_f16_e64 v5, null ; encoding: [0x05,0x00,0xe1,0xd5,0x7c,0x00,0x00,0x00]
+
+v_cos_f16_e64 v5, -1
+// GFX1250: v_cos_f16_e64 v5, -1 ; encoding: [0x05,0x00,0xe1,0xd5,0xc1,0x00,0x00,0x00]
+
+v_cos_f16_e64 v5, 0.5 mul:2
+// GFX1250: v_cos_f16_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xe1,0xd5,0xf0,0x00,0x00,0x08]
+
+v_cos_f16_e64 v5, src_scc mul:4
+// GFX1250: v_cos_f16_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xe1,0xd5,0xfd,0x00,0x00,0x10]
+
+v_cos_f16_e64 v255, -|0xfe0b| clamp div:2
+// GFX1250: v_cos_f16_e64 v255, -|0xfe0b| clamp div:2 ; encoding: [0xff,0x81,0xe1,0xd5,0xff,0x00,0x00,0x38,0x0b,0xfe,0x00,0x00]
+
+v_cos_f32_e64 v5, v1
+// GFX1250: v_cos_f32_e64 v5, v1 ; encoding: [0x05,0x00,0xb6,0xd5,0x01,0x01,0x00,0x00]
+
+v_cos_f32_e64 v5, v255
+// GFX1250: v_cos_f32_e64 v5, v255 ; encoding: [0x05,0x00,0xb6,0xd5,0xff,0x01,0x00,0x00]
+
+v_cos_f32_e64 v5, s1
+// GFX1250: v_cos_f32_e64 v5, s1 ; encoding: [0x05,0x00,0xb6,0xd5,0x01,0x00,0x00,0x00]
+
+v_cos_f32_e64 v5, s105
+// GFX1250: v_cos_f32_e64 v5, s105 ; encoding: [0x05,0x00,0xb6,0xd5,0x69,0x00,0x00,0x00]
+
+v_cos_f32_e64 v5, vcc_lo
+// GFX1250: v_cos_f32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xb6,0xd5,0x6a,0x00,0x00,0x00]
+
+v_cos_f32_e64 v5, vcc_hi
+// GFX1250: v_cos_f32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xb6,0xd5,0x6b,0x00,0x00,0x00]
+
+v_cos_f32_e64 v5, ttmp15
+// GFX1250: v_cos_f32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xb6,0xd5,0x7b,0x00,0x00,0x00]
+
+v_cos_f32_e64 v5, m0
+// GFX1250: v_cos_f32_e64 v5, m0 ; encoding: [0x05,0x00,0xb6,0xd5,0x7d,0x00,0x00,0x00]
+
+v_cos_f32_e64 v5, exec_lo
+// GFX1250: v_cos_f32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xb6,0xd5,0x7e,0x00,0x00,0x00]
+
+v_cos_f32_e64 v5, exec_hi
+// GFX1250: v_cos_f32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xb6,0xd5,0x7f,0x00,0x00,0x00]
+
+v_cos_f32_e64 v5, null
+// GFX1250: v_cos_f32_e64 v5, null ; encoding: [0x05,0x00,0xb6,0xd5,0x7c,0x00,0x00,0x00]
+
+v_cos_f32_e64 v5, -1
+// GFX1250: v_cos_f32_e64 v5, -1 ; encoding: [0x05,0x00,0xb6,0xd5,0xc1,0x00,0x00,0x00]
+
+v_cos_f32_e64 v5, 0.5 mul:2
+// GFX1250: v_cos_f32_e64 v5, 0.5 mul:2 ; encoding: [0x05,0x00,0xb6,0xd5,0xf0,0x00,0x00,0x08]
+
+v_cos_f32_e64 v5, src_scc mul:4
+// GFX1250: v_cos_f32_e64 v5, src_scc mul:4 ; encoding: [0x05,0x00,0xb6,0xd5,0xfd,0x00,0x00,0x10]
+
+v_cos_f32_e64 v255, -|0xaf123456| clamp div:2
+// GFX1250: v_cos_f32_e64 v255, -|0xaf123456| clamp div:2 ; encoding: [0xff,0x81,0xb6,0xd5,0xff,0x00,0x00,0x38,0x56,0x34,0x12,0xaf]
+
+v_ctz_i32_b32_e64 v5, v1
+// GFX1250: v_ctz_i32_b32_e64 v5, v1 ; encoding: [0x05,0x00,0xba,0xd5,0x01,0x01,0x00,0x00]
+
+v_ctz_i32_b32_e64 v5, v255
+// GFX1250: v_ctz_i32_b32_e64 v5, v255 ; encoding: [0x05,0x00,0xba,0xd5,0xff,0x01,0x00,0x00]
+
+v_ctz_i32_b32_e64 v5, s1
+// GFX1250: v_ctz_i32_b32_e64 v5, s1 ; encoding: [0x05,0x00,0xba,0xd5,0x01,0x00,0x00,0x00]
+
+v_ctz_i32_b32_e64 v5, s105
+// GFX1250: v_ctz_i32_b32_e64 v5, s105 ; encoding: [0x05,0x00,0xba,0xd5,0x69,0x00,0x00,0x00]
+
+v_ctz_i32_b32_e64 v5, vcc_lo
+// GFX1250: v_ctz_i32_b32_e64 v5, vcc_lo ; encoding: [0x05,0x00,0xba,0xd5,0x6a,0x00,0x00,0x00]
+
+v_ctz_i32_b32_e64 v5, vcc_hi
+// GFX1250: v_ctz_i32_b32_e64 v5, vcc_hi ; encoding: [0x05,0x00,0xba,0xd5,0x6b,0x00,0x00,0x00]
+
+v_ctz_i32_b32_e64 v5, ttmp15
+// GFX1250: v_ctz_i32_b32_e64 v5, ttmp15 ; encoding: [0x05,0x00,0xba,0xd5,0x7b,0x00,0x00,0x00]
+
+v_ctz_i32_b32_e64 v5, m0
+// GFX1250: v_ctz_i32_b32_e64 v5, m0 ; encoding: [0x05,0x00,0xba,0xd5,0x7d,0x00,0x00,0x00]
+
+v_ctz_i32_b32_e64 v5, exec_lo
+// GFX1250: v_ctz_i32_b32_e64 v5, exec_lo ; encoding: [0x05,0x00,0xba,0xd5,0x7e,0x00,0x00,0x00]
+
+v_ctz_i32_b32_e64 v5, exec_hi
+// GFX1250: v_ctz_i32_b32_e64 v5, exec_hi ; encoding: [0x05,0x00,0xba,0xd5,0x7f,0x00,0x00,0x00]
+
+v_ctz_i32_b32_e64 v5, null
+// GFX1250: v_ctz_i32_b32_e64 v5, null ; encoding: [0x05,0x00,0xba,0xd5,0x7c,0x00,0x00,0x00]
+
+v_ctz_i32_b32...
[truncated]
|
|
@mariusz-sikora-at-amd This will cause downstream conflict, but I'd expect to simply just take these when merging. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. Do you want to land it downstream first to avoid conflicts?
|
You mean to directly copy-paste it there? |
Yes, or copy over if these are complete now. |

This PR adds all VOP1 tests that haven't yet been upstreamed by copying the relevant test files directly from downstream. Afterward, the auto-generation script is run with the `--unique` option to deduplicate any redundant tests that may have been introduced during the downstream merge.
Co-authored-by: Mekhanoshin, Stanislav <Stanislav.Mekhanoshin@amd.com>