Commit f9ea57b

[TEMP] bugfix
Signed-off-by: Min, Byungil <byungil.min@intel.com>
byungilm committed Oct 16, 2024
1 parent: 26a78f0 · commit: f9ea57b
Showing 2 changed files with 19 additions and 7 deletions.
File 1 of 2 (OpenCL fully-connected bf_tiled dynamic-quantization kernel):

@@ -871,7 +871,7 @@ inline void FUNC(fc_bf_tiled_kernel_dyn_quan_none_slm)(
     uint batch_sglid = (sglid * TILE_K) / TILE_IFM_ELEMENTS_SIZE; // 0 to 1 : to batch direction
 
     const uint scale_pitch = TILE_IN_B_PITCH / QUANTIZE_GROUP_SIZE;
-    // MAKE_VECTOR_TYPE(int, TILE_B) acc_tmp[TILE_OFM] = { };
+    MAKE_VECTOR_TYPE(int, TILE_B) acc_tmp[TILE_OFM] = { };
     __attribute__((opencl_unroll_hint(1)))
     for (uint ni = 0; ni < iterations; ++ni) {
         uint in_offset = input_offset + (idx_sglid + batch_sglid * TILE_IN_B_PITCH);
@@ -903,7 +903,7 @@ inline void FUNC(fc_bf_tiled_kernel_dyn_quan_none_slm)(
         input_offset += TILE_IFM_ELEMENTS_SIZE;
 
-        MAKE_VECTOR_TYPE(int, TILE_B) acc_tmp[TILE_OFM] = { };
+        // MAKE_VECTOR_TYPE(int, TILE_B) acc_tmp[TILE_OFM] = { };
 
         //#if TILE_OFM != 2
         //#error "FC bf_tiled kernel: can't use SLM optimization with TILE_OFM != 2"
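
What the hunks above change: the zero-initialized accumulator acc_tmp is now declared once before the ni loop instead of being re-declared (and therefore re-zeroed) on every iteration, presumably so that partial sums can survive across iterations until the kernel flushes them. The stand-alone C++ sketch below only illustrates that scoping difference; the constants and the update expression are made up for illustration and are not the kernel's actual data flow.

// Minimal sketch (not the kernel itself): why the placement of a zero-initialized
// accumulator matters. The inner-scope accumulator loses its partial sums at the end
// of every iteration; the hoisted one keeps accumulating until explicitly flushed.
// TILE_B and ITERATIONS are stand-ins for the kernel's TILE_B and 'iterations'.
#include <array>
#include <cstdio>

constexpr int TILE_B = 8;      // assumption: illustrative value only
constexpr int ITERATIONS = 4;  // assumption: illustrative value only

int main() {
    std::array<int, TILE_B> acc_hoisted{};    // zero-initialized once, before the loop
    for (int ni = 0; ni < ITERATIONS; ++ni) {
        std::array<int, TILE_B> acc_inner{};  // re-zeroed on every iteration
        for (int b = 0; b < TILE_B; ++b) {
            acc_hoisted[b] += ni + b;         // accumulates across all iterations
            acc_inner[b]   += ni + b;         // holds only this iteration's contribution
        }
        // acc_inner must be flushed here, every iteration, or its contribution is lost.
    }
    std::printf("hoisted acc[0] = %d\n", acc_hoisted[0]);  // prints 6 (= 0+1+2+3)
    return 0;
}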

File 2 of 2 (fully_connected_gpu_tests unit tests):

@@ -4008,24 +4008,36 @@ TEST_F(fully_connected_gpu_tests, compressed_int4_scale_dynamic_quantize_wzp_sta
 
 // [TEST]
 TEST_F(fully_connected_gpu_tests, compressed_int4_scale_dynamic_quantize_wzp_128_large) {
-    this->test_compressed_int4_scale_dyn_quan_weight_i4(true, 320, 1024, 1024, 32, 32, true);
+    this->test_compressed_int4_scale_dyn_quan_weight_i4(true, 1024, 4096, 4096, 128, 128, true);
 }
 
+TEST_F(fully_connected_gpu_tests, compressed_int4_scale_dynamic_quantize_wzp_32_large) {
+    this->test_compressed_int4_scale_dyn_quan_weight_i4(true, 1024, 4096, 4096, 32, 32, true);
+}
+
 TEST_F(fully_connected_gpu_tests, compressed_int4_scale_dynamic_quantize_wzp_128_large_unaligned) {
-    this->test_compressed_int4_scale_dyn_quan_weight_i4(true, 310, 1024, 1024, 32, 32, true);
+    this->test_compressed_int4_scale_dyn_quan_weight_i4(true, 1014, 4096, 4096, 128, 128, true);
 }
 
 TEST_F(fully_connected_gpu_tests, compressed_int4_scale_dynamic_quantize_wzp_128_small) {
-    this->test_compressed_int4_scale_dyn_quan_weight_i4(true, 16, 1024, 1024, 32, 32, true);
+    this->test_compressed_int4_scale_dyn_quan_weight_i4(true, 16, 4096, 4096, 128, 128, true);
 }
 
 // [TEST]
+TEST_F(fully_connected_gpu_tests, compressed_int8_scale_dynamic_quantize_wzp_32_large) {
+    this->test_compressed_int8_scale_dyn_quan_weight_u8(true, 1024, 4096, 4096, 32, 32, true);
+}
+
 TEST_F(fully_connected_gpu_tests, compressed_int8_scale_dynamic_quantize_wzp_128_large) {
-    this->test_compressed_int8_scale_dyn_quan_weight_u8(true, 320, 1024, 1024, 32, 32, true);
+    this->test_compressed_int8_scale_dyn_quan_weight_u8(true, 1024, 4096, 4096, 128, 128, true);
 }
 
 TEST_F(fully_connected_gpu_tests, compressed_int8_scale_dynamic_quantize_wzp_128_large_unaligned) {
-    this->test_compressed_int8_scale_dyn_quan_weight_u8(true, 310, 1024, 1024, 32, 32, true);
+    this->test_compressed_int8_scale_dyn_quan_weight_u8(true, 1014, 4096, 4096, 128, 128, true);
 }
 
+TEST_F(fully_connected_gpu_tests, compressed_int8_scale_dynamic_quantize_wzp_128_small) {
+    this->test_compressed_int8_scale_dyn_quan_weight_u8(true, 16, 4096, 4096, 128, 128, true);
+}
+
 
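
A note on the test parameters (inferred, not documented in this diff): judging by the call sites and test names, the positional arguments to test_compressed_int4_scale_dyn_quan_weight_i4 / test_compressed_int8_scale_dyn_quan_weight_u8 appear to be a dynamic-shape flag, batch size, input-feature count, output-feature count, two quantization group sizes, and a weights-zero-point flag; the "unaligned" cases use batch sizes such as 310 and 1014, presumably so the batch does not align with the kernel's tiling. The C++ sketch below is a hypothetical, generic illustration of grouped quantization (one scale per group of input features), not the OpenVINO helper's implementation, and every name in it is assumed.

// Hypothetical sketch: grouped quantization along the input-feature axis, where every
// `group_size` consecutive values share one scale. This only illustrates why 4096
// features with group size 128 yield 32 scale groups per row; it is not the test
// helper's real logic or API.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <vector>

// Quantize `values` to int8 in groups of `group_size`; returns one scale per group.
static std::vector<float> quantize_grouped(const std::vector<float>& values,
                                           int group_size,
                                           std::vector<int8_t>& quantized) {
    std::vector<float> scales;
    quantized.resize(values.size());
    for (size_t start = 0; start < values.size(); start += group_size) {
        const size_t end = std::min(values.size(), start + group_size);
        float max_abs = 0.0f;
        for (size_t i = start; i < end; ++i)
            max_abs = std::max(max_abs, std::fabs(values[i]));
        const float scale = (max_abs > 0.0f) ? max_abs / 127.0f : 1.0f;
        scales.push_back(scale);
        for (size_t i = start; i < end; ++i)
            quantized[i] = static_cast<int8_t>(std::lround(values[i] / scale));
    }
    return scales;
}

int main() {
    const int ifm = 4096, group_size = 128;   // shapes exercised by the new "128" tests
    std::printf("scale groups per row: %d\n", ifm / group_size);  // 4096 / 128 = 32
    std::vector<float> row(ifm, 0.5f);
    std::vector<int8_t> quantized;
    const auto scales = quantize_grouped(row, group_size, quantized);
    std::printf("computed %zu scales\n", scales.size());
    return 0;
}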
