Commit f9ea57b

[TEMP] bugfix
Signed-off-by: Min, Byungil <byungil.min@intel.com>
byungilm committed Oct 16, 2024
1 parent: 26a78f0 · commit: f9ea57b
Showing 2 changed files with 19 additions and 7 deletions.
File 1 of 2 (OpenCL fully-connected bf_tiled dynamic-quantization kernel):

@@ -871,7 +871,7 @@ inline void FUNC(fc_bf_tiled_kernel_dyn_quan_none_slm)(
     uint batch_sglid = (sglid * TILE_K) / TILE_IFM_ELEMENTS_SIZE; // 0 to 1 : to batch direction
 
     const uint scale_pitch = TILE_IN_B_PITCH / QUANTIZE_GROUP_SIZE;
-    // MAKE_VECTOR_TYPE(int, TILE_B) acc_tmp[TILE_OFM] = { };
+    MAKE_VECTOR_TYPE(int, TILE_B) acc_tmp[TILE_OFM] = { };
     __attribute__((opencl_unroll_hint(1)))
     for (uint ni = 0; ni < iterations; ++ni) {
         uint in_offset = input_offset + (idx_sglid + batch_sglid * TILE_IN_B_PITCH);
@@ -903,7 +903,7 @@ inline void FUNC(fc_bf_tiled_kernel_dyn_quan_none_slm)(
         input_offset += TILE_IFM_ELEMENTS_SIZE;
 
-        MAKE_VECTOR_TYPE(int, TILE_B) acc_tmp[TILE_OFM] = { };
+        // MAKE_VECTOR_TYPE(int, TILE_B) acc_tmp[TILE_OFM] = { };
 
         //#if TILE_OFM != 2
         //#error "FC bf_tiled kernel: can't use SLM optimization with TILE_OFM != 2"
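
What the hunks above change: the zero-initialized accumulator acc_tmp is now declared once before the ni loop instead of being re-declared (and therefore re-zeroed) on every iteration, presumably so that partial sums can survive across iterations until the kernel flushes them. The stand-alone C++ sketch below only illustrates that scoping difference; the constants and the update expression are made up for illustration and are not the kernel's actual data flow.

// Minimal sketch (not the kernel itself): why the placement of a zero-initialized
// accumulator matters. The inner-scope accumulator loses its partial sums at the end
// of every iteration; the hoisted one keeps accumulating until explicitly flushed.
// TILE_B and ITERATIONS are stand-ins for the kernel's TILE_B and 'iterations'.
#include <array>
#include <cstdio>

constexpr int TILE_B = 8;      // assumption: illustrative value only
constexpr int ITERATIONS = 4;  // assumption: illustrative value only

int main() {
    std::array<int, TILE_B> acc_hoisted{};    // zero-initialized once, before the loop
    for (int ni = 0; ni < ITERATIONS; ++ni) {
        std::array<int, TILE_B> acc_inner{};  // re-zeroed on every iteration
        for (int b = 0; b < TILE_B; ++b) {
            acc_hoisted[b] += ni + b;         // accumulates across all iterations
            acc_inner[b]   += ni + b;         // holds only this iteration's contribution
        }
        // acc_inner must be flushed here, every iteration, or its contribution is lost.
    }
    std::printf("hoisted acc[0] = %d\n", acc_hoisted[0]);  // prints 6 (= 0+1+2+3)
    return 0;
}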

File 2 of 2 (fully_connected_gpu_tests unit tests):

@@ -4008,24 +4008,36 @@ TEST_F(fully_connected_gpu_tests, compressed_int4_scale_dynamic_quantize_wzp_sta
 
 // [TEST]
 TEST_F(fully_connected_gpu_tests, compressed_int4_scale_dynamic_quantize_wzp_128_large) {
-    this->test_compressed_int4_scale_dyn_quan_weight_i4(true, 320, 1024, 1024, 32, 32, true);
+    this->test_compressed_int4_scale_dyn_quan_weight_i4(true, 1024, 4096, 4096, 128, 128, true);
 }
 
+TEST_F(fully_connected_gpu_tests, compressed_int4_scale_dynamic_quantize_wzp_32_large) {
+    this->test_compressed_int4_scale_dyn_quan_weight_i4(true, 1024, 4096, 4096, 32, 32, true);
+}
+
 TEST_F(fully_connected_gpu_tests, compressed_int4_scale_dynamic_quantize_wzp_128_large_unaligned) {
-    this->test_compressed_int4_scale_dyn_quan_weight_i4(true, 310, 1024, 1024, 32, 32, true);
+    this->test_compressed_int4_scale_dyn_quan_weight_i4(true, 1014, 4096, 4096, 128, 128, true);
 }
 
 TEST_F(fully_connected_gpu_tests, compressed_int4_scale_dynamic_quantize_wzp_128_small) {
-    this->test_compressed_int4_scale_dyn_quan_weight_i4(true, 16, 1024, 1024, 32, 32, true);
+    this->test_compressed_int4_scale_dyn_quan_weight_i4(true, 16, 4096, 4096, 128, 128, true);
 }
 
 // [TEST]
+TEST_F(fully_connected_gpu_tests, compressed_int8_scale_dynamic_quantize_wzp_32_large) {
+    this->test_compressed_int8_scale_dyn_quan_weight_u8(true, 1024, 4096, 4096, 32, 32, true);
+}
+
 TEST_F(fully_connected_gpu_tests, compressed_int8_scale_dynamic_quantize_wzp_128_large) {
-    this->test_compressed_int8_scale_dyn_quan_weight_u8(true, 320, 1024, 1024, 32, 32, true);
+    this->test_compressed_int8_scale_dyn_quan_weight_u8(true, 1024, 4096, 4096, 128, 128, true);
 }
 
 TEST_F(fully_connected_gpu_tests, compressed_int8_scale_dynamic_quantize_wzp_128_large_unaligned) {
-    this->test_compressed_int8_scale_dyn_quan_weight_u8(true, 310, 1024, 1024, 32, 32, true);
+    this->test_compressed_int8_scale_dyn_quan_weight_u8(true, 1014, 4096, 4096, 128, 128, true);
 }
 
+TEST_F(fully_connected_gpu_tests, compressed_int8_scale_dynamic_quantize_wzp_128_small) {
+    this->test_compressed_int8_scale_dyn_quan_weight_u8(true, 16, 4096, 4096, 128, 128, true);
+}
+
 
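
A note on the test parameters (inferred, not documented in this diff): judging by the call sites and test names, the positional arguments to test_compressed_int4_scale_dyn_quan_weight_i4 / test_compressed_int8_scale_dyn_quan_weight_u8 appear to be a dynamic-shape flag, batch size, input-feature count, output-feature count, two quantization group sizes, and a weights-zero-point flag; the "unaligned" cases use batch sizes such as 310 and 1014, presumably so the batch does not align with the kernel's tiling. The C++ sketch below is a hypothetical, generic illustration of grouped quantization (one scale per group of input features), not the OpenVINO helper's implementation, and every name in it is assumed.

// Hypothetical sketch: grouped quantization along the input-feature axis, where every
// `group_size` consecutive values share one scale. This only illustrates why 4096
// features with group size 128 yield 32 scale groups per row; it is not the test
// helper's real logic or API.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <vector>

// Quantize `values` to int8 in groups of `group_size`; returns one scale per group.
static std::vector<float> quantize_grouped(const std::vector<float>& values,
                                           int group_size,
                                           std::vector<int8_t>& quantized) {
    std::vector<float> scales;
    quantized.resize(values.size());
    for (size_t start = 0; start < values.size(); start += group_size) {
        const size_t end = std::min(values.size(), start + group_size);
        float max_abs = 0.0f;
        for (size_t i = start; i < end; ++i)
            max_abs = std::max(max_abs, std::fabs(values[i]));
        const float scale = (max_abs > 0.0f) ? max_abs / 127.0f : 1.0f;
        scales.push_back(scale);
        for (size_t i = start; i < end; ++i)
            quantized[i] = static_cast<int8_t>(std::lround(values[i] / scale));
    }
    return scales;
}

int main() {
    const int ifm = 4096, group_size = 128;   // shapes exercised by the new "128" tests
    std::printf("scale groups per row: %d\n", ifm / group_size);  // 4096 / 128 = 32
    std::vector<float> row(ifm, 0.5f);
    std::vector<int8_t> quantized;
    const auto scales = quantize_grouped(row, group_size, quantized);
    std::printf("computed %zu scales\n", scales.size());
    return 0;
}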
