diff --git a/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.cpp b/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.cpp index 131a0f03079b35..6d7d6bcb4b4d7c 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.cpp @@ -24,7 +24,7 @@ namespace ov { namespace intel_gpu { -ConvertFullyConnectedToFullyConnectedCompressed::ConvertFullyConnectedToFullyConnectedCompressed(bool convert_u4zp_to_u8) { +ConvertFullyConnectedToFullyConnectedCompressed::ConvertFullyConnectedToFullyConnectedCompressed() { using namespace ov::pass::pattern; auto compressed_constant = [](const ov::Output& output) { @@ -83,8 +83,9 @@ ConvertFullyConnectedToFullyConnectedCompressed::ConvertFullyConnectedToFullyCon bool grouped = std::count_if(scale_shape.begin(), scale_shape.end(), [](size_t d) { return d > 1; }) > 1; auto weight_ptr = std::dynamic_pointer_cast(pattern_map.at(weights_m).get_node_shared_ptr()); - bool weight_u8 = (weight_ptr->get_element_type() == ov::element::u8) ? true : false; - bool weight_i4 = (weight_ptr->get_element_type().bitwidth() == 4) ? true : false; + bool weight_u8 = false; + if (weight_ptr->get_element_type() == ov::element::u8 || weight_ptr->get_element_type() == ov::element::i8) + weight_u8 = true; auto reshape_const_to_2d = [has_transpose, grouped](std::shared_ptr node) { auto constant = std::dynamic_pointer_cast(node); @@ -103,15 +104,13 @@ ConvertFullyConnectedToFullyConnectedCompressed::ConvertFullyConnectedToFullyCon auto convert_const_to_u8 = [&](std::shared_ptr node) { auto constant = std::dynamic_pointer_cast(node); - if (constant->get_element_type() == ov::element::u8) - return std::dynamic_pointer_cast(constant); - // WA: Convert ZP to u8 to avoid u4 reorder - if (convert_u4zp_to_u8 && constant->get_element_type() == ov::element::u4) + if (constant->get_element_type() == ov::element::u4) + return std::dynamic_pointer_cast(std::make_shared(node, ov::element::u8)); + // Convert ZP to u8 + if (weight_u8) return std::dynamic_pointer_cast(std::make_shared(node, ov::element::u8)); - if (!weight_u8 && !weight_i4) - return std::dynamic_pointer_cast(constant); - return std::dynamic_pointer_cast(std::make_shared(node, ov::element::u8)); + return std::dynamic_pointer_cast(constant); }; diff --git a/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.hpp b/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.hpp index 641f55ead5fdaf..d2bc71a91f1285 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.hpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.hpp @@ -12,7 +12,7 @@ namespace intel_gpu { class ConvertFullyConnectedToFullyConnectedCompressed: public ov::pass::MatcherPass { public: OPENVINO_RTTI("ConvertFullyConnectedToFullyConnectedCompressed", "0"); - ConvertFullyConnectedToFullyConnectedCompressed(bool convert_u4zp_to_u8 = false); + ConvertFullyConnectedToFullyConnectedCompressed(); }; } // namespace intel_gpu diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index 9917044fd4904d..7e98632c288ad2 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -810,7 +810,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); manager.register_pass(); manager.register_pass(); - manager.register_pass(device_info.supports_immad); + manager.register_pass(); bool disable_horizontal_fc_fusion = false; GPU_DEBUG_GET_INSTANCE(debug_config);