Skip to content

Commit

Permalink
[GPU] Applied comments and fixed unit-test failure
Browse files (browse the repository at this point in the history)
Signed-off-by: Min, Byung-il <byungil.min@intel.com>
  • Loading branch information
byungilm committed Oct 17, 2024
1 parent d19b925 commit d7ac7eb
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
namespace ov {
namespace intel_gpu {

ConvertFullyConnectedToFullyConnectedCompressed::ConvertFullyConnectedToFullyConnectedCompressed(bool convert_u4zp_to_u8) {
ConvertFullyConnectedToFullyConnectedCompressed::ConvertFullyConnectedToFullyConnectedCompressed() {
using namespace ov::pass::pattern;

auto compressed_constant = [](const ov::Output<ov::Node>& output) {
Expand Down Expand Up @@ -83,8 +83,9 @@ ConvertFullyConnectedToFullyConnectedCompressed::ConvertFullyConnectedToFullyCon
bool grouped = std::count_if(scale_shape.begin(), scale_shape.end(), [](size_t d) { return d > 1; }) > 1;

auto weight_ptr = std::dynamic_pointer_cast<ov::op::v0::Constant>(pattern_map.at(weights_m).get_node_shared_ptr());
bool weight_u8 = (weight_ptr->get_element_type() == ov::element::u8) ? true : false;
bool weight_i4 = (weight_ptr->get_element_type().bitwidth() == 4) ? true : false;
bool weight_u8 = false;
if (weight_ptr->get_element_type() == ov::element::u8 || weight_ptr->get_element_type() == ov::element::i8)
weight_u8 = true;

auto reshape_const_to_2d = [has_transpose, grouped](std::shared_ptr<ov::Node> node) {
auto constant = std::dynamic_pointer_cast<ov::op::v0::Constant>(node);
Expand All @@ -103,15 +104,13 @@ ConvertFullyConnectedToFullyConnectedCompressed::ConvertFullyConnectedToFullyCon

auto convert_const_to_u8 = [&](std::shared_ptr<ov::Node> node) {
auto constant = std::dynamic_pointer_cast<ov::op::v0::Constant>(node);
if (constant->get_element_type() == ov::element::u8)
return std::dynamic_pointer_cast<ov::Node>(constant);
// WA: Convert ZP to u8 to avoid u4 reorder
if (convert_u4zp_to_u8 && constant->get_element_type() == ov::element::u4)
if (constant->get_element_type() == ov::element::u4)
return std::dynamic_pointer_cast<ov::Node>(std::make_shared<ov::op::v0::Convert>(node, ov::element::u8));
// Convert ZP to u8
if (weight_u8)
return std::dynamic_pointer_cast<ov::Node>(std::make_shared<ov::op::v0::Convert>(node, ov::element::u8));
if (!weight_u8 && !weight_i4)
return std::dynamic_pointer_cast<ov::Node>(constant);

return std::dynamic_pointer_cast<ov::Node>(std::make_shared<ov::op::v0::Convert>(node, ov::element::u8));
return std::dynamic_pointer_cast<ov::Node>(constant);
};


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ namespace intel_gpu {
class ConvertFullyConnectedToFullyConnectedCompressed: public ov::pass::MatcherPass {
public:
OPENVINO_RTTI("ConvertFullyConnectedToFullyConnectedCompressed", "0");
ConvertFullyConnectedToFullyConnectedCompressed(bool convert_u4zp_to_u8 = false);
ConvertFullyConnectedToFullyConnectedCompressed();
};

} // namespace intel_gpu
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -810,7 +810,7 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
manager.register_pass<ov::intel_gpu::ClampFP16Output>();
manager.register_pass<ov::intel_gpu::ConvertMatMulToFullyConnected>();
manager.register_pass<ov::intel_gpu::MoveFCReshapeToWeights>();
manager.register_pass<ov::intel_gpu::ConvertFullyConnectedToFullyConnectedCompressed>(device_info.supports_immad);
manager.register_pass<ov::intel_gpu::ConvertFullyConnectedToFullyConnectedCompressed>();

bool disable_horizontal_fc_fusion = false;
GPU_DEBUG_GET_INSTANCE(debug_config);
Expand Down

0 comments on commit d7ac7eb

Please sign in to comment.