diff --git a/src/common/transformations/include/ov_ops/fully_connected.hpp b/src/common/transformations/include/ov_ops/fully_connected.hpp
new file mode 100644
index 00000000000000..a71abcd42d54b3
--- /dev/null
+++ b/src/common/transformations/include/ov_ops/fully_connected.hpp
@@ -0,0 +1,44 @@
+// Copyright (C) 2018-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "openvino/core/node.hpp"
+#include "openvino/op/op.hpp"
+#include "transformations_visibility.hpp"
+
+namespace ov {
+namespace op {
+namespace internal {
+
+class TRANSFORMATIONS_API FullyConnected : public ov::op::Op {
+public:
+    OPENVINO_OP("FullyConnected", "ie_internal_opset");
+
+    FullyConnected() = default;
+
+    FullyConnected(const ov::Output<Node>& A,
+                   const ov::Output<Node>& B,
+                   const ov::Output<Node>& bias,
+                   const ov::element::Type output_type = ov::element::undefined);
+
+    bool visit_attributes(ov::AttributeVisitor& visitor) override;
+
+    void validate_and_infer_types() override;
+
+    std::shared_ptr<Node> clone_with_new_inputs(const ov::OutputVector& new_args) const override;
+
+    virtual std::shared_ptr<Node> fuse_bias(const ov::Output<Node>& bias) const;
+
+    ov::element::Type get_output_type() const {
+        return m_output_type;
+    }
+
+protected:
+    ov::element::Type m_output_type;
+};
+
+}  // namespace internal
+}  // namespace op
+}  // namespace ov
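Editorial note: a minimal sketch (not part of the patch) of how this internal op is meant to be built. The bias input is positional and mandatory, so a caller without a real bias is expected to pass a dummy input such as the Placeholder op introduced below; names and shapes here are illustrative only.

```cpp
// Editorial sketch: constructing the internal FullyConnected op directly.
#include "openvino/op/parameter.hpp"
#include "ov_ops/fully_connected.hpp"
#include "ov_ops/placeholder.hpp"

std::shared_ptr<ov::op::internal::FullyConnected> make_fc_example() {
    auto data = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::Shape{1, 128});
    // Weights are expected in [O, I] layout: shape inference mimics MatMul with transpose_b=true.
    auto weights = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::Shape{256, 128});
    auto bias = std::make_shared<ov::op::internal::Placeholder>();  // no real bias available
    return std::make_shared<ov::op::internal::FullyConnected>(data, weights, bias, ov::element::undefined);
}
```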
diff --git a/src/common/transformations/include/ov_ops/fully_connected_quantized.hpp b/src/common/transformations/include/ov_ops/fully_connected_quantized.hpp
new file mode 100644
index 00000000000000..9b93f6501056b4
--- /dev/null
+++ b/src/common/transformations/include/ov_ops/fully_connected_quantized.hpp
@@ -0,0 +1,77 @@
+// Copyright (C) 2018-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "openvino/core/node.hpp"
+#include "openvino/op/op.hpp"
+#include "ov_ops/fully_connected.hpp"
+
+namespace ov {
+namespace op {
+namespace internal {
+
+class TRANSFORMATIONS_API FullyConnectedQuantized : public ov::op::internal::FullyConnected {
+public:
+    OPENVINO_OP("FullyConnectedQuantized", "gpu_opset");
+
+    FullyConnectedQuantized() = default;
+
+    FullyConnectedQuantized(const ov::Output<Node>& X,
+                            const ov::Output<Node>& W,
+                            const ov::Output<Node>& bias,
+                            const ov::Output<Node>& weight_scales,
+                            const ov::Output<Node>& weight_zero_points,
+                            const ov::Output<Node>& input_scales,
+                            const ov::Output<Node>& input_zero_points,
+                            const ov::Output<Node>& output_scales,
+                            const ov::Output<Node>& output_zero_points,
+                            const ov::element::Type output_type = ov::element::undefined);
+
+    FullyConnectedQuantized(const ov::Output<Node>& X,
+                            const ov::Output<Node>& W,
+                            const ov::Output<Node>& bias,
+                            const ov::Output<Node>& weight_scales,
+                            const ov::Output<Node>& weight_zero_points,
+                            const ov::Output<Node>& input_scales,
+                            const ov::element::Type output_type = ov::element::undefined);
+
+    FullyConnectedQuantized(const ov::Output<Node>& X,
+                            const ov::Output<Node>& W,
+                            const ov::Output<Node>& bias,
+                            const ov::Output<Node>& weight_scales,
+                            const ov::Output<Node>& weight_zero_points,
+                            const ov::element::Type output_type = ov::element::undefined);
+
+    FullyConnectedQuantized(const ov::Output<Node>& X,
+                            const ov::Output<Node>& W,
+                            const ov::Output<Node>& bias,
+                            const ov::Output<Node>& weight_scales,
+                            const ov::element::Type output_type = ov::element::undefined);
+
+    // FullyConnectedQuantized(const ov::Output<Node>& X,
+    //                         const ov::Output<Node>& W,
+    //                         const ov::Output<Node>& bias,
+    //                         const ov::Output<Node>& weight_scales,
+    //                         const ov::Output<Node>& weight_zero_points,
+    //                         const ov::Output<Node>& input_scales,
+    //                         const ov::Output<Node>& input_zero_points,
+    //                         const ov::element::Type output_type = ov::element::undefined);
+
+    bool visit_attributes(ov::AttributeVisitor& visitor) override;
+
+    void validate_and_infer_types() override;
+
+    std::shared_ptr<Node> clone_with_new_inputs(const ov::OutputVector& new_args) const override;
+
+    std::shared_ptr<Node> fuse_bias(const ov::Output<Node>& bias) const override final;
+
+    ov::element::Type get_output_type() const {
+        return m_output_type;
+    }
+};
+
+}  // namespace internal
+}  // namespace op
+}  // namespace ov
diff --git a/src/common/transformations/include/ov_ops/placeholder.hpp b/src/common/transformations/include/ov_ops/placeholder.hpp
new file mode 100644
index 00000000000000..6a5c1e236389ce
--- /dev/null
+++ b/src/common/transformations/include/ov_ops/placeholder.hpp
@@ -0,0 +1,27 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "openvino/op/op.hpp"
+#include "transformations_visibility.hpp"
+
+namespace ov {
+namespace op {
+namespace internal {
+
+class TRANSFORMATIONS_API Placeholder : public ov::op::Op {
+public:
+    OPENVINO_OP("Placeholder", "ie_internal_opset");
+
+    Placeholder();
+
+    bool visit_attributes(ov::AttributeVisitor& visitor) override;
+    void validate_and_infer_types() override;
+    std::shared_ptr<Node> clone_with_new_inputs(const ov::OutputVector& new_args) const override;
+};
+
+}  // namespace internal
+}  // namespace op
+}  // namespace ov
diff --git a/src/common/transformations/include/transformations/op_conversions/convert_fc_to_compressed.hpp b/src/common/transformations/include/transformations/op_conversions/convert_fc_to_compressed.hpp
new file mode 100644
index 00000000000000..8e2aa617f29dcd
--- /dev/null
+++ b/src/common/transformations/include/transformations/op_conversions/convert_fc_to_compressed.hpp
@@ -0,0 +1,22 @@
+// Copyright (C) 2018-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "openvino/pass/graph_rewrite.hpp"
+#include "transformations_visibility.hpp"
+
+namespace ov {
+namespace pass {
+
+class TRANSFORMATIONS_API ConvertFullyConnectedToFullyConnectedCompressed;
+
+}  // namespace pass
+}  // namespace ov
+
+class ov::pass::ConvertFullyConnectedToFullyConnectedCompressed : public ov::pass::MatcherPass {
+public:
+    OPENVINO_RTTI("ConvertFullyConnectedToFullyConnectedCompressed", "0");
+    ConvertFullyConnectedToFullyConnectedCompressed(bool convert_u4zp_to_u8 = false);
+};
diff --git a/src/common/transformations/include/transformations/op_conversions/convert_fc_to_quantized.hpp b/src/common/transformations/include/transformations/op_conversions/convert_fc_to_quantized.hpp
new file mode 100644
index 00000000000000..9107f7333fc4df
--- /dev/null
+++ b/src/common/transformations/include/transformations/op_conversions/convert_fc_to_quantized.hpp
@@ -0,0 +1,22 @@
+// Copyright (C) 2018-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "openvino/pass/graph_rewrite.hpp"
+#include "transformations_visibility.hpp"
+
+namespace ov {
+namespace pass {
+
+class TRANSFORMATIONS_API ConvertFullyConnectedToFullyConnectedQuantized;
+
+}  // namespace pass
+}  // namespace ov
+
+class ov::pass::ConvertFullyConnectedToFullyConnectedQuantized : public ov::pass::MatcherPass {
+public:
+    OPENVINO_RTTI("ConvertFullyConnectedToFullyConnectedQuantized", "0");
+    ConvertFullyConnectedToFullyConnectedQuantized();
+};
diff --git a/src/common/transformations/src/ov_ops/fully_connected.cpp b/src/common/transformations/src/ov_ops/fully_connected.cpp
new file mode 100644
index 00000000000000..1938e2becac890
--- /dev/null
+++ b/src/common/transformations/src/ov_ops/fully_connected.cpp
@@ -0,0 +1,62 @@
+// Copyright (C) 2018-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "ov_ops/fully_connected.hpp"
+
+#include <memory>
+
+#include "matmul_shape_inference.hpp"
+#include "ov_ops/placeholder.hpp"
+
+namespace ov {
+namespace op {
+namespace internal {
+
+FullyConnected::FullyConnected(const ov::Output<Node>& A,
+                               const ov::Output<Node>& B,
+                               const ov::Output<Node>& bias,
+                               const ov::element::Type output_type)
+    : Op({A, B, bias}),
+      m_output_type(output_type) {
+    validate_and_infer_types();
+}
+
+std::shared_ptr<ov::Node> FullyConnected::clone_with_new_inputs(const ov::OutputVector& new_args) const {
+    check_new_args_count(this, new_args);
+
+    return std::make_shared<FullyConnected>(new_args.at(0), new_args.at(1), new_args.at(2), m_output_type);
+}
+
+std::shared_ptr<ov::Node> FullyConnected::fuse_bias(const ov::Output<Node>& bias) const {
+    return std::make_shared<FullyConnected>(input_value(0), input_value(1), bias, m_output_type);
+}
+
+void FullyConnected::validate_and_infer_types() {
+    const auto input_size = get_input_size();
+    NODE_VALIDATION_CHECK(this,
+                          input_size >= 3,
+                          "Number of inputs is incorrect. Current value is: ",
+                          input_size,
+                          ", expected at least 3.");
+
+    ov::op::v0::MatMul op;
+    op.set_transpose_a(false);
+    op.set_transpose_b(true);
+
+    auto out_shapes =
+        ov::op::v0::shape_infer(&op,
+                                std::vector<ov::PartialShape>{get_input_partial_shape(0), get_input_partial_shape(1)});
+
+    auto output_type = m_output_type == ov::element::undefined ? get_input_element_type(0) : m_output_type;
+    set_output_type(0, output_type, out_shapes[0]);
+}
+
+bool FullyConnected::visit_attributes(ov::AttributeVisitor& visitor) {
+    visitor.on_attribute("output_type", m_output_type);
+    return true;
+}
+
+}  // namespace internal
+}  // namespace op
+}  // namespace ov
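Editorial note: a quick check (not part of the patch) of the shape semantics implemented above. Since `validate_and_infer_types` delegates to MatMul shape inference with `transpose_b=true`, an `[M, K]` activation and `[N, K]` weight yield an `[M, N]` output, and an undefined `output_type` falls back to the element type of input 0. `make_fc_example` is the hypothetical helper from the earlier sketch.

```cpp
// Editorial sketch: A = [1, 128], B = [256, 128]  ->  output [1, 256].
auto fc = make_fc_example();  // hypothetical helper defined in the sketch above
OPENVINO_ASSERT(fc->get_output_shape(0) == (ov::Shape{1, 256}));
OPENVINO_ASSERT(fc->get_output_element_type(0) == ov::element::f32);  // undefined output_type -> input 0 type
```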
diff --git a/src/common/transformations/src/ov_ops/fully_connected_quantized.cpp b/src/common/transformations/src/ov_ops/fully_connected_quantized.cpp
new file mode 100644
index 00000000000000..b632ac8f113d0c
--- /dev/null
+++ b/src/common/transformations/src/ov_ops/fully_connected_quantized.cpp
@@ -0,0 +1,150 @@
+// Copyright (C) 2018-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "ov_ops/fully_connected_quantized.hpp"
+
+#include "matmul_shape_inference.hpp"
+#include "ov_ops/placeholder.hpp"
+
+namespace ov {
+namespace op {
+namespace internal {
+
+FullyConnectedQuantized::FullyConnectedQuantized(const ov::Output<Node>& X,
+                                                 const ov::Output<Node>& W,
+                                                 const ov::Output<Node>& bias,
+                                                 const ov::Output<Node>& weight_scales,
+                                                 const ov::Output<Node>& weight_zero_points,
+                                                 const ov::Output<Node>& input_scales,
+                                                 const ov::Output<Node>& input_zero_points,
+                                                 const ov::Output<Node>& output_scales,
+                                                 const ov::Output<Node>& output_zero_points,
+                                                 const ov::element::Type output_type)
+    : FullyConnected(X, W, bias, output_type) {
+    set_argument(3, weight_scales);
+    set_argument(4, weight_zero_points);
+    set_argument(5, input_scales);
+    set_argument(6, input_zero_points);
+    set_argument(7, output_scales);
+    set_argument(8, output_zero_points);
+    validate_and_infer_types();
+}
+
+FullyConnectedQuantized::FullyConnectedQuantized(const ov::Output<Node>& X,
+                                                 const ov::Output<Node>& W,
+                                                 const ov::Output<Node>& bias,
+                                                 const ov::Output<Node>& weight_scales,
+                                                 const ov::Output<Node>& weight_zero_points,
+                                                 const ov::Output<Node>& input_scales,
+                                                 const ov::element::Type output_type)
+    : FullyConnected(X, W, bias, output_type) {
+    set_argument(3, weight_scales);
+    set_argument(4, weight_zero_points);
+    set_argument(5, input_scales);
+    validate_and_infer_types();
+}
+
+FullyConnectedQuantized::FullyConnectedQuantized(const ov::Output<Node>& X,
+                                                 const ov::Output<Node>& W,
+                                                 const ov::Output<Node>& bias,
+                                                 const ov::Output<Node>& weight_scales,
+                                                 const ov::Output<Node>& weight_zero_points,
+                                                 const ov::element::Type output_type)
+    : FullyConnected(X, W, bias, output_type) {
+    set_argument(3, weight_scales);
+    set_argument(4, weight_zero_points);
+}
+
+FullyConnectedQuantized::FullyConnectedQuantized(const ov::Output<Node>& X,
+                                                 const ov::Output<Node>& W,
+                                                 const ov::Output<Node>& bias,
+                                                 const ov::Output<Node>& weight_scales,
+                                                 const ov::element::Type output_type)
+    : FullyConnected(X, W, bias, output_type) {
+    set_argument(3, weight_scales);
+}
+
+std::shared_ptr<ov::Node> FullyConnectedQuantized::clone_with_new_inputs(const ov::OutputVector& new_args) const {
+    check_new_args_count(this, new_args);
+
+    return std::make_shared<FullyConnectedQuantized>(new_args.at(0),
+                                                     new_args.at(1),
+                                                     new_args.at(2),
+                                                     new_args.at(3),
+                                                     new_args.at(4),
+                                                     new_args.at(5),
+                                                     new_args.at(6),
+                                                     new_args.at(7),
+                                                     new_args.at(8),
+                                                     m_output_type);
+}
+
+std::shared_ptr<ov::Node> FullyConnectedQuantized::fuse_bias(const ov::Output<Node>& bias) const {
+    switch (get_input_size()) {
+    case 9:
+        return std::make_shared<FullyConnectedQuantized>(input_value(0),
+                                                         input_value(1),
+                                                         bias,
+                                                         input_value(3),
+                                                         input_value(4),
+                                                         input_value(5),
+                                                         input_value(6),
+                                                         input_value(7),
+                                                         input_value(8),
+                                                         get_output_type());
+    case 6:
+        return std::make_shared<FullyConnectedQuantized>(input_value(0),
+                                                         input_value(1),
+                                                         bias,
+                                                         input_value(3),
+                                                         input_value(4),
+                                                         input_value(5),
+                                                         get_output_type());
+    case 5:
+        return std::make_shared<FullyConnectedQuantized>(input_value(0),
+                                                         input_value(1),
+                                                         bias,
+                                                         input_value(3),
+                                                         input_value(4),
+                                                         get_output_type());
+    case 4:
+        return std::make_shared<FullyConnectedQuantized>(input_value(0),
+                                                         input_value(1),
+                                                         bias,
+                                                         input_value(3),
+                                                         get_output_type());
+    }
+
+    OPENVINO_THROW("Unsupported number of inputs: ", get_input_size());
+}
+
+// @todo finalize validate_and_infer_types
+void FullyConnectedQuantized::validate_and_infer_types() {
+    const auto input_size = get_input_size();
+    NODE_VALIDATION_CHECK(this,
+                          input_size >= 3,
+                          "Number of inputs is incorrect. Current value is: ",
+                          input_size,
+                          ", expected at least 3.");
+
+    ov::op::v0::MatMul op;
+    op.set_transpose_a(false);
+    op.set_transpose_b(true);
+
+    auto out_shapes =
+        ov::op::v0::shape_infer(&op,
+                                std::vector<ov::PartialShape>{get_input_partial_shape(0), get_input_partial_shape(1)});
+
+    auto output_type = m_output_type == ov::element::undefined ? get_input_element_type(0) : m_output_type;
+    set_output_type(0, output_type, out_shapes[0]);
+}
+
+bool FullyConnectedQuantized::visit_attributes(ov::AttributeVisitor& visitor) {
+    visitor.on_attribute("output_type", m_output_type);
+    return true;
+}
+
+}  // namespace internal
+}  // namespace op
+}  // namespace ov
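Editorial note: the quantization inputs are purely positional, so a caller that only has some of them fills the remaining slots with Placeholder, exactly as ConvertFullyConnectedToFullyConnectedQuantized does further down. A hypothetical helper as a sketch:

```cpp
// Editorial sketch (not part of the patch): 6-input form with Placeholder for
// the quantization parameters the caller does not have.
#include "ov_ops/fully_connected_quantized.hpp"
#include "ov_ops/placeholder.hpp"

std::shared_ptr<ov::Node> make_fcq_example(const ov::Output<ov::Node>& x,
                                           const ov::Output<ov::Node>& w,
                                           const ov::Output<ov::Node>& weight_scales) {
    auto ph = [] { return std::make_shared<ov::op::internal::Placeholder>(); };
    return std::make_shared<ov::op::internal::FullyConnectedQuantized>(
        x, w, /*bias=*/ph(), weight_scales, /*weight_zero_points=*/ph(), /*input_scales=*/ph(), ov::element::f32);
}
```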
diff --git a/src/common/transformations/src/ov_ops/placeholder.cpp b/src/common/transformations/src/ov_ops/placeholder.cpp
new file mode 100644
index 00000000000000..17e4c3224a4ad6
--- /dev/null
+++ b/src/common/transformations/src/ov_ops/placeholder.cpp
@@ -0,0 +1,34 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "ov_ops/placeholder.hpp"
+
+#include "transformations/rt_info/fused_names_attribute.hpp"
+
+namespace ov {
+namespace op {
+namespace internal {
+
+Placeholder::Placeholder() : ov::op::Op() {
+    validate_and_infer_types();
+    // set_friendly_name(get_name());
+    // get_rt_info().emplace(FusedNames::get_type_info_static(), FusedNames{get_friendly_name()});
+}
+
+bool Placeholder::visit_attributes(ov::AttributeVisitor& visitor) {
+    return true;
+}
+
+void Placeholder::validate_and_infer_types() {
+    set_output_type(0, ov::element::undefined, ov::PartialShape{});
+}
+
+std::shared_ptr<ov::Node> Placeholder::clone_with_new_inputs(const ov::OutputVector& new_args) const {
+    check_new_args_count(this, new_args);
+    return std::make_shared<Placeholder>();
+}
+
+}  // namespace internal
+}  // namespace op
+}  // namespace ov
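Editorial note: Placeholder is a zero-input op whose single output has undefined element type and an empty shape; this is exactly what the edge.cpp and ir_deserializer.cpp changes later in this diff test for to recognize "no data" inputs. A minimal sketch:

```cpp
// Editorial sketch (not part of the patch): Placeholder output is typeless and rank-0.
auto ph = std::make_shared<ov::op::internal::Placeholder>();
OPENVINO_ASSERT(ph->get_output_element_type(0) == ov::element::undefined);
OPENVINO_ASSERT(ph->get_output_partial_shape(0).rank().get_length() == 0);
```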
diff --git a/src/common/transformations/src/transformations/op_conversions/convert_fc_to_compressed.cpp b/src/common/transformations/src/transformations/op_conversions/convert_fc_to_compressed.cpp
new file mode 100644
index 00000000000000..eb9653b4bd62fa
--- /dev/null
+++ b/src/common/transformations/src/transformations/op_conversions/convert_fc_to_compressed.cpp
@@ -0,0 +1,191 @@
+// Copyright (C) 2018-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "transformations/op_conversions/convert_fc_to_compressed.hpp"
+
+#include <memory>
+
+#include "openvino/core/rt_info.hpp"
+#include "openvino/op/constant.hpp"
+#include "openvino/op/convert.hpp"
+#include "openvino/op/multiply.hpp"
+#include "openvino/op/reshape.hpp"
+#include "openvino/op/subtract.hpp"
+#include "openvino/op/transpose.hpp"
+#include "openvino/pass/pattern/op/or.hpp"
+#include "openvino/pass/pattern/op/pattern.hpp"
+#include "openvino/pass/pattern/op/wrap_type.hpp"
+#include "ov_ops/fully_connected.hpp"
+#include "ov_ops/fully_connected_quantized.hpp"
+#include "transformations/utils/utils.hpp"
+
+ov::pass::ConvertFullyConnectedToFullyConnectedCompressed::ConvertFullyConnectedToFullyConnectedCompressed(
+    bool convert_u4zp_to_u8) {
+    using namespace ov::pass::pattern;
+
+    auto compressed_constant = [](const ov::Output<ov::Node>& output) {
+        return (output.get_element_type() == ov::element::u8 || output.get_element_type() == ov::element::i8 ||
+                output.get_element_type() == ov::element::u4 || output.get_element_type() == ov::element::i4 ||
+                output.get_element_type() == ov::element::nf4 || output.get_element_type() == ov::element::f4e2m1);
+        // output.get_target_inputs().size() == 1;
+        // output.get_target_inputs().size() > 0;
+    };
+
+    auto reshape_3d_to_2d = [](const ov::Output<ov::Node>& output) {
+        auto in_ps = output.get_node()->get_input_partial_shape(0);
+        auto out_ps = output.get_node()->get_output_partial_shape(0);
+        return in_ps.rank().is_static() && out_ps.rank().is_static() && in_ps.size() == 3 && out_ps.size() == 2;
+    };
+
+    auto weights_m = wrap_type<ov::op::v0::Constant>(compressed_constant);
+    auto convert_m = wrap_type<ov::op::v0::Convert>({weights_m});
+
+    // auto sub_const_m = wrap_type<ov::op::v0::Constant>(consumers_count(1));
+    auto sub_const_m = wrap_type<ov::op::v0::Constant>();
+    auto sub_convert_const_m = wrap_type<ov::op::v0::Convert>({sub_const_m});
+    auto sub_with_convert_m = wrap_type<ov::op::v1::Subtract>({convert_m, sub_convert_const_m});
+    auto sub_no_convert_m = wrap_type<ov::op::v1::Subtract>({convert_m, sub_const_m});
+    auto subtract_m = std::make_shared<ov::pass::pattern::op::Or>(OutputVector{sub_with_convert_m, sub_no_convert_m});
+
+    // auto mul_const_m = wrap_type<ov::op::v0::Constant>(consumers_count(1));
+    auto mul_const_m = wrap_type<ov::op::v0::Constant>();
+    auto mul_convert_const_m = wrap_type<ov::op::v0::Convert>({mul_const_m});
+    auto mul_scale_m = std::make_shared<ov::pass::pattern::op::Or>(OutputVector{mul_const_m, mul_convert_const_m});
+
+    auto mul_with_sub_m = wrap_type<ov::op::v1::Multiply>({subtract_m, mul_scale_m});
+    auto mul_no_sub_m = wrap_type<ov::op::v1::Multiply>({convert_m, mul_scale_m});
+    auto mul_m = std::make_shared<ov::pass::pattern::op::Or>(OutputVector{mul_with_sub_m, mul_no_sub_m});
+
+    auto reshape_const_m = wrap_type<ov::op::v0::Constant>();
+    auto reshape_m = wrap_type<ov::op::v1::Reshape>({mul_m, reshape_const_m}, reshape_3d_to_2d);
+
+    auto transpose_input = std::make_shared<ov::pass::pattern::op::Or>(OutputVector{reshape_m, mul_m});
+    auto transpose_const_m = wrap_type<ov::op::v0::Constant>();
+    auto transpose_m = wrap_type<ov::op::v1::Transpose>({transpose_input, transpose_const_m});
+
+    auto data_m = any_input();
+    auto bias_m = any_input();
+    auto weights_input_m = std::make_shared<ov::pass::pattern::op::Or>(ov::OutputVector{reshape_m, transpose_m, mul_m});
+    auto fully_connected_m = wrap_type<ov::op::internal::FullyConnected>({data_m, weights_input_m, bias_m});
+
+    ov::matcher_pass_callback callback = [OV_CAPTURE_CPY_AND_THIS](ov::pass::pattern::Matcher& m) {
+        const auto& pattern_map = m.get_pattern_value_map();
+        OPENVINO_ASSERT(pattern_map.count(fully_connected_m));
+        OPENVINO_ASSERT(pattern_map.count(mul_const_m));
+        OPENVINO_ASSERT(pattern_map.count(weights_m));
+        OPENVINO_ASSERT(pattern_map.count(bias_m));
+        OPENVINO_ASSERT(pattern_map.count(convert_m));
+        auto fc = std::dynamic_pointer_cast<ov::op::internal::FullyConnected>(
+            pattern_map.at(fully_connected_m).get_node_shared_ptr());
+        if (!fc || transformation_callback(fc)) {
+            return false;
+        }
+
+        bool has_transpose = pattern_map.count(transpose_m);
+        auto scale_shape = pattern_map.at(mul_const_m).get_shape();
+        bool grouped = std::count_if(scale_shape.begin(), scale_shape.end(), [](size_t d) {
+                           return d > 1;
+                       }) > 1;
+
+        auto weights_shape = fc->get_input_shape(1);
+        const auto IC = *(weights_shape.rbegin());
+        const auto OC = *(weights_shape.rbegin() + 1);
+
+        const auto G = grouped ? (has_transpose ? *(scale_shape.rbegin() + 2) : *(scale_shape.rbegin() + 1)) : 1;
+
+        if (IC % G != 0 || IC / G < 4 || OC == 1) {
+            return false;
+        }
+
+        auto reshape_const_to_2d = [has_transpose, grouped](std::shared_ptr<ov::Node> node) {
+            auto constant = std::dynamic_pointer_cast<ov::op::v0::Constant>(node);
+            OPENVINO_ASSERT(constant != nullptr);
+            ov::Shape current_shape = constant->get_shape();
+            if (current_shape.size() <= 2)
+                return constant;
+
+            OPENVINO_ASSERT(current_shape.size() == 3);
+
+            auto new_shape = (has_transpose || !grouped)
+                                 ? ov::Shape{current_shape[0] * current_shape[1], current_shape[2]}
+                                 : ov::Shape{current_shape[0], current_shape[1] * current_shape[2]};
+
+            return std::make_shared<ov::op::v0::Constant>(*constant, new_shape);
+        };
+
+        auto convert_u4const_to_u8 = [convert_u4zp_to_u8](std::shared_ptr<ov::Node> node) {
+            auto constant = std::dynamic_pointer_cast<ov::op::v0::Constant>(node);
+            if (constant->get_element_type() != ov::element::u4 || !convert_u4zp_to_u8)
+                return std::dynamic_pointer_cast<ov::Node>(constant);
+            return std::dynamic_pointer_cast<ov::Node>(std::make_shared<ov::op::v0::Convert>(node, ov::element::u8));
+        };
+
+        const ov::Output<ov::Node>& fc_input_a = fc->input(0).get_source_output();
+        const auto& scale = reshape_const_to_2d(pattern_map.at(mul_const_m).get_node_shared_ptr());
+        std::shared_ptr<ov::Node> optional_zero_point = nullptr;
+
+        const bool with_zero_point =
+            pattern_map.count(sub_no_convert_m) > 0 || pattern_map.count(sub_with_convert_m) > 0;
+        if (with_zero_point) {
+            // WA: Convert ZP to u8 for OneDNN case to avoid u4 reorder
+            optional_zero_point =
+                convert_u4const_to_u8(reshape_const_to_2d(pattern_map.at(sub_const_m).get_node_shared_ptr()));
+        }
+
+        std::shared_ptr<ov::Node> fc_input_b = reshape_const_to_2d(pattern_map.at(weights_m).get_node_shared_ptr());
+        std::shared_ptr<ov::Node> fc_input_scale = scale;
+        std::shared_ptr<ov::Node> fc_input_zp = optional_zero_point;
+        std::shared_ptr<ov::Node> fc_input_bias = pattern_map.at(bias_m).get_node_shared_ptr();
+        std::vector<std::shared_ptr<ov::Node>> result_nodes = {};
+        if (has_transpose) {
+            const auto& transpose = pattern_map.at(transpose_m).get_node_shared_ptr();
+            std::shared_ptr<ov::Node> transpose_const = pattern_map.at(transpose_const_m).get_node_shared_ptr();
+            if (ov::shape_size(transpose_const->get_shape()) != fc_input_b->get_output_partial_shape(0).size()) {
+                std::vector<int32_t> new_order(fc_input_b->get_output_partial_shape(0).size());
+                std::iota(new_order.begin(), new_order.end(), 0);
+                std::swap(new_order[new_order.size() - 1], new_order[new_order.size() - 2]);
+                transpose_const =
+                    std::make_shared<ov::op::v0::Constant>(ov::element::i32, ov::Shape{new_order.size()}, new_order);
+            }
+
+            fc_input_b = transpose->clone_with_new_inputs({fc_input_b->output(0), transpose_const});
+            ov::disable_constant_folding(fc_input_b);
+            result_nodes.push_back(fc_input_b);
+            fc_input_scale = transpose->clone_with_new_inputs({scale->output(0), transpose_const});
+            ov::disable_constant_folding(fc_input_scale);
+            result_nodes.push_back(fc_input_scale);
+            if (with_zero_point && ov::shape_size(optional_zero_point->output(0).get_shape()) > 1) {
+                fc_input_zp = transpose->clone_with_new_inputs({optional_zero_point->output(0), transpose_const});
+                ov::disable_constant_folding(fc_input_zp);
+                result_nodes.push_back(fc_input_zp);
+            }
+        }
+
+        std::shared_ptr<ov::Node> new_fc = nullptr;
+        if (with_zero_point) {
+            new_fc = std::make_shared<ov::op::internal::FullyConnectedQuantized>(fc_input_a,
+                                                                                 fc_input_b,
+                                                                                 fc_input_bias,
+                                                                                 fc_input_scale,
+                                                                                 fc_input_zp,
+                                                                                 fc->get_output_type());
+        } else {
+            new_fc = std::make_shared<ov::op::internal::FullyConnectedQuantized>(fc_input_a,
+                                                                                 fc_input_b,
+                                                                                 fc_input_bias,
+                                                                                 fc_input_scale,
+                                                                                 fc->get_output_type());
+        }
+
+        result_nodes.push_back(new_fc);
+        new_fc->set_friendly_name(fc->get_friendly_name());
+        ov::copy_runtime_info(m.get_matched_nodes(), result_nodes);
+        ov::replace_node(fc, new_fc);
+        return true;
+    };
+
+    auto m = std::make_shared<ov::pass::pattern::Matcher>(fully_connected_m,
+                                                          "ConvertFullyConnectedToFullyConnectedCompressed");
+    this->register_matcher(m, callback);
+}
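Editorial note: this pass matches the weight-decompression subgraph Constant(u8/i8/u4/i4/nf4/f4e2m1) → Convert → optional Subtract(zero point) → Multiply(scale) → optional Reshape/Transpose → FullyConnected. A minimal usage sketch (not part of the patch):

```cpp
// Editorial sketch: running the conversion on a model via the pass manager.
#include "openvino/core/model.hpp"
#include "openvino/pass/manager.hpp"
#include "transformations/op_conversions/convert_fc_to_compressed.hpp"

void run_compressed_conversion(const std::shared_ptr<ov::Model>& model) {
    ov::pass::Manager manager;
    // convert_u4zp_to_u8=true avoids u4 zero-point reorders on OneDNN (see the WA comment above)
    manager.register_pass<ov::pass::ConvertFullyConnectedToFullyConnectedCompressed>(true);
    manager.run_passes(model);
}
```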
diff --git a/src/common/transformations/src/transformations/op_conversions/convert_fc_to_quantized.cpp b/src/common/transformations/src/transformations/op_conversions/convert_fc_to_quantized.cpp
new file mode 100644
index 00000000000000..3664ba3f039785
--- /dev/null
+++ b/src/common/transformations/src/transformations/op_conversions/convert_fc_to_quantized.cpp
@@ -0,0 +1,81 @@
+// Copyright (C) 2018-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "transformations/op_conversions/convert_fc_to_quantized.hpp"
+
+#include <memory>
+
+#include "openvino/core/rt_info.hpp"
+#include "openvino/op/constant.hpp"
+#include "openvino/op/multiply.hpp"
+#include "openvino/pass/pattern/op/label.hpp"
+#include "openvino/pass/pattern/op/pattern.hpp"
+#include "openvino/pass/pattern/op/wrap_type.hpp"
+#include "ov_ops/fully_connected.hpp"
+#include "ov_ops/fully_connected_quantized.hpp"
+#include "ov_ops/placeholder.hpp"
+#include "transformations/utils/utils.hpp"
+
+ov::pass::ConvertFullyConnectedToFullyConnectedQuantized::ConvertFullyConnectedToFullyConnectedQuantized() {
+    using namespace ov::pass::pattern;
+
+    auto quantized_weights = [](const ov::Output<ov::Node>& output) {
+        return output.get_element_type() == ov::element::i8;
+    };
+
+    auto quantized_activations = [](const ov::Output<ov::Node>& output) {
+        return output.get_element_type() == ov::element::u8 || output.get_element_type() == ov::element::i8;
+    };
+
+    auto activations_m = pattern::any_input(quantized_activations);
+    auto weights_m = wrap_type<ov::op::v0::Constant>(quantized_weights);
+    // auto bias_m = wrap_type<ov::op::v0::Constant>();
+    auto bias_m = pattern::any_input();
+
+    auto fully_connected_m = wrap_type<ov::op::internal::FullyConnected>({activations_m, weights_m, bias_m});
+    auto dequantization_scales_m = wrap_type<ov::op::v0::Constant>();
+    auto multiply_m = wrap_type<ov::op::v1::Multiply>({fully_connected_m, dequantization_scales_m});
+
+    ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) {
+        const auto& pattern_map = m.get_pattern_value_map();
+
+        auto fc_output = pattern_map.at(fully_connected_m);
+        auto activations = pattern_map.at(activations_m);
+        auto weights = pattern_map.at(weights_m);
+        auto bias = pattern_map.at(bias_m);
+        auto multiply = pattern_map.at(multiply_m);
+        auto dequantization_scales = pattern_map.at(dequantization_scales_m);
+
+        const auto& fc_output_shape = fc_output.get_partial_shape();
+        const auto& multiply_output_shape = multiply.get_partial_shape();
+
+        if (*fc_output_shape.rbegin() != *multiply_output_shape.rbegin()) {
+            return false;
+        }
+
+        auto fc_node = std::dynamic_pointer_cast<ov::op::internal::FullyConnected>(
+            pattern_map.at(fully_connected_m).get_node_shared_ptr());
+
+        auto fc_quantized = std::make_shared<ov::op::internal::FullyConnectedQuantized>(
+            activations,
+            weights,
+            bias,
+            std::make_shared<ov::op::internal::Placeholder>(),
+            std::make_shared<ov::op::internal::Placeholder>(),
+            std::make_shared<ov::op::internal::Placeholder>(),
+            std::make_shared<ov::op::internal::Placeholder>(),
+            dequantization_scales,
+            std::make_shared<ov::op::internal::Placeholder>(),
+            fc_node->get_output_type());
+
+        auto multiply_node = multiply.get_node_shared_ptr();
+        fc_quantized->set_friendly_name(multiply_node->get_friendly_name());
+
+        ov::copy_runtime_info({multiply_node, fc_node}, fc_quantized);
+        ov::replace_node(multiply_node, fc_quantized);
+
+        return true;
+    };
+
+    auto m = std::make_shared<ov::pass::pattern::Matcher>(multiply_m, "ConvertFullyConnectedToFullyConnectedQuantized");
+    this->register_matcher(m, callback);
+}
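Editorial note: a sketch of the model fragment this pass matches — an internal FullyConnected on quantized activations and i8 weights, followed by a Multiply with constant dequantization scales. The Multiply is folded in as the `output_scales` input (position 7) of FullyConnectedQuantized, with Placeholder for every other quantization input. Names and shapes below are illustrative only.

```cpp
// Editorial sketch (hypothetical fragment, not part of the patch).
std::shared_ptr<ov::Node> make_quantized_fc_pattern() {
    auto x = std::make_shared<ov::op::v0::Parameter>(ov::element::u8, ov::Shape{1, 64});
    auto w = ov::op::v0::Constant::create(ov::element::i8, ov::Shape{32, 64}, {0});
    auto bias = std::make_shared<ov::op::internal::Placeholder>();
    auto fc = std::make_shared<ov::op::internal::FullyConnected>(x, w, bias, ov::element::f32);
    auto dq = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{1, 32}, {0.01f});
    // The pass replaces this Multiply with FullyConnectedQuantized(x, w, bias, ..., dq, ...).
    return std::make_shared<ov::op::v1::Multiply>(fc, dq);
}
```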
diff --git a/src/common/transformations/src/transformations/op_conversions/convert_gather_to_compressed.cpp b/src/common/transformations/src/transformations/op_conversions/convert_gather_to_compressed.cpp
index 156481fb893227..ee88a47f8b228e 100644
--- a/src/common/transformations/src/transformations/op_conversions/convert_gather_to_compressed.cpp
+++ b/src/common/transformations/src/transformations/op_conversions/convert_gather_to_compressed.cpp
@@ -24,7 +24,8 @@ ov::pass::ConvertGatherToGatherCompressed::ConvertGatherToGatherCompressed() {
     auto compressed_constant = [](const ov::Output<ov::Node>& output) {
         return (output.get_element_type() == ov::element::u8 || output.get_element_type() == ov::element::i8 ||
                 output.get_element_type() == ov::element::u4 || output.get_element_type() == ov::element::i4) &&
-               output.get_target_inputs().size() == 1 &&
+               // output.get_target_inputs().size() == 1 &&
+               output.get_target_inputs().size() > 0 &&
                (output.get_shape().size() == 2 || output.get_shape().size() == 3);
     };
diff --git a/src/core/src/pass/graph_rewrite.cpp b/src/core/src/pass/graph_rewrite.cpp
index 00eafa873043c3..249f2faa9e1c72 100644
--- a/src/core/src/pass/graph_rewrite.cpp
+++ b/src/core/src/pass/graph_rewrite.cpp
@@ -280,11 +280,12 @@ void ov::pass::MatcherPass::register_matcher(const std::shared_ptr<ov::pass::pattern::Matcher>& m,
     m_handler = [m, callback](const std::shared_ptr<Node>& node) -> bool {
+        OPENVINO_DEBUG("[MATCHER] ", m->get_name(), " matching ", node);
         if (m->match(node->output(0))) {
-            OPENVINO_DEBUG("Matcher ", m->get_name(), " matched ", node);
+            OPENVINO_DEBUG("[MATCHER] ", m->get_name(), " matched ", node);
             OV_PASS_CALLBACK(m);
             const bool status = callback(*m.get());
-            OPENVINO_DEBUG("Matcher ", m->get_name(), " callback ", (status ? "succeded" : "failed"));
+            OPENVINO_DEBUG("[MATCHER] ", m->get_name(), " callback ", (status ? "succeeded" : "failed"));
             // explicitly clear Matcher state because it holds pointers to matched nodes
             m->clear_state();
             return status;
diff --git a/src/frontends/ir/src/ir_deserializer.cpp b/src/frontends/ir/src/ir_deserializer.cpp
index 68900b150514bc..8f2c02ad575d89 100644
--- a/src/frontends/ir/src/ir_deserializer.cpp
+++ b/src/frontends/ir/src/ir_deserializer.cpp
@@ -9,6 +9,7 @@
 
 #include "openvino/core/except.hpp"
 #include "openvino/core/meta_data.hpp"
+#include "openvino/core/type.hpp"
 #include "openvino/core/type/element_type.hpp"
 #include "openvino/op/constant.hpp"
 #include "openvino/op/loop.hpp"
@@ -830,7 +831,9 @@ std::shared_ptr<ov::Node> ov::XmlDeserializer::create_node(const std::vector<ov::Output<ov::Node>>& inputs,
-        if (ov::element::Type_t::undefined == inputs[i].get_element_type())
+        if (!ov::as_type_ptr<ov::op::internal::Placeholder>(inputs[i].get_node_shared_ptr()) &&
+            ov::element::Type_t::undefined == inputs[i].get_element_type())
             OPENVINO_THROW(params.type,
                            " layer ",
                            params.name,
diff --git a/src/plugins/intel_cpu/src/cpu_types.cpp b/src/plugins/intel_cpu/src/cpu_types.cpp
index d3245312a16efc..f69ae6eecfd0a1 100644
--- a/src/plugins/intel_cpu/src/cpu_types.cpp
+++ b/src/plugins/intel_cpu/src/cpu_types.cpp
@@ -35,12 +35,14 @@ static const TypeToNameMap& get_type_to_name_tbl() {
     static const TypeToNameMap type_to_name_tbl = {
         {"Constant", Type::Input},
         {"Parameter", Type::Input},
+        {"Placeholder", Type::Input},
         {"Result", Type::Output},
         {"Eye", Type::Eye},
         {"Convolution", Type::Convolution},
         {"GroupConvolution", Type::Convolution},
         {"MatMul", Type::MatMul},
         {"FullyConnected", Type::FullyConnected},
+        {"FullyConnectedQuantized", Type::FullyConnected},
         {"MaxPool", Type::Pooling},
         {"AvgPool", Type::Pooling},
         {"AdaptiveMaxPool", Type::AdaptivePooling},
diff --git a/src/plugins/intel_cpu/src/dnnl_postops_composer.cpp b/src/plugins/intel_cpu/src/dnnl_postops_composer.cpp
index 2f82fbe553ae19..bdea332e74b001 100644
--- a/src/plugins/intel_cpu/src/dnnl_postops_composer.cpp
+++ b/src/plugins/intel_cpu/src/dnnl_postops_composer.cpp
@@ -11,21 +11,68 @@
 
 #include
 #include
 
+#include "cpu_types.h"
 #include "memory_desc/dnnl_blocked_memory_desc.h"
 #include "openvino/core/type/element_type.hpp"
+#include "utils/cpu_utils.hpp"
 #include "utils/debug_capabilities.h"
 
 namespace ov {
 namespace intel_cpu {
 
+static std::vector<float> getDeQuantizedScales(const MemoryArgs& memory) {
+    if (!memory.count(ARG_DST | ARG_ATTR_SCALES))
+        return {};
+
+    auto scalesBlob = memory.at(ARG_DST | ARG_ATTR_SCALES);
+
+    auto scalesData = static_cast<const float*>(scalesBlob->getData());
+
+    if (!scalesData)
+        return {};
+
+    auto dstShape = memory.at(ARG_DST)->getShape();
+    auto dqScalesShape = memory.at(ARG_DST | ARG_ATTR_SCALES)->getShape();
+
+    auto scalesDims = getNormalizedDimsBySize(dqScalesShape.getDims(), dstShape.getDims().size());
+
+    auto scaleSize = std::accumulate(scalesDims.begin(), scalesDims.end(), std::size_t(1), std::multiplies<size_t>());
+
+    std::vector<float> DQScales(scaleSize, 1.0);
+
+    // OPENVINO_ASSERT(scaleSize == 1 || DQScales.size() == 1 || DQScales.size() == scaleSize,
+    //                 "set invalid scales size , DQScales vector size: ",
+    //                 DQScales.size(),
+    //                 ", scale data size: ",
+    //                 scaleSize);
+
+    if (scaleSize > DQScales.size())
+        DQScales.resize(scaleSize, DQScales[0]);
+    if (1 == scaleSize) {
+        std::transform(DQScales.begin(), DQScales.end(), DQScales.begin(), [=](float val) {
+            return (scalesData[0] * val);
+        });
+    } else {
+        for (size_t i = 0; i < DQScales.size(); i++) {
+            DQScales[i] *= scalesData[i];
+        }
+    }
+    if (std::all_of(DQScales.begin(), DQScales.end(), [&](float val) {
+            return (val == DQScales[0]);
+        }))
+        DQScales.resize(1);
+
+    return DQScales;
+}
+
 DnnlPostOpsComposer::DnnlPostOpsComposer(const PostOps& postOps,
                                          const dnnl::engine& engine,
                                          const VectorDims& outputDims,
                                          const size_t indexOfOutputChannelDim,
                                          const bool isInt8,
                                          const int weiScaleMaskPerChannel,
-                                         const std::vector<float>& DQScales,
-                                         const bool hasBias,
+                                         const bool weightsWithBatch,
+                                         const MemoryArgs& memory,
                                          const dnnl::memory::data_type outDataType)
     : engine(engine),
       postOps(postOps),
@@ -33,12 +80,14 @@ DnnlPostOpsComposer::DnnlPostOpsComposer(const PostOps& postOps,
       idxOC(indexOfOutputChannelDim),
       isINT8(isInt8),
       weightScaleMaskPerChannel(weiScaleMaskPerChannel),
+      weightsWithBatch(weightsWithBatch),
       outDataType(outDataType) {
     OPENVINO_ASSERT(idxOC >= 0 && static_cast<size_t>(idxOC) < outputDims.size());
    OC = outputDims[idxOC];
     dimsPerOC = dimsPerTensor = VectorDims(outputDims.size(), 1);
     dimsPerOC[idxOC] = OC;
 
+    const auto& DQScales = getDeQuantizedScales(memory);  // generalise dq scales, so extra logic is necessary here.
     if (isINT8) {
         wei_scale_values = DQScales.empty() ? std::vector<float>{1.0} : DQScales;
@@ -49,6 +98,7 @@ DnnlPostOpsComposer::DnnlPostOpsComposer(const PostOps& postOps,
             updateWeiScales();
         // If having the bias, attr weight scale can't be updated for further ops-ops optimization.
         // ONEDNN 3.x quantization for scheme: QuantizedInput * QuantizedWeight * DQScale + Bias.
+        const bool hasBias = !memory.at(ARG_BIAS)->getDesc().empty();
         weightScaleAvailable = !hasBias;
     } else if (!DQScales.empty()) {
         // DQ scale is fused but swiching back to non-INT8 for execution in some cases.
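Editorial note: the interesting behaviour in `getDeQuantizedScales` above is the final collapse — when every per-channel dequantization scale is equal, the vector shrinks to a single element so oneDNN can use a per-tensor scale mask. An isolated illustration of that step (not part of the patch):

```cpp
// Editorial sketch: standalone reimplementation of the collapsing logic.
#include <algorithm>
#include <vector>

std::vector<float> normalize_dq_scales(std::vector<float> scales) {
    if (!scales.empty() &&
        std::all_of(scales.begin(), scales.end(), [&](float v) { return v == scales[0]; }))
        scales.resize(1);  // all channels equal -> treat as one per-tensor scale
    return scales;
}
// normalize_dq_scales({0.5f, 0.5f, 0.5f}) -> {0.5f}; {0.5f, 0.25f} stays per-channel.
```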
@@ -325,9 +375,9 @@ static OptimizedFormula updateOptimizedFormula(const FakeQuantizePostOp& postOp,
 }
 
 bool DnnlPostOpsComposer::appendAttrPostOps(const FakeQuantizePostOp& postOp,
-                                             bool isLastPostOp,
-                                             bool doRounding,
-                                             bool allowBinary) {
+                                            bool isLastPostOp,
+                                            bool doRounding,
+                                            bool allowBinary) {
     DEBUG_LOG("isLastPostOp=",
               isLastPostOp,
               ", outDataType=",
@@ -541,9 +591,9 @@ bool DnnlPostOpsComposer::appendShift(const std::vector<float>& shift, bool allowBinary) {
 }
 
 bool DnnlPostOpsComposer::appendLinear(const std::vector<float>& scale,
-                                        const std::vector<float>& shift,
-                                        bool isLastPostOp,
-                                        bool allowBinary) {
+                                       const std::vector<float>& shift,
+                                       bool isLastPostOp,
+                                       bool allowBinary) {
     if (scale.size() == 1 && shift.size() == 1) {
         if (shift[0] == 0.0f)
             return appendScale(scale, isLastPostOp, allowBinary);
@@ -594,20 +644,45 @@ void DnnlPostOpsComposer::appendClip(const std::vector<float>& low, const std::vector<float>& high) {
 
 static MemoryPtr prepackDecompressionParams(const MemoryCPtr& paramsPtr,
                                             bool needTranspose,
                                             ov::element::Type dstPrc,
-                                            const dnnl::engine& engine) {
+                                            const dnnl::engine& engine,
+                                            bool weightsWithBatch) {
     auto shape = paramsPtr->getShape().getStaticDims();
     if (shape.size() == 1 && shape[0] == 1) {
         shape.push_back(1);
     }
+
     if (shape.size() != 2 && shape.size() != 3)
-        OPENVINO_THROW("DnnlPostOpsComposer cannot prepack decompression params with invalid shape");
+        OPENVINO_THROW("DnnlPostOpsComposer cannot prepack decompression params with invalid shape");
+
+    size_t OC = 0;
+    size_t G = 0;
+    // if (weightsWithBatch) {
+    //     OC = needTranspose ? shape[shape.size() - 2] : shape[shape.size() - 1];
+    //     G = needTranspose ? shape[shape.size() - 1] : shape[shape.size() - 2];
+    // } else {
+    //     OC = needTranspose ? shape.front() : shape.back();
+    //     G = needTranspose ? shape[1] : shape.front();
+    // }
+    if (weightsWithBatch) {
+        OC = shape[shape.size() - 2];
+        G = shape[shape.size() - 1];
+    } else {
+        OC = shape.front();
+        G = shape[1];
+    }
 
-    Shape dstShape = needTranspose ? Shape({shape[0], shape[1]}) : Shape({shape[shape.size() - 1], shape[0]});
-    DnnlBlockedMemoryDesc dstMemoryDesc(dstShape, DnnlExtensionUtils::ElementTypeToDataType(dstPrc), dnnl::memory::format_tag::io);
-    auto dstMem = std::make_shared<Memory>(engine, dstMemoryDesc);
+    Shape dstShape = Shape({OC, G});
+    DnnlBlockedMemoryDesc dstMemoryDesc(dstShape,
+                                        DnnlExtensionUtils::ElementTypeToDataType(dstPrc),
+                                        dnnl::memory::format_tag::io);
+    auto dstMem = std::make_shared<Memory>(engine, dstMemoryDesc);
 
     auto srcFormat = needTranspose ? dnnl::memory::format_tag::oi : dnnl::memory::format_tag::io;
-    DnnlBlockedMemoryDesc srcMemoryDesc(dstShape, DnnlExtensionUtils::ElementTypeToDataType(paramsPtr->getDescPtr()->getPrecision()), srcFormat);
+
+    DnnlBlockedMemoryDesc srcMemoryDesc(
+        dstShape,
+        DnnlExtensionUtils::ElementTypeToDataType(paramsPtr->getDescPtr()->getPrecision()),
+        srcFormat);
     auto srcMem = std::make_shared<Memory>(engine, srcMemoryDesc, paramsPtr->getData());
 
     dstMem->load(*srcMem);
@@ -615,25 +690,32 @@ static MemoryPtr prepackDecompressionParams(const MemoryCPtr& paramsPtr,
     return dstMem;
 }
 
-void DnnlPostOpsComposer::appendDecompressionScales(const MemoryCPtr& scales_ptr, bool needTranspose, ov::element::Type dstPrecision) {
+void DnnlPostOpsComposer::appendDecompressionScales(const MemoryCPtr& scales_ptr,
+                                                    bool needTranspose,
+                                                    ov::element::Type dstPrecision) {
     if (scales_ptr == nullptr)
         return;
 
-    auto scalesMem = prepackDecompressionParams(scales_ptr, needTranspose, dstPrecision, engine);
+    auto scalesMem = prepackDecompressionParams(scales_ptr, needTranspose, dstPrecision, engine, weightsWithBatch);
     attr.set_scales_dims(DNNL_ARG_WEIGHTS,
-                         DnnlExtensionUtils::convertToDnnlDims(scalesMem->getStaticDims()), DnnlExtensionUtils::ElementTypeToDataType(dstPrecision));
+                         DnnlExtensionUtils::convertToDnnlDims(scalesMem->getStaticDims()),
+                         DnnlExtensionUtils::ElementTypeToDataType(dstPrecision));
     cpuArgs[DNNL_ARG_ATTR_SCALES | DNNL_ARG_WEIGHTS] = std::move(scalesMem);
     dnnlArgs[DNNL_ARG_ATTR_SCALES | DNNL_ARG_WEIGHTS] =
         cpuArgs[DNNL_ARG_ATTR_SCALES | DNNL_ARG_WEIGHTS]->getPrimitive();
 }
 
-void DnnlPostOpsComposer::appendDecompressionZeroPoints(const MemoryCPtr& zero_points_ptr, bool needTranspose, ov::element::Type dstPrecision) {
+void DnnlPostOpsComposer::appendDecompressionZeroPoints(const MemoryCPtr& zero_points_ptr,
+                                                        bool needTranspose,
+                                                        ov::element::Type dstPrecision) {
     if (zero_points_ptr == nullptr)
         return;
 
-    auto zeroPointsMem = prepackDecompressionParams(zero_points_ptr, needTranspose, dstPrecision, engine);
+    auto zeroPointsMem =
+        prepackDecompressionParams(zero_points_ptr, needTranspose, dstPrecision, engine, weightsWithBatch);
     attr.set_zero_points_dims(DNNL_ARG_WEIGHTS,
-                              DnnlExtensionUtils::convertToDnnlDims(zeroPointsMem->getStaticDims()), DnnlExtensionUtils::ElementTypeToDataType(dstPrecision));
+                              DnnlExtensionUtils::convertToDnnlDims(zeroPointsMem->getStaticDims()),
+                              DnnlExtensionUtils::ElementTypeToDataType(dstPrecision));
     cpuArgs[DNNL_ARG_ATTR_ZERO_POINTS | DNNL_ARG_WEIGHTS] = zeroPointsMem;
     dnnlArgs[DNNL_ARG_ATTR_ZERO_POINTS | DNNL_ARG_WEIGHTS] = zeroPointsMem->getPrimitive();
 }
diff --git a/src/plugins/intel_cpu/src/dnnl_postops_composer.h b/src/plugins/intel_cpu/src/dnnl_postops_composer.h
index c07ec0f608b6db..091b3ed9470205 100644
--- a/src/plugins/intel_cpu/src/dnnl_postops_composer.h
+++ b/src/plugins/intel_cpu/src/dnnl_postops_composer.h
@@ -27,8 +27,8 @@ class DnnlPostOpsComposer {
                         const size_t indexOfOutputChannelDim,
                         const bool isINT8,
                         const int weiScaleMaskPerChannel,
-                        const std::vector<float>& DQScales,
-                        const bool hasBias,
+                        const bool weightsWithBatch,
+                        const MemoryArgs& memory,
                         const dnnl::memory::data_type outDataType);
     DnnlPrimitiveAttrs compose();
     void appendDecompressionScales(const MemoryCPtr& scales_ptr, bool needTranspose, ov::element::Type dstPrecision);
@@ -59,6 +59,7 @@ class DnnlPostOpsComposer {
     size_t idxOC;
     const bool isINT8;  // only INT8 primitive support scales
     const int weightScaleMaskPerChannel;
+    bool weightsWithBatch;
     bool weightScaleAvailable = false;
     const dnnl::memory::data_type outDataType;
diff --git a/src/plugins/intel_cpu/src/edge.cpp b/src/plugins/intel_cpu/src/edge.cpp
index c314718bb82416..70a69c27d47989 100644
--- a/src/plugins/intel_cpu/src/edge.cpp
+++ b/src/plugins/intel_cpu/src/edge.cpp
@@ -5,6 +5,7 @@
 #include "edge.h"
 #include "node.h"
 #include "dnnl_extension_utils.h"
+#include "openvino/core/type/element_type.hpp"
 #include "openvino/util/pp.hpp"
 
 using namespace dnnl;
@@ -212,6 +213,10 @@ Edge::ReorderStatus Edge::needReorder() {
     bool optimized = false;
     auto inputPortDesc = getInputPortDesc();
     auto outPortDesc = getOutputPortDesc();
+
+    if (inputPortDesc->getMemDesc()->getPrecision() == element::undefined)
+        return ReorderStatus::No;
+
     // Check whether the child node may accept the parent produced tensor
     if (!outPortDesc->isCompatible(*inputPortDesc)) {
         // Performance optimization which exploit the fact that some tensors do not need actual data reordering to be read using different descriptors
@@ -411,6 +416,9 @@ const MemoryDesc& Edge::getOutputDesc() const {
 }
 
 const MemoryDesc& Edge::getDesc() const {
+    if (getInputDesc().getPrecision() == element::undefined)
+        return getInputDesc();
+
     if (!getInputDesc().isCompatible(getOutputDesc()))
         OPENVINO_THROW("Cannot get descriptor for edge: ", getParent()->getName(), "->", getChild()->getName());
diff --git a/src/plugins/intel_cpu/src/extension.cpp b/src/plugins/intel_cpu/src/extension.cpp
index d5a8801ffedeac..85ff582663dec8 100644
--- a/src/plugins/intel_cpu/src/extension.cpp
+++ b/src/plugins/intel_cpu/src/extension.cpp
@@ -7,10 +7,13 @@
 #include "openvino/core/op_extension.hpp"
 #include "ov_ops/augru_cell.hpp"
 #include "ov_ops/augru_sequence.hpp"
+#include "ov_ops/fully_connected.hpp"
+#include "ov_ops/fully_connected_quantized.hpp"
 #include "ov_ops/gather_compressed.hpp"
 #include "ov_ops/multiclass_nms_ie_internal.hpp"
 #include "ov_ops/nms_ie_internal.hpp"
 #include "ov_ops/nms_static_shape_ie.hpp"
+#include "ov_ops/placeholder.hpp"
 #include "ov_ops/rotary_positional_embeddings.hpp"
 #include "ov_ops/type_relaxed.hpp"
 #include "snippets/op/subgraph.hpp"
@@ -82,6 +85,9 @@ class TypeRelaxedExtension : public ov::OpExtension<ov::op::TypeRelaxed<Op>> {
     OP_EXTENSION(ov::op::internal::AUGRUSequence)                                \
     OP_EXTENSION(ov::op::internal::NmsStaticShapeIE<ov::op::v8::MulticlassNms>)  \
     OP_EXTENSION(ov::op::internal::RoPE)                                         \
+    OP_EXTENSION(ov::op::internal::FullyConnected)                               \
+    OP_EXTENSION(ov::op::internal::FullyConnectedQuantized)                      \
+    OP_EXTENSION(ov::op::internal::Placeholder)                                  \
     OP_EXTENSION_X64(ov::intel_cpu::MHANode)                                     \
     OP_EXTENSION_X64(ov::intel_cpu::InteractionNode)                             \
     OP_EXTENSION_X64(ov::intel_cpu::LLMMLPNode)                                  \
diff --git a/src/plugins/intel_cpu/src/graph_optimizer.cpp b/src/plugins/intel_cpu/src/graph_optimizer.cpp
index 6b3175e24d9dcb..77f49505e80ee8 100644
--- a/src/plugins/intel_cpu/src/graph_optimizer.cpp
+++ b/src/plugins/intel_cpu/src/graph_optimizer.cpp
@@ -67,10 +67,6 @@ void GraphOptimizer::ApplyCommonGraphOptimizations(Graph &graph) {
     FuseConvMatmulFCDeconvAndDQScales(graph);
     graph.RemoveDroppedNodes();
 
-    OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseFCAndWeightsDecompression");
-    FuseFCAndWeightsDecompression(graph);
-    graph.RemoveDroppedNodes();
-
     OV_ITT_SCOPE_NEXT(FIRST_INFERENCE, taskChain, "FuseConvolutionAndBias");
     FuseConvolutionMatMulDeconvAndBias(graph);
     graph.RemoveDroppedNodes();
@@ -212,9 +208,8 @@ void GraphOptimizer::FuseConvMatmulFCDeconvAndDQScales(Graph &graph) {
         auto parentNode = node->getParentEdgeAt(0)->getParent();
         auto scaleNode = node->getParentEdgeAt(1)->getParent();
         if (!(parentNode->getType() == Type::Convolution
-              || parentNode->getType() == Type::MatMul
-              || parentNode->getType() == Type::Deconvolution
-              || parentNode->getType() == Type::FullyConnected))
+              || parentNode->getType() == Type::MatMul
+              || parentNode->getType() == Type::Deconvolution))
             return false;
         if (!scaleNode->isConstant())
             return false;
@@ -288,257 +283,6 @@ void GraphOptimizer::FuseConvMatmulFCDeconvAndDQScales(Graph &graph) {
     }
 }
 
-void GraphOptimizer::FuseFCAndWeightsDecompression(Graph &graph) {
-    std::set<ov::element::Type> supportedWeightsPrecisions{
-        ov::element::u8, ov::element::i8, ov::element::nf4, ov::element::u4, ov::element::i4, ov::element::f4e2m1};
-    const std::set<ov::element::Type> supportedDataPrecisions{ov::element::f32, ov::element::bf16};
-    auto expectedNode = [](NodePtr node, Type expectedType) {
-        return node->getType() == expectedType && node->getChildEdges().size() == 1;
-    };
-
-#define SKIP_FUSION_FOR_NODE(node)                                                           \
-    DEBUG_LOG("FuseFCAndWeightsDecompression can't be applied for node ", node->getName()); \
-    continue
-
-    if (!impl::cpu::x64::mayiuse(impl::cpu::x64::avx2))
-        return;
-
-    auto& graphNodes = graph.GetNodes();
-    for (size_t i = 0; i < graphNodes.size(); i++) {
-        const auto fcNode = std::dynamic_pointer_cast<node::FullyConnected>(graphNodes[i]);
-        if (fcNode == nullptr)
-            continue;
-
-        auto parent = fcNode->getParentEdgeAt(1)->getParent();
-        const bool withTranspose = parent->getType() == Type::Transpose;
-        const NodePtr transposeNode = withTranspose ? parent : nullptr;
-        if (transposeNode)
-            parent = transposeNode->getParentEdgeAt(0)->getParent();
-        // Compressed weights can be shared between several FC layers
-        const bool is_shared_decompression = parent->getChildEdges().size() > 1;
-
-        const bool withReshape = parent->getType() == Type::Reshape;
-        const auto reshapeNode = withReshape ? parent : nullptr;
-        if (reshapeNode) {
-            parent = reshapeNode->getParentEdgeAt(0)->getParent();
-        }
-
-        const auto multiplyNode = parent;
-        if (multiplyNode->getType() != Type::Eltwise || multiplyNode->getAlgorithm() != Algorithm::EltwiseMultiply ||
-            !multiplyNode->isConstant()) {
-            SKIP_FUSION_FOR_NODE(fcNode);
-        }
-
-        CPU_GRAPH_OPTIMIZER_SCOPE(FuseFCAndWeightsDecompression);
-        const auto mulParent1 = multiplyNode->getParentEdgeAt(1)->getParent();
-        NodePtr multiplyParent, multiplyConvertNode, multiplyConstNode;
-        multiplyParent = mulParent1;
-        if (multiplyParent->getType() == Type::Convert) {
-            multiplyConvertNode = multiplyParent;
-            multiplyParent = multiplyConvertNode->getParentEdgeAt(0)->getParent();
-        }
-        multiplyConstNode = multiplyParent;
-        if (multiplyConstNode->getType() != Type::Input) {
-            SKIP_FUSION_FOR_NODE(fcNode);
-        }
-        const bool withMultiplyConvert = multiplyConvertNode != nullptr;
-
-        const auto mulParent0 = multiplyNode->getParentEdgeAt(0)->getParent();
-        const bool withSubtract = mulParent0->getAlgorithm() == Algorithm::EltwiseSubtract;
-        NodePtr subtractNode, subtractConvertNode, subtractConstNode;
-        if (withSubtract) {
-            subtractNode = mulParent0;
-            if (!expectedNode(subtractNode, Type::Eltwise)) {
-                SKIP_FUSION_FOR_NODE(fcNode);
-            }
-            auto subtractParent = subtractNode->getParentEdgeAt(1)->getParent();
-            if (subtractParent->getType() == Type::Convert) {
-                subtractConvertNode = subtractParent;
-                subtractParent = subtractConvertNode->getParentEdgeAt(0)->getParent();
-            }
-            subtractConstNode = subtractParent;
-            if (subtractConstNode->getType() != Type::Input) {
-                SKIP_FUSION_FOR_NODE(fcNode);
-            }
-        }
-
-        const bool withSubtractConvert = subtractConvertNode != nullptr;
-        const auto convertNode = withSubtract ? subtractNode->getParentEdgeAt(0)->getParent() : mulParent0;
-        if (!expectedNode(convertNode, Type::Convert)) {
-            SKIP_FUSION_FOR_NODE(fcNode);
-        }
-        const auto weightsNode = convertNode->getParentEdgeAt(0)->getParent();
-        if (weightsNode->getType() != Type::Input) {
-            SKIP_FUSION_FOR_NODE(fcNode);
-        }
-
-        // Precision limitations
-        if (supportedDataPrecisions.find(fcNode->getOriginalInputPrecisionAtPort(0)) == supportedDataPrecisions.end()) {
-            SKIP_FUSION_FOR_NODE(fcNode);
-        }
-        if (supportedWeightsPrecisions.find(weightsNode->getOriginalOutputPrecisionAtPort(0)) == supportedWeightsPrecisions.end()) {
-            SKIP_FUSION_FOR_NODE(fcNode);
-        }
-        if (withSubtract &&
-            !one_of(subtractConstNode->getOriginalOutputPrecisionAtPort(0), weightsNode->getOriginalOutputPrecisionAtPort(0), ov::element::f32)) {
-            SKIP_FUSION_FOR_NODE(fcNode);
-        }
-
-        // Shape limitations
-        const auto weightsShape = weightsNode->getOutputShapeAtPort(0);
-        if (weightsShape != multiplyNode->getOutputShapeAtPort(0)) {
-            SKIP_FUSION_FOR_NODE(fcNode);
-        }
-        if (reshapeNode && (reshapeNode->getInputShapeAtPort(0).getRank() != 3 || reshapeNode->getOutputShapeAtPort(0).getRank() != 2)) {
-            SKIP_FUSION_FOR_NODE(fcNode);
-        }
-
-        VectorDims decompressionConstShape;
-        const auto fcInputWeightsShape = fcNode->getInputShapeAtPort(1);
-        int groupNum = 1;
-        // Ordinary case: one decompression group
-        if (fcInputWeightsShape.getRank() == weightsShape.getRank()) {
-            const auto& out_channels = fcInputWeightsShape.getDims()[0];
-            decompressionConstShape = withTranspose ? VectorDims{1, out_channels} : VectorDims{out_channels, 1};
-        } else {
-            // Group decompression case: last 3 dimension (there could be also prepending '1's in the beginning) of weights shape must be:
-            // [N, G, O], if transpose = true
-            // [O, N, G], otherwise.
-            // O - output channels
-            // N - number of groups
-            // G - group size
-            const auto& weights_dims = weightsShape.getStaticDims();
-            const auto& N = withTranspose ? *(weights_dims.rbegin() + 2) : *(weights_dims.rbegin() + 1);
-            const auto& O = withTranspose ? *weights_dims.rbegin() : *(weights_dims.rbegin() + 2);
-            // Group decompression is applied by O and N dims
-            decompressionConstShape = withTranspose ? VectorDims{N, 1, O} : VectorDims{O, N, 1};
-            groupNum = N;
-        }
-
-        auto check_decompression_shape = [&decompressionConstShape](const VectorDims& shape_to_check) {
-            if (shape_to_check.size() > decompressionConstShape.size())
-                return false;
-            if (std::all_of(shape_to_check.begin(), shape_to_check.end(), [](Dim x) { return x == 1; }))
-                return true;
-            const auto comparison_start_pos = decompressionConstShape.size() - shape_to_check.size();
-            // in case of different ranks shapes are compared taking into account ranks numpy broadcasting
-            return std::equal(shape_to_check.begin(), shape_to_check.end(), decompressionConstShape.begin() + comparison_start_pos);
-        };
-        if (!check_decompression_shape(multiplyConstNode->getOutputShapeAtPort(0).getDims())) {
-            SKIP_FUSION_FOR_NODE(fcNode);
-        }
-        if (withSubtract && !check_decompression_shape(subtractConstNode->getOutputShapeAtPort(0).getDims())) {
-            SKIP_FUSION_FOR_NODE(fcNode);
-        }
-
-        const size_t OC = fcInputWeightsShape.getDims()[0];
-        const size_t IC = fcInputWeightsShape.getDims()[1];
-        // HW specific shape limitations
-        if (impl::cpu::x64::mayiuse(impl::cpu::x64::avx512_core_amx) &&
-            fcNode->getOriginalInputPrecisionAtPort(0) == ov::element::bf16) {
-            // OneDNN AMX IP implementation has limited shapes support due to performance considerations. As a current solution conditions below are copied
-            // from OneDNN to make sure correct IP impl will be used since fallback one doesn't support weights decompression feature.
-            size_t simdWidth = 16;
-            size_t vnniFactor = 2;
-            size_t maxSize = 512;
-            auto amxRow = vnniFactor * simdWidth;
-
-            if ((IC <= amxRow && OC <= amxRow) || (IC <= maxSize && OC <= maxSize && IC % amxRow != 0)) {
-                SKIP_FUSION_FOR_NODE(fcNode);
-            }
-        }
-
-        // OneDNN IP primitive provides limited decompression params support
-        if (IC % groupNum != 0 || IC / groupNum < 4 || OC == 1) {
-            SKIP_FUSION_FOR_NODE(fcNode);
-        }
-
-        // Fusion processing
-        auto *multiplyInputNode = dynamic_cast<node::Input*>(multiplyConstNode.get());
-        OPENVINO_ASSERT(multiplyInputNode, "Cannot cast ", multiplyConstNode->getName(), " to Input node.");
-        fcNode->fuseDecompressionMultiply(multiplyInputNode->getMemoryPtr());
-
-        if (withSubtract) {
-            auto *subtractInputNode = dynamic_cast<node::Input*>(subtractConstNode.get());
-            OPENVINO_ASSERT(multiplyInputNode, "Cannot cast ", subtractConstNode->getName(), " to Input node.");
-            fcNode->fuseDecompressionSubtract(subtractInputNode->getMemoryPtr());
-        }
-
-        fcNode->addOriginalLayer(multiplyNode->getOriginalLayers());
-        fcNode->addOriginalLayer(convertNode->getOriginalLayers());
-        if (withSubtract)
-            fcNode->addOriginalLayer(subtractNode->getOriginalLayers());
-        if (withSubtractConvert)
-            fcNode->addOriginalLayer(subtractConvertNode->getOriginalLayers());
-        if (withMultiplyConvert)
-            fcNode->addOriginalLayer(multiplyConvertNode->getOriginalLayers());
-
-        const auto& weightsPrecision = weightsNode->getOriginalOutputPrecisionAtPort(0);
-        if (withTranspose) {
-            transposeNode->setOriginalInputPrecisionAtPort(0, weightsPrecision);
-            transposeNode->setOriginalOutputPrecisionAtPort(0, weightsPrecision);
-        }
-        if (withReshape) {
-            reshapeNode->setOriginalInputPrecisionAtPort(0, weightsPrecision);
-            reshapeNode->setOriginalOutputPrecisionAtPort(0, weightsPrecision);
-        }
-        fcNode->setOriginalInputPrecisionAtPort(1, weightsPrecision);
-
-        // If decompression subgraph is shared with other nodes, it mustn't be removed.
-        // In this case, the current FC is reconnected to the weights
-        if (is_shared_decompression) {
-            const auto weights_out_edge = weightsNode->getChildEdges()[0].lock();
-            const auto fc_weights_path_edge = withTranspose ? transposeNode->getParentEdgeAt(0)
-                                                            : fcNode->getParentEdgeAt(1);
-            const auto inNum = weights_out_edge->getInputNum();
-            const auto outNum = fc_weights_path_edge->getOutputNum();
-            graph.RemoveEdge(fc_weights_path_edge);
-            // In case of shared group decompression, Reshape node has to be copied for the current FC
-            if (withReshape) {
-                const auto& reshapeOutShape = reshapeNode->getOutputShapeAtPort(0).getStaticDims();
-                auto reshapeConst = std::make_shared<ov::op::v0::Constant>(ov::element::i32,
-                                                                           ov::Shape{reshapeOutShape.size()},
-                                                                           reshapeOutShape);
-                auto reshapeDummyInput = std::make_shared<ov::op::v0::Parameter>(reshapeNode->getOriginalInputPrecisionAtPort(0),
-                                                                                 reshapeNode->getInputShapeAtPort(0).toPartialShape());
-                const auto reshape = std::make_shared<ov::op::v1::Reshape>(reshapeDummyInput, reshapeConst, false);
-                reshape->set_friendly_name(reshapeNode->getName() + "_copy");
-                const auto cpuReshape = std::make_shared<node::Reshape>(reshape, graph.getGraphContext());
-                graph.InsertNode(weightsNode, withTranspose ? transposeNode : fcNode, cpuReshape, inNum, outNum, false);
-                const auto cpuReshapeConst = std::make_shared<node::Input>(reshapeConst, graph.getGraphContext());
-                graph.AddNode(cpuReshapeConst);
-                graph.CreateEdge(cpuReshapeConst, cpuReshape, 0, 1);
-            } else {
-                graph.CreateEdge(weightsNode, withTranspose ? transposeNode : fcNode, inNum, outNum);
-            }
-        } else {
-            // If decompression subgraph is not shared with other nodes, it can be removed
-            if (withSubtract)
-                graph.RemoveEdge(subtractNode->getParentEdgeAt(1));
-            if (withSubtractConvert) {
-                // SubtractConvert is removed only if there are no other consumers (e.g. CompressedGather)
-                const auto& restChilds = subtractConvertNode->getChildEdges();
-                if (restChilds.empty())
-                    graph.RemoveEdge(subtractConvertNode->getParentEdgeAt(0));
-            }
-            graph.RemoveEdge(multiplyNode->getParentEdgeAt(1));
-            if (withMultiplyConvert) {
-                // MultiplyConvert is removed only if there are no other consumers (e.g. CompressedGather)
-                const auto& restChilds = multiplyConvertNode->getChildEdges();
-                if (restChilds.empty())
-                    graph.RemoveEdge(multiplyConvertNode->getParentEdgeAt(0));
-            }
-
-            graph.DropNode(convertNode);
-            if (withSubtract)
-                graph.DropNode(subtractNode);
-            graph.DropNode(multiplyNode);
-        }
-        DEBUG_LOG("FuseFCAndWeightsDecompression finished for node ", fcNode->getName());
-    }
-#undef SKIP_FUSION_FOR_NODE
-}
-
 void GraphOptimizer::FuseConvolutionMatMulDeconvAndBias(Graph &graph) {
     auto& graphNodes = graph.GetNodes();
 
@@ -552,7 +296,7 @@ void GraphOptimizer::FuseConvolutionMatMulDeconvAndBias(Graph &graph) {
             return false;
 
         if (!deconv)
-            return (one_of(node->getType(), Type::Convolution, Type::MatMul, Type::FullyConnected) &&
+            return (one_of(node->getType(), Type::Convolution, Type::MatMul) &&
                     node->getParentEdges().size() == 2);
         else
             return deconv->canFuseBias();
@@ -980,9 +724,8 @@ void GraphOptimizer::FuseFCAndTransposeOnWeights(Graph& graph) {
     auto isSuitablePattern = [](NodePtr parent) {
         bool res = true && parent->getType() == Type::Transpose
                         && parent->getChildEdges().size() == 1
-                        && parent->getChildEdgeAt(0)->getOutputNum() == 1
+                        && one_of(parent->getChildEdgeAt(0)->getOutputNum(), 1, 3, 4)
                         && parent->getChildEdgeAt(0)->getChild()->getType() == Type::FullyConnected
-                        && parent->getOutputShapeAtPort(0).getRank() == 2
                         && parent->isConstant();
         return res;
     };
diff --git a/src/plugins/intel_cpu/src/memory_desc/empty_memory_desc.h b/src/plugins/intel_cpu/src/memory_desc/empty_memory_desc.h
index 4b641669262591..1575841cb2be9e 100644
--- a/src/plugins/intel_cpu/src/memory_desc/empty_memory_desc.h
+++ b/src/plugins/intel_cpu/src/memory_desc/empty_memory_desc.h
@@ -59,7 +59,9 @@ class EmptyMemoryDesc : public MemoryDesc {
     }
 
     MemoryDescPtr cloneWithNewPrecision(const ov::element::Type prec) const override {
-        OPENVINO_THROW("Clone an empty memory desc with any precision (", prec, ") is prohibited");
+        OPENVINO_ASSERT(prec == ov::element::undefined,
+                        "Clone an empty memory desc with defined precision: ", prec, " is prohibited");
+        return clone();
     }
 
 private:
diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp
index 31c4a0d2a5b54d..991cec4ec8b3e8 100644
--- a/src/plugins/intel_cpu/src/node.cpp
+++ b/src/plugins/intel_cpu/src/node.cpp
@@ -5,6 +5,7 @@
 #include "node.h"
 #include "cpu_types.h"
 #include "edge.h"
+#include "openvino/core/type/element_type.hpp"
 #include "partitioned_mem_mgr.h"
 
 #include
@@ -1542,7 +1543,7 @@ bool Node::isInputTensorAtPortEmpty(size_t port) const {
     auto edge = getParentEdgeAt(port);
     if (one_of(edge->getStatus(), Edge::Status::Allocated, Edge::Status::Validated)) {
         auto&& mem = edge->getMemory();
-        if (mem.isDefined()) {
+        if (mem.isDefined() && !mem.getDesc().empty()) {
             return mem.getShape().hasZeroDims();
         }
     }
diff --git a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_fullyconnected.cpp b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_fullyconnected.cpp
index 6f464abf33d036..f64a1da2af3377 100644
--- a/src/plugins/intel_cpu/src/nodes/executors/acl/acl_fullyconnected.cpp
+++ b/src/plugins/intel_cpu/src/nodes/executors/acl/acl_fullyconnected.cpp
@@ -53,8 +53,9 @@ static MemoryPtr prepareWeightMemory(const MemoryArgs &memory,
                                      const PostOps &postOps) {
     DEBUG_LOG("ACLFullyConnectedExecutor: prepack weights");
     const auto& wgtDims = memory.at(ARG_WEI)->getStaticDims();
-    const auto N = wgtDims[0];
-    const auto K = wgtDims[1];
+    const auto N = std::accumulate(wgtDims.begin(), wgtDims.end() - 1, Dim{1}, std::multiplies<Dim>());
+    const auto K = wgtDims.back();
+    const VectorDims wgtDims2D = {N, K};
 
     auto create = [&]() {
         MemoryPtr final_ptr = memory.at(ARG_WEI);
@@ -91,9 +92,10 @@ static MemoryPtr prepareWeightMemory(const MemoryArgs &memory,
         memoryArgs[ARG_WEI] = final_ptr;
         if (memory.at(ARG_SRC_0)->getShape().isDynamic()) {
             const auto& inShape = memory.at(ARG_SRC_0)->getShape();
-            const auto& wShape = final_ptr->getShape();
-            const auto& inDymmyDims = makeDummyInputDims(inShape, wShape);
-            const auto& outDymmyDims = makeDummyOutputDims(inDymmyDims, wShape.getStaticDims(), memory.at(ARG_DST)->getShape().getRank());
+            const Shape wShape2D{wgtDims2D};
+            // const auto& wShape = final_ptr->getShape();
+            const auto& inDymmyDims = makeDummyInputDims(inShape, wShape2D);
+            const auto& outDymmyDims = makeDummyOutputDims(inDymmyDims, wShape2D.getStaticDims(), memory.at(ARG_DST)->getShape().getRank());
             memoryArgs[ARG_SRC_0] = std::make_shared<Memory>(context->getEngine(),
                                                              memory.at(ARG_SRC_0)->getDescPtr()->cloneWithNewDims(inDymmyDims));
             memoryArgs[ARG_DST] = std::make_shared<Memory>(context->getEngine(),
@@ -121,11 +123,11 @@ static MemoryPtr prepareWeightMemory(const MemoryArgs &memory,
         }
         // Transpose weights
         if (!aclfcAttrs.weightsNonTransposed) {
-            auto reverse_weights_dims = memory.at(ARG_WEI)->getStaticDims();
-            if (reverse_weights_dims.size() == 3) {
-                reverse_weights_dims = VectorDims(
-                    {reverse_weights_dims[0] * reverse_weights_dims[1], reverse_weights_dims[2]});
-            }
+            auto reverse_weights_dims = wgtDims2D;
+            // if (reverse_weights_dims.size() == 3) {
+            //     reverse_weights_dims = VectorDims(
+            //         {reverse_weights_dims[0] * reverse_weights_dims[1], reverse_weights_dims[2]});
+            // }
             std::reverse(reverse_weights_dims.begin(), reverse_weights_dims.end());
             MemoryArgs memoryArgs;
             memoryArgs[ARG_SRC_0] = final_ptr;
@@ -215,8 +217,8 @@ bool ACLFullyConnectedExecutor::supports(const FCConfig &config) {
 static void updateFCTensorsShapes(ACLShapes& aclMemoryShapes) {
     if (aclMemoryShapes[ACLArgs::ACL_WEI].num_dimensions() == 3U) {
         aclMemoryShapes[ACLArgs::ACL_WEI] = arm_compute::TensorShape(
-            {aclMemoryShapes[ACLArgs::ACL_WEI][0] * aclMemoryShapes[ACLArgs::ACL_WEI][1],
-             aclMemoryShapes[ACLArgs::ACL_WEI][2]});
+            {aclMemoryShapes[ACLArgs::ACL_WEI][0],
+             aclMemoryShapes[ACLArgs::ACL_WEI][1] * aclMemoryShapes[ACLArgs::ACL_WEI][2]});
     }
 
     if (one_of(aclMemoryShapes[ACLArgs::ACL_SRC_0].num_dimensions(), 3U, 4U)) {
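Editorial note: both the ACL change above and the oneDNN changes below rely on the same idea — a 3D (batched) weights tensor `[B, N, K]` is flattened to a 2D `[B*N, K]` matrix before transposition or prepacking. A standalone illustration (not part of the patch):

```cpp
// Editorial sketch of the weight-shape collapsing used by the executors.
#include <functional>
#include <numeric>
#include <vector>

template <typename T>
static std::vector<T> to2D(const std::vector<T>& dims) {
    // product of all leading dims becomes the row count, last dim stays as columns
    return {std::accumulate(dims.begin(), dims.end() - 1, T{1}, std::multiplies<T>()),
            dims.back()};
}
// to2D<size_t>({4, 32, 64}) == {128, 64}
```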
a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_convolution_primitive.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_convolution_primitive.cpp @@ -158,7 +158,7 @@ static DnnlPrimitiveAttrs createPrimitiveAttrs(const ConvAttrs& attrs, auto outputDataType = DnnlExtensionUtils::ElementTypeToDataType(dstDesc->getPrecision()); DnnlPostOpsComposer - dnnlpoc(postOps, context->getEngine(), dims, 1, isINT8, 1 << 0, {}, attrs.withBias, outputDataType); + dnnlpoc(postOps, context->getEngine(), dims, 1, isINT8, 1 << 0, weiDesc->getShape().getRank() == 3, memory, outputDataType); return dnnlpoc.compose(); } diff --git a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected_primitive.cpp b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected_primitive.cpp index fcb70d4753b2ce..160d2960775e25 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected_primitive.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected_primitive.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -27,6 +28,7 @@ #include "nodes/executors/executor.hpp" #include "nodes/executors/fullyconnected_config.hpp" #include "nodes/executors/memory_arguments.hpp" +#include "utils/cpu_utils.hpp" #include "utils/debug_capabilities.h" namespace ov { @@ -108,6 +110,11 @@ std::shared_ptr DnnlFCPrimitive::create(const MemoryArgs& memor return primitive; } +template +static std::vector normalizeDimsTo2D(const std::vector& dims) { + return {std::accumulate(dims.begin(), dims.end() - 1, (T)1, std::multiplies()), dims[dims.size() - 1]}; +} + DnnlMemoryDescPtr DnnlFCPrimitive::makeTransposedWeightDescriptor(const DnnlMemoryDescPtr srcDesc, const DnnlMemoryDescPtr dstDesc, bool weightsNonTransposed) { @@ -115,9 +122,11 @@ DnnlMemoryDescPtr DnnlFCPrimitive::makeTransposedWeightDescriptor(const DnnlMemo return srcDesc; const auto& weiDesc = srcDesc->getDnnlDesc(); - const auto reorderedWeiDesc = - dnnl::memory::desc{weiDesc.get_dims(), weiDesc.get_data_type(), dnnl::memory::format_tag::ba}; - const auto transposedWeiDesc = reorderedWeiDesc.reshape(dstDesc->getDnnlDesc().get_dims()); + auto wDims = weiDesc.get_dims(); + dnnl::memory::dim batchDim = std::accumulate(wDims.begin(), wDims.end() - 1, 1, std::multiplies()); + dnnl::memory::dims dims2D{batchDim, wDims.back()}; + + const auto transposedWeiDesc = dnnl::memory::desc{dims2D, weiDesc.get_data_type(), dnnl::memory::format_tag::ba}; return DnnlExtensionUtils::makeDescriptor(transposedWeiDesc); } @@ -140,12 +149,11 @@ bool DnnlFCPrimitive::useWeightsDecompressionImpl(const ov::element::Type inputT return false; } -bool DnnlFCPrimitive::useDynamicQuantizationImpl(size_t dqGroupSize, - const MemoryDescPtr srcDesc, - const MemoryDescPtr weightsDesc, - MemoryCPtr scalesPtr, - MemoryCPtr zpPtr, - bool needTranspose) { +static bool useDynamicQuantizationImpl(size_t dqGroupSize, + const MemoryDescPtr srcDesc, + const MemoryDescPtr weightsDesc, + const MemoryArgs& memory, + bool needTranspose) { if (dqGroupSize == 0) return false; @@ -155,6 +163,8 @@ bool DnnlFCPrimitive::useDynamicQuantizationImpl(size_t dqGroupSize, if (srcDesc->getPrecision() != ov::element::f32) return false; + + MemoryCPtr zpPtr = memory.count(ARG_WEI | ARG_ATTR_ZERO_POINTS) ? memory.at(ARG_WEI | ARG_ATTR_ZERO_POINTS) : nullptr; // For dynamic quantization, VNNI accumulation requires weight to be unsigned. 
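// ---- Editorial note: illustration only, not part of the patch ----
// makeTransposedWeightDescriptor above now folds the weights to 2D first and
// expresses "transposed" purely through the oneDNN format tag `ba` (swapped
// strides), instead of building a `ba` descriptor and reshaping it to the
// destination dims. A sketch of the same idea in isolation, assuming oneDNN
// v3 headers and a hypothetical function name:
#include "oneapi/dnnl/dnnl.hpp"
#include <functional>
#include <numeric>

dnnl::memory::desc transposed2DWeightDesc(const dnnl::memory::desc& weiDesc) {
    const auto wDims = weiDesc.get_dims();
    // Fold all outer dims into one: {B, O, K} -> {B * O, K}
    const dnnl::memory::dim outer = std::accumulate(
        wDims.begin(), wDims.end() - 1, dnnl::memory::dim{1}, std::multiplies<dnnl::memory::dim>());
    // Logical shape {outer, K}; `ba` marks the physical layout as its transpose.
    return dnnl::memory::desc({outer, wDims.back()}, weiDesc.get_data_type(), dnnl::memory::format_tag::ba);
}
// -------------------------------------------------------------------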
// To support dynamic quantization with weights symmetrically quantized as i8/i4 // w/o zero-point, we will transform weight to u8/u4 weight with zp 128/8. @@ -177,11 +187,15 @@ bool DnnlFCPrimitive::useDynamicQuantizationImpl(size_t dqGroupSize, if (weightsDesc->getPrecision() == ov::element::u4) { int ic = weightsDesc->getShape().getStaticDims()[1]; int minGroupSize = INT_MAX; + + MemoryCPtr scalesPtr = memory.count(ARG_WEI | ARG_ATTR_SCALES) ? memory.at(ARG_WEI | ARG_ATTR_SCALES) : nullptr; + if (scalesPtr && scalesPtr->getShape().getRank() == 3) { auto scalesDims = scalesPtr->getShape().getStaticDims(); auto groupsNum = needTranspose ? scalesDims[1] : scalesDims[0]; minGroupSize = ic / groupsNum; } + if (zpPtr && zpPtr->getShape().getRank() == 3) { auto zpDims = zpPtr->getShape().getStaticDims(); int groupsNum = needTranspose ? zpDims[1] : zpDims[0]; @@ -196,11 +210,6 @@ bool DnnlFCPrimitive::useDynamicQuantizationImpl(size_t dqGroupSize, return true; } -template -static std::vector normalizeDimsTo2D(const std::vector& dims) { - return {std::accumulate(dims.begin(), dims.end() - 1, (T)1, std::multiplies()), dims[dims.size() - 1]}; -} - static DnnlPrimitiveAttrs createPrimitiveAttrs(const FCAttrs& attrs, const PostOps& postOps, const MemoryArgs& memory, @@ -223,21 +232,23 @@ static DnnlPrimitiveAttrs createPrimitiveAttrs(const FCAttrs& attrs, dims.size() - 1, isINT8, 1 << 0, - attrs.dequantizationScales, - !memory.at(ARG_BIAS)->getDesc().empty(), + weiDesc->getShape().getRank() == 3, + memory, outputDataType); - if (attrs.decompressionMultiplyPtr) { - auto dstPrc = attrs.decompressionMultiplyPtr->getPrecision(); + if (memory.count(ARG_WEI | ARG_ATTR_SCALES)) { + auto dstPrc = memory.at(ARG_WEI | ARG_ATTR_SCALES)->getPrecision(); if (dstPrc != f8e8m0 || useDynamicQuantization) dstPrc = ov::element::f32; - dnnlpoc.appendDecompressionScales(attrs.decompressionMultiplyPtr, !attrs.weightsNonTransposed, dstPrc); + dnnlpoc.appendDecompressionScales(memory.at(ARG_WEI | ARG_ATTR_SCALES), !attrs.weightsNonTransposed, dstPrc); } - if (attrs.decompressionSubtractPtr) { + + if (memory.count(ARG_WEI | ARG_ATTR_ZERO_POINTS)) { auto dstPrc = useDynamicQuantization ? ov::element::u8 : ov::element::f32; - dnnlpoc.appendDecompressionZeroPoints(attrs.decompressionSubtractPtr, !attrs.weightsNonTransposed, dstPrc); + dnnlpoc.appendDecompressionZeroPoints(memory.at(ARG_WEI | ARG_ATTR_ZERO_POINTS), !attrs.weightsNonTransposed, dstPrc); } + if (useDynamicQuantization) { auto wei_precision = weiDesc->getPrecision(); bool is_symmetric_weights = (wei_precision == ov::element::i8) || (wei_precision == ov::element::i4); @@ -276,12 +287,13 @@ static dnnl::inner_product_forward::primitive_desc createDescriptorInternal(cons const bool useWeightsDecompression) { const auto normalizedInputDesc = normalizeDescriptor(inputDesc); const auto normalizedOutputDesc = normalizeDescriptor(outputDesc); + const auto normalizedWeightDesc = normalizeDescriptor(weightDesc); const auto indt = normalizedInputDesc.get_data_type(); auto wdt = indt; if (useWeightsDecompression) { - wdt = weightDesc.get_data_type(); + wdt = normalizedWeightDesc.get_data_type(); // dynamic quantization with symmetric quantized weights needs unsigned weights uint64_t dynQuantGroupSize = 0; @@ -297,8 +309,8 @@ static dnnl::inner_product_forward::primitive_desc createDescriptorInternal(cons } const dnnl::memory::desc weightsDesc = - useSparseWeights ? 
dnnl::memory::desc().sparse_desc(weightDesc.get_dims(), wdt) - : dnnl::memory::desc(weightDesc.get_dims(), wdt, memory::format_tag::any); + useSparseWeights ? dnnl::memory::desc().sparse_desc(normalizedWeightDesc.get_dims(), wdt) + : dnnl::memory::desc(normalizedWeightDesc.get_dims(), wdt, memory::format_tag::any); return dnnl::inner_product_forward::primitive_desc(engine, dnnl::prop_kind::forward_inference, @@ -387,8 +399,7 @@ DnnlShapeAgnosticDataPtr DnnlFCPrimitive::createShapeAgnosticData(const FCAttrs& useWeightsDecompression && useDynamicQuantizationImpl(attrs.dynamicQuantizationGroupSize, srcDesc, weiDesc, - attrs.decompressionMultiplyPtr, - attrs.decompressionSubtractPtr, + memory, !attrs.weightsNonTransposed); const auto postOpData = createPrimitiveAttrs(attrs, postOps, memory, context, useDynamicQuantization); diff --git a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected_primitive.hpp b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected_primitive.hpp index 5295b9655066cc..21247f149ca69f 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected_primitive.hpp +++ b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected_primitive.hpp @@ -75,13 +75,6 @@ class DnnlFCPrimitive { const DnnlShapeAgnosticDataPtr& shapeAgnosticData); private: - static bool useDynamicQuantizationImpl(size_t dqGroupSize, - const MemoryDescPtr srcDesc, - const MemoryDescPtr weightsDesc, - MemoryCPtr scalesPtr, - MemoryCPtr zpPtr, - bool needTranspose); - dnnl::stream m_stream; dnnl::primitive_desc m_primDesc; impl_desc_type m_implType; diff --git a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_matmul_primitive.cpp b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_matmul_primitive.cpp index 1b8646c858e532..2e4bf0556486af 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_matmul_primitive.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_matmul_primitive.cpp @@ -104,10 +104,11 @@ DnnlMemoryDescPtr DnnlMatMulPrimitive::makeTransposedWeightDescriptor(const Dnnl const auto& weiDesc = srcDesc->getDnnlDesc(); auto wDims = weiDesc.get_dims(); auto wDataType = weiDesc.get_data_type(); - std::swap(wDims[wDims.size() - 1], wDims[wDims.size() - 2]); + dnnl::memory::dim batchDim = std::accumulate(wDims.begin(), wDims.end() - 1, 1, std::multiplies()); + dnnl::memory::dims dims2D{wDims.back(), batchDim}; const auto format = weightsNonTransposed ? 
dnnl::memory::format_tag::ab : dnnl::memory::format_tag::ba; - const auto transposedWeiDesc = dnnl::memory::desc{wDims, wDataType, format}; + const auto transposedWeiDesc = dnnl::memory::desc{dims2D, wDataType, format}; return DnnlExtensionUtils::makeDescriptor(transposedWeiDesc); } @@ -134,8 +135,8 @@ static DnnlPrimitiveAttrs createPrimitiveAttrs(const MatMulAttrs& attrs, dims.size() - 1, isINT8, 1 << 0, - attrs.dequantizationScales, - !memory.at(ARG_BIAS)->getDesc().empty(), + weiDesc->getShape().getRank() == 3, + memory, outputDataType); return dnnlpoc.compose(); @@ -262,7 +263,7 @@ DnnlShapeAgnosticDataPtr DnnlMatMulPrimitive::createShapeAgnosticData(const FCAt const auto& weiDesc = memory.at(ARG_WEI)->getDescPtr(); const auto& biasDesc = memory.at(ARG_BIAS)->getDescPtr(); auto dstDesc = memory.at(ARG_DST)->getDescPtr(); - MatMulAttrs mmAttrs{false, false, attrs.dequantizationScales}; + MatMulAttrs mmAttrs{false, false}; const auto postOpData = createPrimitiveAttrs(mmAttrs, postOps, memory, context, false); diff --git a/src/plugins/intel_cpu/src/nodes/executors/fullyconnected_config.hpp b/src/plugins/intel_cpu/src/nodes/executors/fullyconnected_config.hpp index ad6479597c6971..1bdbd9f369937b 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/fullyconnected_config.hpp +++ b/src/plugins/intel_cpu/src/nodes/executors/fullyconnected_config.hpp @@ -21,10 +21,11 @@ struct FCAttrs { bool sparseWeights = false; // @todo only memory descriptors should be a part of attributes // actual memory should be passed into "execute" or "prepareMemory" calls - std::vector dequantizationScales; + // std::vector dequantizationScales; + // @todo should be passed as an additional memory input? - MemoryCPtr decompressionSubtractPtr; - MemoryCPtr decompressionMultiplyPtr; + // MemoryCPtr decompressionSubtractPtr; + // MemoryCPtr decompressionMultiplyPtr; uint64_t dynamicQuantizationGroupSize; ov::intel_cpu::Config::ModelType modelType = ov::intel_cpu::Config::ModelType::Unknown; }; diff --git a/src/plugins/intel_cpu/src/nodes/executors/fullyconnected_implementations.cpp b/src/plugins/intel_cpu/src/nodes/executors/fullyconnected_implementations.cpp index 5a8b1ef78b6dbb..2fde49d10e0a58 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/fullyconnected_implementations.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/fullyconnected_implementations.cpp @@ -439,8 +439,7 @@ const std::vector>& getImplementations() { const ExecutorContext::CPtr context, std::shared_ptr shareAgnosticData) const { MatMulAttrs matMulAttrs{false, - false, - attrs.dequantizationScales}; + false}; auto primitive = DefaultInstantiator{}( memory, diff --git a/src/plugins/intel_cpu/src/nodes/executors/matmul_config.hpp b/src/plugins/intel_cpu/src/nodes/executors/matmul_config.hpp index 9e484b24a2940e..e42bf3138bce91 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/matmul_config.hpp +++ b/src/plugins/intel_cpu/src/nodes/executors/matmul_config.hpp @@ -12,7 +12,6 @@ namespace intel_cpu { struct MatMulAttrs { bool transposeA; bool transposeB; - std::vector dequantizationScales; }; using MatMulConfig = executor::Config; diff --git a/src/plugins/intel_cpu/src/nodes/executors/memory_arguments.hpp b/src/plugins/intel_cpu/src/nodes/executors/memory_arguments.hpp index c04ca39e845ee1..9959188b9a8cf4 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/memory_arguments.hpp +++ b/src/plugins/intel_cpu/src/nodes/executors/memory_arguments.hpp @@ -24,6 +24,10 @@ using MemoryArgs = std::unordered_map; #define ARG_WEI_0 33 #define ARG_WEI 
ARG_WEI_0 #define ARG_BIAS 41 +/// Scaling factors provided at execution time. +#define ARG_ATTR_SCALES 4096 +/// Zero points provided at execution time. +#define ARG_ATTR_ZERO_POINTS 8192 } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/executors/mlas/mlas_gemm.cpp b/src/plugins/intel_cpu/src/nodes/executors/mlas/mlas_gemm.cpp index a03bfe2649413a..8fd945b773f262 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/mlas/mlas_gemm.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/mlas/mlas_gemm.cpp @@ -23,6 +23,10 @@ using namespace executor; using namespace dnnl; using namespace ov::element; +static Dim batchDim(const VectorDims& dims) { + return std::accumulate(dims.begin(), dims.end() - 1, 1, std::multiplies()); +} + static MemoryPtr prepareWeightMemory(const MemoryPtr weightsMemory, const ExecutorContext::CPtr context, const bool weightsTransposed) { @@ -31,14 +35,15 @@ static MemoryPtr prepareWeightMemory(const MemoryPtr weightsMemory, // Weights are transposed by MatMulConstTransposesExtraction // K is the IC of weight // the weight is reshaped to [-1, K] in ConvertMatMulToFC - const auto K = wgtDims[1]; - const auto N = wgtDims[0]; + Dim K = wgtDims.back(); + Dim N = batchDim(wgtDims); auto packedBsize = mlas_sgemm_pack_get_size(N, K); auto create = [&]() { float* weightPtr = weightsMemory->getDataAs(); size_t ldb = weightsTransposed ? K : N; + MemoryPtr _ptr = std::make_shared(context->getEngine(), intel_cpu::CpuBlockedMemoryDesc(i8, intel_cpu::Shape{packedBsize})); float* prepackedDst = _ptr->getDataAs(); @@ -66,21 +71,10 @@ bool MlasGemmExecutor::supports(const FCConfig& config) { DEBUG_LOG("MlasGemmExecutor: PostOps are not supported"); return false; } - const auto& weiDesc = config.descs.at(ARG_WEI); - const auto& dstDesc = config.descs.at(ARG_DST); - // MLAS cannot support weight dims > 2, e.g. 
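// ---- Editorial note: illustration only, not part of the patch ----
// ARG_ATTR_SCALES / ARG_ATTR_ZERO_POINTS are high bits OR-ed onto a base
// argument id, so a single flat MemoryArgs map can carry per-argument
// attributes. The base ids visible here (33, 41, ...) stay below 4096, so
// the composed keys never collide with plain arguments:
//
//   ARG_WEI | ARG_ATTR_SCALES       == 33 | 4096 == 4129
//   ARG_WEI | ARG_ATTR_ZERO_POINTS  == 33 | 8192 == 8225
//
// Typical lookup, mirroring how the executors in this patch consume them:
//
//   MemoryCPtr scales = memory.count(ARG_WEI | ARG_ATTR_SCALES)
//                           ? memory.at(ARG_WEI | ARG_ATTR_SCALES)
//                           : nullptr;
// -------------------------------------------------------------------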
[1,64,9,9] * [10,64,9,9] - const auto& weightsDims = weiDesc->getShape().getStaticDims(); - if (weightsDims.size() > 2) { - if (!std::all_of(weightsDims.begin() + 2, weightsDims.end(), [](const Dim dim) { - return dim == 1; - })) { - DEBUG_LOG("MlasGemmExecutor: weights dims > 2 are not supported"); - return false; - } - } + const auto& dstDesc = config.descs.at(ARG_DST); - if (config.attrs.withBias) { + if (!config.descs.at(ARG_BIAS)->empty()) { const auto& biaDesc = config.descs.at(ARG_BIAS); const auto& biasDims = biaDesc->getShape().getStaticDims(); const auto& outDims = dstDesc->getShape().getDims(); @@ -108,24 +102,17 @@ MlasGemmExecutor::MlasGemmExecutor(const FCAttrs& attrs, const ExecutorContext::CPtr context) : m_attrs(attrs), m_memoryArgs(memory), - packedWeights(prepareWeightMemory(memory.at(ARG_WEI), context, !attrs.weightsNonTransposed)) {} + packedWeights(prepareWeightMemory(memory.at(ARG_WEI), context, !attrs.weightsNonTransposed)), + N(batchDim(memory.at(ARG_WEI)->getStaticDims())), + K(memory.at(ARG_WEI)->getStaticDims().back()) +{} bool MlasGemmExecutor::update(const MemoryArgs& memory) { - const auto& weiDesc = memory.at(ARG_WEI)->getDescPtr(); const auto& dstDesc = memory.at(ARG_DST)->getDescPtr(); - const auto& wgtDims = weiDesc->getShape().getStaticDims(); - // Weights are transposed by MatMulConstTransposesExtraction - // K is the IC of weight - // the weight is reshaped to [-1, K] in ConvertMatMulToFC - K = wgtDims[1]; - N = wgtDims[0]; const auto& outDims = dstDesc->getShape().getStaticDims(); - if (outDims.size() > 2) { - M = std::accumulate(outDims.begin(), outDims.end() - 1, 1, std::multiplies()); - } else { - M = outDims[0]; - } + M = outDims.size() > 2 ? batchDim(outDims) : outDims[0]; + return true; } diff --git a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp index 5d2b6fd9b50212..3cd09658fbae2e 100644 --- a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp +++ b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp @@ -21,6 +21,9 @@ #include "nodes/executors/fullyconnected_config.hpp" #include "openvino/core/type/element_type.hpp" #include "openvino/runtime/threading/cpu_message.hpp" +#include "ov_ops/fully_connected.hpp" +#include "ov_ops/fully_connected_quantized.hpp" +#include "ov_ops/placeholder.hpp" #include "post_ops.hpp" #include "shape_inference/custom/fullyconnected.hpp" #include "transformations/cpu_opset/common/op/fully_connected.hpp" @@ -39,21 +42,22 @@ namespace node { bool FullyConnected::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { try { - const auto fc = std::dynamic_pointer_cast(op); + const auto fcQuantized = std::dynamic_pointer_cast(op); + if (fcQuantized) { + return true; + } + + const auto fc = std::dynamic_pointer_cast(op); if (!fc) { errorMessage = "Only legacy FullyConnected operation is supported"; return false; } if (fc->get_input_size() == 3 && - std::dynamic_pointer_cast(fc->get_input_node_shared_ptr(BIAS_ID)) == nullptr) { + (std::dynamic_pointer_cast(fc->get_input_node_shared_ptr(BIAS)) == nullptr && + std::dynamic_pointer_cast(fc->get_input_node_shared_ptr(BIAS)) == nullptr)) { errorMessage = "Only Constant operation on 'bias' input is supported"; return false; } - const auto weightRank = fc->get_input_partial_shape(WEIGHTS_ID).size(); - if (weightRank != 2) { - errorMessage = "Doesn't support 'weight' input with rank: " + std::to_string(weightRank); - return false; - } } catch (...) 
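// ---- Editorial note: illustration only, not part of the patch ----
// With the MLAS refactor above, N and K are derived once from the weight
// shape in the constructor (N folds all leading dims, K is the innermost),
// and only M is recomputed per update() from the output dims. Sketch with a
// hypothetical helper name:
#include <cstddef>
#include <functional>
#include <numeric>
#include <vector>

static std::size_t foldLeadingDims(const std::vector<std::size_t>& dims) {
    return std::accumulate(dims.begin(), dims.end() - 1, std::size_t{1}, std::multiplies<std::size_t>());
}

// e.g. weights {512, 128} -> N = 512, K = 128 (fixed at construction);
// output {8, 16, 512}     -> M = 8 * 16 = 128 (recomputed per update() call)
// -------------------------------------------------------------------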
{ return false; } @@ -79,6 +83,27 @@ FullyConnected::FullyConnected(const std::shared_ptr& op, const GraphC initTensorParallelConfig(context); if (!isSupportedOperation(op, errorMessage)) OPENVINO_THROW_NOT_IMPLEMENTED(errorMessage); + + argToInput[ARG_SRC] = DATA; + argToInput[ARG_WEI] = WEIGHTS; + argToInput[ARG_BIAS] = BIAS; + const auto fcQuantized = std::dynamic_pointer_cast(op); + if (fcQuantized) { + if (fcQuantized->get_input_size() > OUTPUT_SCALES && + fcQuantized->input(OUTPUT_SCALES).get_element_type() != ov::element::undefined) { + argToInput[ARG_DST | ARG_ATTR_SCALES] = OUTPUT_SCALES; + } + + if (fcQuantized->get_input_size() > WEIGHT_SCALES && + fcQuantized->input(WEIGHT_SCALES).get_element_type() != ov::element::undefined) { + argToInput[ARG_WEI | ARG_ATTR_SCALES] = WEIGHT_SCALES; + } + + if (fcQuantized->get_input_size() > WEIGHT_ZERO_POINTS && + fcQuantized->input(WEIGHT_ZERO_POINTS).get_element_type() != ov::element::undefined) { + argToInput[ARG_WEI | ARG_ATTR_ZERO_POINTS] = WEIGHT_ZERO_POINTS; + } + } } bool FullyConnected::canBeExecutedInInt8() const { @@ -364,31 +389,13 @@ static bool useSparseWeightsDecompression(const NodePtr& weightsInput, return sparseRate >= minSparseRate; } -void FullyConnected::needUpdateDQScaleForTensorParallel(std::vector& dequantizationScales) { - if (tp_cfg.enable_tensor_parallel) { - auto split_parts = [](int len, int n) { - int average = len / n; - std::vector parts(n, average); - parts.back() = len - average * (n - 1); - return parts; - }; - auto DQScales = getDQScales(); - auto split_lens = split_parts(DQScales.size(), tp_cfg.w_size); - auto split_offset = tp_cfg.w_rank * split_lens[0]; - std::vector newDQScales(split_lens[tp_cfg.w_rank]); - std::copy(DQScales.begin() + split_offset, DQScales.begin() + split_offset + split_lens[tp_cfg.w_rank], newDQScales.begin()); - dequantizationScales = newDQScales; - } -} - void FullyConnected::initSupportedPrimitiveDescriptors() { - attrs.withBias = getOriginalInputsNumber() == 3; + attrs.withBias = getOriginalInputPrecisionAtPort(BIAS) != ov::element::undefined; + // attrs.dequantizationScales = getDQScales(); + // needUpdateDQScaleForTensorParallel(attrs.dequantizationScales); - attrs.dequantizationScales = getDQScales(); - needUpdateDQScaleForTensorParallel(attrs.dequantizationScales); - - attrs.sparseWeights = useSparseWeightsDecompression(getParentEdgeAt(WEIGHTS_ID)->getParent(), - getOriginalInputPrecisionAtPort(DATA_ID), + attrs.sparseWeights = useSparseWeightsDecompression(getParentEdgeAt(WEIGHTS)->getParent(), + getOriginalInputPrecisionAtPort(DATA), context->getConfig().fcSparseWeiDecompressionRate); attrs.dynamicQuantizationGroupSize = context->getConfig().fcDynamicQuantizationGroupSize; attrs.modelType = context->getConfig().modelType; @@ -404,6 +411,10 @@ void FullyConnected::initSupportedPrimitiveDescriptors() { VecMemoryDescs srcDescs; const auto& creatorsMap = BlockedDescCreator::getCommonCreators(); for (size_t i = 0; i < srcTypes.size(); i++) { + if (srcTypes[i] == element::undefined) { + srcDescs.push_back(MemoryDescUtils::makeEmptyDesc()); + continue; + } const auto srcDesc = creatorsMap.at(LayoutType::ncsp)->createSharedDesc(srcTypes[i], getInputShapeAtPort(i)); srcDescs.push_back(srcDesc); } @@ -415,23 +426,32 @@ void FullyConnected::initSupportedPrimitiveDescriptors() { } MemoryDescArgs descs{ - {ARG_SRC, srcDescs[0]}, - {ARG_WEI, srcDescs[1]}, - {ARG_BIAS, attrs.withBias ? 
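// ---- Editorial note: illustration only, not part of the patch ----
// The constructor above builds argToInput conditionally: an optional input
// (output scales, weight scales, weight zero points) participates only if
// the port exists and its element type is defined; a Placeholder input
// surfaces here as element type "undefined". The guard pattern, isolated:
//
//   if (fcQuantized->get_input_size() > WEIGHT_SCALES &&
//       fcQuantized->input(WEIGHT_SCALES).get_element_type() != ov::element::undefined) {
//       argToInput[ARG_WEI | ARG_ATTR_SCALES] = WEIGHT_SCALES;
//   }
//
// so downstream code can simply test argToInput.count(ARG_WEI | ARG_ATTR_SCALES)
// instead of re-deriving which optional ports are populated.
// -------------------------------------------------------------------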
srcDescs[2] : MemoryDescUtils::makeEmptyDesc()}, + {ARG_SRC, srcDescs[DATA]}, + {ARG_WEI, srcDescs[WEIGHTS]}, + {ARG_BIAS, srcDescs[BIAS]}, {ARG_DST, dstDescs[0]}, }; - needUpdateScaleForTensorParallel(); - needUpdateZeroPointForTensorParallel(); - auto executionContext = std::make_shared(context, getImplPriority(), privateWeightCache); factory = std::make_shared>(attrs, postOps, executionContext, descs); const auto nodeDescriptors = factory->getProperMemoryDescriptors(descs); NodeConfig nodeConfig; - nodeConfig.inConfs.emplace_back(nodeDescriptors.at(ARG_SRC)); - nodeConfig.inConfs.emplace_back(nodeDescriptors.at(ARG_WEI)); - if (attrs.withBias) nodeConfig.inConfs.emplace_back(nodeDescriptors.at(ARG_BIAS)); + nodeConfig.inConfs.resize(srcDescs.size()); + + for (const auto& desc : nodeDescriptors) { + if (argToInput.count(desc.first)) { + nodeConfig.inConfs[argToInput[desc.first]] = desc.second; + } + } + + for (size_t i = 3; i < srcDescs.size(); i++) { + nodeConfig.inConfs[i] = srcDescs[i]; + } + + // nodeConfig.inConfs.emplace_back(nodeDescriptors.at(ARG_SRC)); + // nodeConfig.inConfs.emplace_back(nodeDescriptors.at(ARG_WEI)); + // nodeConfig.inConfs.emplace_back(nodeDescriptors.at(ARG_BIAS)); const int inPlace = canBeInPlace() ? 0 : -1; nodeConfig.outConfs.emplace_back(nodeDescriptors.at(ARG_DST), BlockedMemoryDesc::FULL_MASK, inPlace); @@ -441,11 +461,11 @@ void FullyConnected::initSupportedPrimitiveDescriptors() { void FullyConnected::needSplitMemoryForTensorParallel() { if (tp_cfg.enable_tensor_parallel) { - auto src = getSrcMemoryAtPort(DATA_ID); - auto wgt = getSrcMemoryAtPort(WEIGHTS_ID); + auto src = getSrcMemoryAtPort(DATA); + auto wgt = getSrcMemoryAtPort(WEIGHTS); auto dst = getDstMemoryAtPort(0); // src - memory[ARG_SRC] = getSrcMemoryAtPort(DATA_ID); + memory[ARG_SRC] = getSrcMemoryAtPort(DATA); // wgt // split N direction tp_cfg.cached_splited_weight = attrs.weightsNonTransposed ? split_vertical(context->getEngine(), wgt, 0, tp_cfg.w_rank, tp_cfg.w_size) @@ -453,7 +473,7 @@ void FullyConnected::needSplitMemoryForTensorParallel() { memory[ARG_WEI] = tp_cfg.cached_splited_weight; // bias if (attrs.withBias) { - auto bias = getSrcMemoryAtPort(BIAS_ID); + auto bias = getSrcMemoryAtPort(BIAS); auto select_bias = split_horizontal(context->getEngine(), bias, 0, tp_cfg.w_rank, tp_cfg.w_size); tp_cfg.cached_splited_bias = select_bias; } else { @@ -463,6 +483,33 @@ void FullyConnected::needSplitMemoryForTensorParallel() { // dst memory[ARG_DST] = getDstMemoryAtPort(0); tp_cfg.cached_dst = split_horizontal(context->getEngine(), dst, -1, tp_cfg.w_rank, tp_cfg.w_size, false); + + memory[ARG_DST | ARG_ATTR_SCALES] = split_horizontal(context->getEngine(), memory[ARG_DST | ARG_ATTR_SCALES], 0, tp_cfg.w_rank, tp_cfg.w_size); + // auto split_parts = [](int len, int n) { + // int average = len / n; + // std::vector parts(n, average); + // parts.back() = len - average * (n - 1); + // return parts; + // }; + // auto DQScales = getDQScales(); + // auto split_lens = split_parts(DQScales.size(), tp_cfg.w_size); + // auto split_offset = tp_cfg.w_rank * split_lens[0]; + // std::vector newDQScales(split_lens[tp_cfg.w_rank]); + // std::copy(DQScales.begin() + split_offset, DQScales.begin() + split_offset + split_lens[tp_cfg.w_rank], newDQScales.begin()); + // dequantizationScales = newDQScales; + + auto scale_mem = std::const_pointer_cast(memory[ARG_WEI | ARG_ATTR_SCALES]); + memory[ARG_WEI | ARG_ATTR_SCALES] = attrs.weightsNonTransposed ? 
split_vertical(context->getEngine(), scale_mem, 0, tp_cfg.w_rank, tp_cfg.w_size) + : split_horizontal(context->getEngine(), scale_mem, 0, tp_cfg.w_rank, tp_cfg.w_size); + + auto zeropoint_mem = std::const_pointer_cast(memory[ARG_WEI | ARG_ATTR_ZERO_POINTS]); + auto element_num = zeropoint_mem->getSize() / zeropoint_mem->getPrecision().size(); + if (element_num == 1) { + tp_cfg.cached_zeropoint = zeropoint_mem; + } else { + tp_cfg.cached_zeropoint = attrs.weightsNonTransposed ? split_vertical(context->getEngine(), zeropoint_mem, 0, tp_cfg.w_rank, tp_cfg.w_size) + : split_horizontal(context->getEngine(), zeropoint_mem, 0, tp_cfg.w_rank, tp_cfg.w_size); + } } } @@ -471,7 +518,7 @@ void FullyConnected::needUpdateTensorParalelConfig() { // 1. weight shape is dynamic // 2. last dim can be splited. if (tp_cfg.enable_tensor_parallel) { - auto shape = getSrcMemoryAtPort(WEIGHTS_ID)->getShape(); + auto shape = getSrcMemoryAtPort(WEIGHTS)->getShape(); if (shape.isDynamic()) { tp_cfg.enable_tensor_parallel = false; } else if (shape.getDims()[0] < static_cast(tp_cfg.w_size)) { @@ -479,15 +526,31 @@ void FullyConnected::needUpdateTensorParalelConfig() { } } } + + + void FullyConnected::createPrimitive() { needUpdateTensorParalelConfig(); - memory[ARG_SRC] = getSrcMemoryAtPort(DATA_ID); - memory[ARG_WEI] = getSrcMemoryAtPort(WEIGHTS_ID); - memory[ARG_BIAS] = attrs.withBias ? getSrcMemoryAtPort(BIAS_ID) : MemoryDescUtils::makeEmptyMemory(context); + memory[ARG_SRC] = getSrcMemoryAtPort(DATA); + memory[ARG_WEI] = getSrcMemoryAtPort(WEIGHTS); + memory[ARG_BIAS] = getSrcMemoryAtPort(BIAS); memory[ARG_DST] = getDstMemoryAtPort(0); + if (argToInput.count(ARG_DST | ARG_ATTR_SCALES)) { + memory[ARG_DST | ARG_ATTR_SCALES] = getSrcMemoryAtPort(argToInput[ARG_DST | ARG_ATTR_SCALES]); + } + + if (argToInput.count(ARG_WEI | ARG_ATTR_SCALES)) { + memory[ARG_WEI | ARG_ATTR_SCALES] = getSrcMemoryAtPort(argToInput[ARG_WEI | ARG_ATTR_SCALES]); + } + + if (argToInput.count(ARG_WEI | ARG_ATTR_ZERO_POINTS)) { + memory[ARG_WEI | ARG_ATTR_ZERO_POINTS] = getSrcMemoryAtPort(argToInput[ARG_WEI | ARG_ATTR_ZERO_POINTS]); + } + needSplitMemoryForTensorParallel(); + // @todo should we preconfigure only for dynamic shapes? // Since for static shapes primitive is created in scope of compile_model() anyway factory->preconfigure(memory); @@ -511,49 +574,6 @@ ov::element::Type FullyConnected::getRuntimePrecision() const { return getMaxPrecision(srcTypes); } -void FullyConnected::needUpdateScaleForTensorParallel() { - if (tp_cfg.enable_tensor_parallel && tp_cfg.cached_scale) { - attrs.decompressionMultiplyPtr = tp_cfg.cached_scale; - } -} - -void FullyConnected::needSplitScaleForTensorParallel(const MemoryCPtr& memory) { - if (tp_cfg.enable_tensor_parallel && !tp_cfg.cached_scale) { - auto scale_mem = std::const_pointer_cast(memory); - tp_cfg.cached_scale = attrs.weightsNonTransposed ? 
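// ---- Editorial note: illustration only, not part of the patch ----
// Tensor-parallel splitting of the decompression constants follows the
// weight layout: with weightsNonTransposed the output-channel axis is the
// outer one, so scales and zero points are split vertically; for already
// transposed weights they are split horizontally. A single-element zero
// point is a broadcast scalar and is cached whole on every rank:
//
//   element_num == 1  ->  keep as-is (same scalar on all ranks)
//   element_num  > 1  ->  split along the output-channel axis, one shard per rank
// -------------------------------------------------------------------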
split_vertical(context->getEngine(), scale_mem, 0, tp_cfg.w_rank, tp_cfg.w_size) - : split_horizontal(context->getEngine(), scale_mem, 0, tp_cfg.w_rank, tp_cfg.w_size); - } -} - -void FullyConnected::needUpdateZeroPointForTensorParallel() { - if (tp_cfg.enable_tensor_parallel && tp_cfg.cached_zeropoint) { - attrs.decompressionSubtractPtr = tp_cfg.cached_zeropoint; - } -} - -void FullyConnected::needSplitZeroPointForTensorParallel(const MemoryCPtr& memory) { - if (tp_cfg.enable_tensor_parallel && !tp_cfg.cached_zeropoint) { - auto zeropoint_mem = std::const_pointer_cast(memory); - auto element_num = memory->getSize() / memory->getPrecision().size(); - if (element_num == 1) { - tp_cfg.cached_zeropoint = zeropoint_mem; - } else { - tp_cfg.cached_zeropoint = attrs.weightsNonTransposed ? split_vertical(context->getEngine(), zeropoint_mem, 0, tp_cfg.w_rank, tp_cfg.w_size) - : split_horizontal(context->getEngine(), zeropoint_mem, 0, tp_cfg.w_rank, tp_cfg.w_size); - } - } -} - -void FullyConnected::fuseDecompressionMultiply(const MemoryCPtr& memory) { - attrs.decompressionMultiplyPtr = memory; - needSplitScaleForTensorParallel(memory); -} - -void FullyConnected::fuseDecompressionSubtract(const MemoryCPtr& memory) { - attrs.decompressionSubtractPtr = memory; - needSplitZeroPointForTensorParallel(memory); -} - } // namespace node } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/fullyconnected.h b/src/plugins/intel_cpu/src/nodes/fullyconnected.h index be29342b851988..414e15ed488c6a 100644 --- a/src/plugins/intel_cpu/src/nodes/fullyconnected.h +++ b/src/plugins/intel_cpu/src/nodes/fullyconnected.h @@ -6,9 +6,11 @@ #include +#include #include #include #include +#include #include #include "cpu_memory.h" @@ -81,9 +83,19 @@ class FullyConnected : public Node { void toNumaNodeImpl(int numaID) override; private: - static const size_t DATA_ID = 0; - static const size_t WEIGHTS_ID = 1; - static const size_t BIAS_ID = 2; + enum InputId : size_t { + DATA = 0, + WEIGHTS = 1, + BIAS = 2, + WEIGHT_SCALES = 3, + WEIGHT_ZERO_POINTS = 4, + INPUT_SCALES = 5, + INPUT_ZERO_POINTS = 6, + OUTPUT_SCALES = 7, + OUTPUT_ZERO_POINTS = 8, + }; + + std::unordered_map argToInput; ExecutorPtr createExecutor(); void fuseDecompressionConstant(const MemoryCPtr& memory, MemoryCPtr& decompressionValuesPtr); @@ -94,11 +106,6 @@ class FullyConnected : public Node { void initTensorParallelSync(); void execTensorParallelSync(); void needSplitMemoryForTensorParallel(); - void needSplitScaleForTensorParallel(const MemoryCPtr& memory); - void needUpdateScaleForTensorParallel(); - void needSplitZeroPointForTensorParallel(const MemoryCPtr& memory); - void needUpdateZeroPointForTensorParallel(); - void needUpdateDQScaleForTensorParallel(std::vector& dequantizationScales); FCAttrs attrs; PostOps postOps; diff --git a/src/plugins/intel_cpu/src/nodes/input.cpp b/src/plugins/intel_cpu/src/nodes/input.cpp index ea659ec1e31b84..78a65363e5a4da 100644 --- a/src/plugins/intel_cpu/src/nodes/input.cpp +++ b/src/plugins/intel_cpu/src/nodes/input.cpp @@ -7,6 +7,8 @@ #include "cpu/x64/jit_generator.hpp" #include "openvino/core/parallel.hpp" #include "shape_inference/shape_inference_pass_through.hpp" +#include "ov_ops/placeholder.hpp" +#include "memory_desc/cpu_memory_desc_utils.h" using namespace dnnl; using namespace dnnl::impl::cpu::x64; @@ -222,14 +224,18 @@ Input::Input(const std::shared_ptr& op, const GraphContext::CPtr conte op::v0::Constant::get_type_info_static(), op::v0::Result::get_type_info_static(), 
op::v3::ReadValue::get_type_info_static(), - op::v6::ReadValue::get_type_info_static())) + op::v6::ReadValue::get_type_info_static(), + op::internal::Placeholder::get_type_info_static())) OPENVINO_THROW_NOT_IMPLEMENTED("CPU Input node doesn't support ngraph operation ", op->get_type_name(), " with name ", op->get_friendly_name()); - constOp = ov::as_type_ptr(op); - if (constOp) { + if (auto placeHolder = ov::as_type_ptr(op)) { + memoryPtr = MemoryDescUtils::makeEmptyMemory(context); constant = ConstantType::Const; + } else if (auto constOp = ov::as_type_ptr(op)) { + constant = ConstantType::Const; + m_constOp = constOp; cloneBlobIfRequired(); } else { constant = ConstantType::StrictNoConst; @@ -237,8 +243,8 @@ Input::Input(const std::shared_ptr& op, const GraphContext::CPtr conte } void Input::cloneBlobIfRequired() { - Shape shape(constOp->get_shape().empty() ? ov::Shape(1, 1) : constOp->get_shape()); - const auto prec = constOp->get_element_type(); + Shape shape(m_constOp->get_shape().empty() ? ov::Shape(1, 1) : m_constOp->get_shape()); + const auto prec = m_constOp->get_element_type(); const size_t size = shape.getElementsCount(); CpuBlockedMemoryDesc memDesc(prec, shape); @@ -257,21 +263,21 @@ void Input::cloneBlobIfRequired() { // oneDNN always allocate 1byte for element type with bitWidth < 8 (u4,u1...) // but ngraph Constant uses actual bitWidth for data storage allocation // in that case we make a copy to avoid overflow - if (constOp->get_byte_size() >= memDesc.getCurrentMemSize()) { - if (constOp->get_element_type() == element::string) { - memory = std::make_shared(getEngine(), memDesc, constOp->get_data_ptr()); + if (m_constOp->get_byte_size() >= memDesc.getCurrentMemSize()) { + if (m_constOp->get_element_type() == element::string) { + memory = std::make_shared(getEngine(), memDesc, m_constOp->get_data_ptr()); } else { - memory = std::make_shared(getEngine(), memDesc, constOp->get_data_ptr()); + memory = std::make_shared(getEngine(), memDesc, m_constOp->get_data_ptr()); } } else { - if (constOp->get_element_type() == element::string) { + if (m_constOp->get_element_type() == element::string) { memory = std::make_shared(getEngine(), memDesc); - auto src = constOp->get_data_ptr(); + auto src = m_constOp->get_data_ptr(); auto dst = memory->getDataAs(); std::copy(src, src + size, dst); } else { memory = std::make_shared(getEngine(), memDesc); - memcpy(memory->getData(), constOp->get_data_ptr(), constOp->get_byte_size()); + memcpy(memory->getData(), m_constOp->get_data_ptr(), m_constOp->get_byte_size()); } } @@ -287,7 +293,7 @@ void Input::cloneBlobIfRequired() { }; auto isBlobAligned = [&, this] () { - const void *ptr = constOp->get_data_ptr(); + const void *ptr = m_constOp->get_data_ptr(); bool blobAlignedOnSSE = true; #if defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64) // Majority of arithmetic and data processing instructions in legacy SSE isa requires @@ -302,7 +308,7 @@ void Input::cloneBlobIfRequired() { // The presence of subnormals is better to determined at IR read time. 
auto hasSubnormals = [&, this] () { if (prec == ov::element::f32) { - uint32_t const *u32data = constOp->get_data_ptr(); + uint32_t const *u32data = m_constOp->get_data_ptr(); if (!size) return false; @@ -345,7 +351,7 @@ void Input::cloneBlobIfRequired() { auto blobKey = [&, this] () { char ptr[32]; - snprintf(ptr, sizeof ptr, "%p", constOp->get_data_ptr()); + snprintf(ptr, sizeof ptr, "%p", m_constOp->get_data_ptr()); return getName() + "_" + std::to_string(size * prec.size()) + "_" + ptr; @@ -362,7 +368,7 @@ void Input::cloneBlobIfRequired() { // TODO: don't clone blob for multisocket + multistream case if current stream is run on the numa node where original weights are stored. (!weightCache || context->getNumNumaNodes() == 1 || context->getCPUStreamExecutor()->get_streams_num() == 1); - memoryPtr = clone_is_not_needed ? std::make_shared(getEngine(), memDesc, constOp->get_data_ptr()) + memoryPtr = clone_is_not_needed ? std::make_shared(getEngine(), memDesc, m_constOp->get_data_ptr()) : std::const_pointer_cast( weightCache ? *weightCache->findOrCreate(blobKey(), cloneBlob) : cloneBlob()); } diff --git a/src/plugins/intel_cpu/src/nodes/input.h b/src/plugins/intel_cpu/src/nodes/input.h index 9b304e5a75a891..de9324f0b13628 100644 --- a/src/plugins/intel_cpu/src/nodes/input.h +++ b/src/plugins/intel_cpu/src/nodes/input.h @@ -44,7 +44,7 @@ class Input : public Node { void initSupportedPdFromMemDesc(); private: - std::shared_ptr constOp; + std::shared_ptr m_constOp; MemoryCPtr memoryPtr; MemoryDescPtr extMemDesc = nullptr; bool isMeanImage = false; diff --git a/src/plugins/intel_cpu/src/nodes/reference.cpp b/src/plugins/intel_cpu/src/nodes/reference.cpp index 43b8f041184a70..d972914e9434b6 100644 --- a/src/plugins/intel_cpu/src/nodes/reference.cpp +++ b/src/plugins/intel_cpu/src/nodes/reference.cpp @@ -14,7 +14,7 @@ Reference::Reference(const std::shared_ptr& op, const GraphContext::CP Node(op, context, NgraphShapeInferFactory(op, FULL_PORT_MASK)), ovCoreNode(op), additionalErrorMessage(errorMessage) { if (!op->has_evaluate()) { OPENVINO_THROW_NOT_IMPLEMENTED( - "Cannot fallback on ngraph reference implementation (Ngraph::Node::evaluate() is not implemented"); + "Cannot fallback on ngraph reference implementation (Ngraph::Node::evaluate() is not implemented for op: ", *op); } setType(Type::Reference); diff --git a/src/plugins/intel_cpu/src/shape_inference/custom/fullyconnected.cpp b/src/plugins/intel_cpu/src/shape_inference/custom/fullyconnected.cpp index 5aef73df1949bd..e23f9c3776abf6 100644 --- a/src/plugins/intel_cpu/src/shape_inference/custom/fullyconnected.cpp +++ b/src/plugins/intel_cpu/src/shape_inference/custom/fullyconnected.cpp @@ -15,7 +15,8 @@ Result FCShapeInfer::infer( const VectorDims& activationShape = input_shapes[0].get(); const VectorDims& weightShape = input_shapes[1].get(); size_t activationRank = activationShape.size(); - size_t channelRank = weightShape.size() - 1; + // size_t channelRank = weightShape.size() - 1; + size_t channelRank = 1; // activation weight output_shape // NCHW CoCHW NCo @@ -23,7 +24,7 @@ Result FCShapeInfer::infer( // NC CoC NCo VectorDims outputShape(out_rank, 1); // set Co - outputShape.back() = weightShape[0]; + outputShape.back() = std::accumulate(weightShape.begin(), weightShape.end() - 1, 1, std::multiplies()); // set batch dims size_t batchRank = activationRank - channelRank; size_t startIdx = out_rank - batchRank - 1; diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_matmul_to_fc.cpp 
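// ---- Editorial note: illustration only, not part of the patch ----
// The FCShapeInfer change above pins channelRank to 1 and derives the output
// channel count by folding every weight dim except the last (K), so rank-2
// weights [Co, K] and rank-3 weights [B, Co, K] go through one code path.
// Roughly:
//
//   activation [N, C] x weights [Co, C]     -> output [N, Co]
//   activation [N, C] x weights [B, Co, C]  -> output [N, B * Co]
//
// i.e. outputShape.back() = prod(weightShape[0 .. rank-2]).
// -------------------------------------------------------------------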
b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_matmul_to_fc.cpp index f2861843a81110..77cb336227db28 100644 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_matmul_to_fc.cpp +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_matmul_to_fc.cpp @@ -2,7 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "transformations/cpu_opset/common/op/fully_connected.hpp" +// #include "transformations/cpu_opset/common/op/fully_connected.hpp" +#include "ov_ops/fully_connected.hpp" #include "convert_matmul_to_fc.hpp" #include "openvino/op/matmul.hpp" #include "openvino/op/convert.hpp" @@ -10,6 +11,7 @@ #include "openvino/op/reshape.hpp" #include "openvino/core/rt_info.hpp" #include "openvino/pass/pattern/op/wrap_type.hpp" +#include "ov_ops/placeholder.hpp" #include "transformations/utils/utils.hpp" #include "itt.hpp" @@ -138,18 +140,18 @@ ov::intel_cpu::ConvertMatMulToFC::ConvertMatMulToFC() { // Transferring from MatMul representation: [B, I, K] * [B, K, O] = [B, I, O] // to FullyConnected representation: [I, K] * [K, O] = [I, O] - if (rank_b != 2) { - ov::Dimension K = *(shape_b_aligned.rbegin() + 1); - OPENVINO_ASSERT(K.is_static()); - auto k_len = K.get_length(); - auto reshape_shape_values = matmul->get_transpose_b() ? std::vector{-1, k_len} : std::vector{k_len, -1}; - auto reshape_shape = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, reshape_shape_values); - fc_input_b = ov::op::util::make_try_fold(fc_input_b, reshape_shape, false); - if (!std::dynamic_pointer_cast(fc_input_b.get_node_shared_ptr())) { - new_ops.push_back(reshape_shape); - } - new_ops.push_back(fc_input_b.get_node_shared_ptr()); - } + // if (rank_b != 2) { + // ov::Dimension K = *(shape_b_aligned.rbegin() + 1); + // OPENVINO_ASSERT(K.is_static()); + // auto k_len = K.get_length(); + // auto reshape_shape_values = matmul->get_transpose_b() ? 
std::vector{-1, k_len} : std::vector{k_len, -1}; + // auto reshape_shape = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{ 2 }, reshape_shape_values); + // fc_input_b = ov::op::util::make_try_fold(fc_input_b, reshape_shape, false); + // if (!std::dynamic_pointer_cast(fc_input_b.get_node_shared_ptr())) { + // new_ops.push_back(reshape_shape); + // } + // new_ops.push_back(fc_input_b.get_node_shared_ptr()); + // } // Weights normalization if (!matmul->get_transpose_b()) { @@ -169,10 +171,15 @@ ov::intel_cpu::ConvertMatMulToFC::ConvertMatMulToFC() { fc_input_b = convert; } - // Create FullyConnected - auto output_rank = matmul->get_output_partial_shape(0).rank(); - auto fc = std::make_shared(fc_input_a, fc_input_b, output_rank, - matmul->get_output_element_type(0)); + auto bias_ph = std::make_shared(); + new_ops.push_back(bias_ph); + + auto fc = std::make_shared(fc_input_a, + fc_input_b, + bias_ph, + matmul->get_output_element_type(0)); + // auto fc = std::make_shared(fc_input_a, fc_input_b, matmul->get_output_element_type(0)); + fc->set_friendly_name(matmul->get_friendly_name()); ///todo: CVS-130863 Remove after fp16_compression is copyable if (ov::fp16_compression_is_disabled(matmul)) diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_to_power_static.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_to_power_static.cpp index 8079286d1e3ad7..c44a4bc0952afe 100644 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_to_power_static.cpp +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_to_power_static.cpp @@ -12,7 +12,8 @@ #include "openvino/pass/pattern/op/or.hpp" #include "transformations/rt_info/dequantization_node.hpp" #include "transformations/cpu_opset/common/op/power_static.hpp" -#include "transformations/cpu_opset/common/op/fully_connected.hpp" +#include "ov_ops/fully_connected.hpp" +// #include "transformations/cpu_opset/common/op/fully_connected.hpp" #include "utils/general_utils.h" #include "itt.hpp" @@ -47,16 +48,16 @@ bool isConvertableToPowerStatic(const std::shared_ptr &node) { return ov::shape_size(const_shape) == 1 && input_rank.get_length() >= static_cast(const_shape.size()) && !ov::intel_cpu::one_of(node->get_input_node_shared_ptr(nonConstPort)->get_type_info(), - ov::opset1::NormalizeL2::get_type_info_static(), - ov::opset4::Interpolate::get_type_info_static(), - ov::opset1::Convolution::get_type_info_static(), - ov::opset1::GroupConvolution::get_type_info_static(), - ov::opset1::ConvolutionBackpropData::get_type_info_static(), - ov::opset1::GroupConvolutionBackpropData::get_type_info_static(), - ov::opset1::MatMul::get_type_info_static(), - ov::intel_cpu::FullyConnectedNode::get_type_info_static(), - ov::op::v0::MVN::get_type_info_static(), - ov::opset6::MVN::get_type_info_static()); + ov::opset1::NormalizeL2::get_type_info_static(), + ov::opset4::Interpolate::get_type_info_static(), + ov::opset1::Convolution::get_type_info_static(), + ov::opset1::GroupConvolution::get_type_info_static(), + ov::opset1::ConvolutionBackpropData::get_type_info_static(), + ov::opset1::GroupConvolutionBackpropData::get_type_info_static(), + ov::opset1::MatMul::get_type_info_static(), + ov::op::internal::FullyConnected::get_type_info_static(), + ov::op::v0::MVN::get_type_info_static(), + ov::opset6::MVN::get_type_info_static()); } template <> diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/fc_bias_fusion.cpp 
b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/fc_bias_fusion.cpp
new file mode 100644
index 00000000000000..c07d45ed200d37
--- /dev/null
+++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/fc_bias_fusion.cpp
@@ -0,0 +1,114 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "fc_bias_fusion.hpp"
+#include
+#include
+
+#include "openvino/core/type.hpp"
+#include "openvino/op/reshape.hpp"
+#include "openvino/pass/pattern/op/or.hpp"
+#include "ov_ops/fully_connected.hpp"
+#include "ov_ops/fully_connected_quantized.hpp"
+#include "ov_ops/placeholder.hpp"
+#include "openvino/core/rt_info.hpp"
+#include "openvino/op/add.hpp"
+#include "openvino/pass/pattern/op/wrap_type.hpp"
+
+#include "transformations/utils/utils.hpp"
+
+#include "itt.hpp"
+
+ov::intel_cpu::FullyConnectedBiasFusion::FullyConnectedBiasFusion() {
+    MATCHER_SCOPE(FullyConnectedBiasFusion);
+    auto any = ov::pass::pattern::any_input();
+    auto input = any;
+    auto weights = ov::pass::pattern::any_input(ov::pass::pattern::has_static_shape());
+    auto ph = ov::pass::pattern::wrap_type<ov::op::internal::Placeholder>();
+
+    auto has_single_consumer = [](ov::Output<ov::Node> output) {
+        return ov::pass::pattern::consumers_count(1)(output);
+    };
+
+    auto m_fc =
+        ov::pass::pattern::wrap_type<ov::op::internal::FullyConnected>({input, weights, ph}, has_single_consumer);
+
+    auto m_fc_q = ov::pass::pattern::wrap_type<ov::op::internal::FullyConnectedQuantized>(
+        {
+            input,
+            weights,
+            ph,
+            ov::pass::pattern::any_input(),
+            ov::pass::pattern::any_input(),
+            ov::pass::pattern::any_input(),
+            ov::pass::pattern::any_input(),
+            ov::pass::pattern::any_input(),
+            ov::pass::pattern::any_input()
+        },
+        has_single_consumer);
+
+    auto m_fc_or = std::make_shared<ov::pass::pattern::op::Or>(
+        OutputVector{
+            m_fc,
+            m_fc_q,
+        });
+
+    auto m_bias = ov::pass::pattern::any_input(ov::pass::pattern::has_static_shape());
+    auto m_add = ov::pass::pattern::wrap_type<ov::op::v1::Add>({m_fc_or, m_bias});
+
+    ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher &m) {
+        auto& pattern_to_output = m.get_pattern_value_map();
+
+        auto add = pattern_to_output[m_add].get_node_shared_ptr();
+        auto bias = pattern_to_output[m_bias].get_node_shared_ptr();
+        auto fc = pattern_to_output.count(m_fc) ? pattern_to_output[m_fc].get_node_shared_ptr()
+                                                : pattern_to_output[m_fc_q].get_node_shared_ptr();
+
+        if (transformation_callback(fc)) {
+            return false;
+        }
+
+        if (!std::dynamic_pointer_cast<ov::op::v0::Constant>(bias)) {
+            return false;
+        }
+
+        ov::Shape bias_shape(bias->get_shape());
+        ov::PartialShape output_shape(fc->get_output_partial_shape(0));
+        size_t bias_size = ov::shape_size(bias_shape);
+        auto rank = output_shape.rank().get_length();
+        if (rank == 0 || output_shape[rank - 1].is_dynamic()) {
+            return false;
+        }
+
+        if (bias_shape.empty() || static_cast<int64_t>(bias_shape.back()) != output_shape[rank - 1].get_length() ||
+            bias_shape.back() != bias_size) {
+            return false;
+        }
+
+        ov::NodeVector new_ops;
+
+        std::shared_ptr<ov::Node> final_bias = bias;
+        if (bias_shape.size() >= 2) {
+            auto reshape_const = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{ 1 }, { -1 });
+            final_bias = ov::op::util::make_try_fold<ov::op::v1::Reshape>(final_bias, reshape_const, true);
+            new_ops.push_back(final_bias);
+        }
+
+        std::shared_ptr<ov::Node> fc_with_bias;
+
+        // @todo can be replaced by some virtual function, i.e.
clone_with_new_bias() + // so we don't need to down cast here + auto fc_node = std::dynamic_pointer_cast(fc); + fc_with_bias = fc_node->fuse_bias(final_bias); + + new_ops.push_back(fc_with_bias); + + fc_with_bias->set_friendly_name(add->get_friendly_name()); + ov::copy_runtime_info({fc, add}, new_ops); + ov::replace_node(add, fc_with_bias); + return true; + }; + + auto m = std::make_shared(m_add, matcher_name); + this->register_matcher(m, callback); +} diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/fc_bias_fusion.hpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/fc_bias_fusion.hpp new file mode 100644 index 00000000000000..e10af028544b61 --- /dev/null +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/fc_bias_fusion.hpp @@ -0,0 +1,27 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/pass/graph_rewrite.hpp" + +namespace ov { +namespace intel_cpu { + +class FullyConnectedBiasFusion : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("FullyConnectedBiasFusion", "0"); + FullyConnectedBiasFusion(); +}; + +class FullyConnectedBiasFusions : public ov::pass::GraphRewrite { +public: + OPENVINO_RTTI("FullyConnectedBiasFusion", "0"); + FullyConnectedBiasFusions() { + add_matcher(); + } +}; + +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/move_fc_reshape_to_weights.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/move_fc_reshape_to_weights.cpp index e681cd48ce8087..8ea914240f00d5 100644 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/move_fc_reshape_to_weights.cpp +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/move_fc_reshape_to_weights.cpp @@ -2,7 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "transformations/cpu_opset/common/op/fully_connected.hpp" +// #include "transformations/cpu_opset/common/op/fully_connected.hpp" +#include "ov_ops/fully_connected.hpp" #include "move_fc_reshape_to_weights.hpp" #include #include @@ -48,7 +49,8 @@ ov::intel_cpu::MoveFCReshapeToWeights::MoveFCReshapeToWeights() { auto weights_input_m = std::make_shared(ov::OutputVector{reshape_m, transpose_m}); auto data_m = any_input(); - auto fully_connected_m = wrap_type({data_m, weights_input_m}); + auto bias_m = any_input(); + auto fully_connected_m = wrap_type({data_m, weights_input_m, bias_m}); ov::matcher_pass_callback callback = [&](ov::pass::pattern::Matcher& m) { const auto fully_connected = m.get_match_root(); diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp index 20502f67d3645e..12f6395820eb86 100644 --- a/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp +++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp @@ -6,6 +6,7 @@ #include "openvino/op/fake_quantize.hpp" #include "openvino/pass/manager.hpp" #include "common/pass/align_matmul_input_ranks.hpp" +#include "transformations/common_optimizations/nop_elimination.hpp" #include "transformations/common_optimizations/reshape_prelu.hpp" #include "common/pass/convert_broadcast_to_tiles.hpp" #include "common/pass/convert_tile_to_seq_tiles.hpp" @@ -14,8 +15,11 @@ #include "common/pass/convert_to_leaky_relu.hpp" #include "common/pass/convert_to_swish_cpu.hpp" 
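// ---- Editorial note: illustration only, not part of the patch ----
// The bias fusion above only accepts a bias whose element count equals the
// last output dim, then flattens it to 1D with a foldable Reshape(-1) so the
// FullyConnected bias input always arrives in a canonical shape:
//
//   bias [1, 1, 512] (512 elements, output [.., 512])  -> reshaped to [512]
//   bias [2, 512]    (1024 elements)                   -> rejected, back != size
//
// The Reshape is created with special_zero = true and is constant-folded
// where possible via ov::op::util::make_try_fold.
// -------------------------------------------------------------------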
#include "common/pass/move_fc_reshape_to_weights.hpp" +#include "common/pass/fc_bias_fusion.hpp" #include "common/pass/split_fc.hpp" #include "transformations/convert_precision.hpp" +#include "transformations/op_conversions/convert_fc_to_compressed.hpp" +#include "transformations/op_conversions/convert_fc_to_quantized.hpp" #include "transformations/utils/utils.hpp" #include "common/pass/rnn_sequences_optimization.hpp" #include "transformations/common_optimizations/reshape_sequence_fusion.hpp" @@ -31,7 +35,31 @@ inline void ConvertToCPUSpecificOpset(std::shared_ptr &model) { ov::pass::Manager manager("CPU:ConvertToCPUSpecificOpset"); manager.set_per_pass_validation(false); + + // CPU_REGISTER_PASS_COMMON(manager, AlignMatMulInputRanks); CPU_REGISTER_PASS_COMMON(manager, ConvertMatMulToFC); + if (std::getenv("EXTRA_DUMP")) { + manager.run_passes(model); + ov::pass::Serialize("after_fc.xml", "/dev/null").run_on_model(model); + CPU_DISABLE_PASS_COMMON(manager, ConvertMatMulToFC); + } + CPU_REGISTER_PASS_X64(manager, pass::ConvertFullyConnectedToFullyConnectedCompressed); + // CPU_SET_CALLBACK_COMMON(manager, + // [](const std::shared_ptr& node) -> bool { + // const auto& weights = node->input_value(1); + // const auto& weights_shape = weights.get_shape(); + // const auto OC = *(weights_shape.rbegin() + 1); + // return OC == 1; + // }, + // pass::ConvertFullyConnectedToFullyConnectedCompressed); + + CPU_REGISTER_PASS_X64(manager, pass::ConvertFullyConnectedToFullyConnectedQuantized); + if (std::getenv("EXTRA_DUMP")) { + manager.run_passes(model); + ov::pass::Serialize("after_fc_quantized.xml", "/dev/null").run_on_model(model); + CPU_DISABLE_PASS_COMMON(manager, ConvertMatMulToFC); + } + CPU_REGISTER_PASS_COMMON(manager, FullyConnectedBiasFusion); CPU_REGISTER_PASS_X64(manager, MoveFCReshapeToWeights); CPU_REGISTER_PASS_X64(manager, ov::pass::Validate); CPU_REGISTER_PASS_COMMON(manager, AlignMatMulInputRanks); diff --git a/src/plugins/intel_cpu/src/utils/debug_capabilities.cpp b/src/plugins/intel_cpu/src/utils/debug_capabilities.cpp index 69b3da9be00227..5aae9ad3ea8c59 100644 --- a/src/plugins/intel_cpu/src/utils/debug_capabilities.cpp +++ b/src/plugins/intel_cpu/src/utils/debug_capabilities.cpp @@ -2,6 +2,7 @@ // Copyright (C) 2018-2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // +#include "openvino/core/type/element_type.hpp" #ifdef CPU_DEBUG_CAPS #include "cpu_memory.h" @@ -316,7 +317,7 @@ std::ostream & operator<<(std::ostream & os, const Node &c_node) { void * data = pmem->getData(); auto shape = pmem->getDesc().getShape().getDims(); - if (shape_size(shape) <= 8) { + if (shape_size(shape) <= 8 && pmem->getDesc().getPrecision() != ov::element::undefined) { auto type = pmem->getDesc().getPrecision(); auto tensor = ov::Tensor(type, shape, data); auto constop = std::make_shared(tensor); @@ -669,7 +670,7 @@ std::ostream& operator<<(std::ostream& os, const IMemory& mem) { } return os; } -// @todo remove + void print_dnnl_memory(const dnnl::memory& memory, const size_t size, const int id, const char* message) { const size_t s = memory.get_desc().get_size() / sizeof(float); std::cout << message << " " << id << " size: " << s << ", values: "; diff --git a/src/plugins/intel_cpu/src/utils/debug_capabilities.h b/src/plugins/intel_cpu/src/utils/debug_capabilities.h index cea96c6cfdbd72..4bee8361ecbc8f 100644 --- a/src/plugins/intel_cpu/src/utils/debug_capabilities.h +++ b/src/plugins/intel_cpu/src/utils/debug_capabilities.h @@ -3,6 +3,7 @@ // #pragma once +#include "cpu_types.h" #include 
"openvino/util/env_util.hpp" #ifdef CPU_DEBUG_CAPS @@ -94,6 +95,12 @@ class PrintableTimer { } }; +template +std::ostream & operator<<(std::ostream & os, const std::vector vec) { + for (const auto& element : vec) + os << element << "x"; + return os; +} std::ostream & operator<<(std::ostream & os, const PortConfig& desc); std::ostream & operator<<(std::ostream & os, const NodeConfig& desc); std::ostream & operator<<(std::ostream & os, const NodeDesc& desc); diff --git a/src/plugins/intel_cpu/tests/functional/cmake/target_per_test.cmake b/src/plugins/intel_cpu/tests/functional/cmake/target_per_test.cmake index 057869a864d87b..6e52f1928a60db 100644 --- a/src/plugins/intel_cpu/tests/functional/cmake/target_per_test.cmake +++ b/src/plugins/intel_cpu/tests/functional/cmake/target_per_test.cmake @@ -96,7 +96,9 @@ endif() endfunction() if(ENABLE_CPU_SPECIFIC_TARGET_PER_TEST) - create_target_per_test_for_directory(${CMAKE_CURRENT_SOURCE_DIR}/custom/subgraph_tests/src ov_cpu_func_subgraph) + create_target_per_test_for_directory(${CMAKE_CURRENT_SOURCE_DIR}/custom/subgraph_tests/src ov_cpu_func_subgraph) + create_target_per_test_for_directory(${CMAKE_CURRENT_SOURCE_DIR}/custom/subgraph_tests/src/x64 ov_cpu_func_subgraph_x64) + create_target_per_test_for_directory(${CMAKE_CURRENT_SOURCE_DIR}/custom/subgraph_tests/src/common ov_cpu_func_subgraph_common) create_target_per_test_for_directory(${CMAKE_CURRENT_SOURCE_DIR}/custom/single_layer_tests ov_cpu_func_slt) endif() diff --git a/src/plugins/intel_cpu/tests/unit/transformations/convert_matmul_test.cpp b/src/plugins/intel_cpu/tests/unit/transformations/convert_matmul_test.cpp index cb085920d97dc5..2fa3554f60e17a 100644 --- a/src/plugins/intel_cpu/tests/unit/transformations/convert_matmul_test.cpp +++ b/src/plugins/intel_cpu/tests/unit/transformations/convert_matmul_test.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include "ov_ops/fully_connected.hpp" #include #include #include @@ -19,6 +19,7 @@ #include #include "common_test_utils/ov_test_utils.hpp" +#include "ov_ops/placeholder.hpp" #include "transformations/rt_info/decompression.hpp" using namespace testing; @@ -38,11 +39,13 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest1) { auto transpose_constant1 = ov::opset1::Constant::create(ov::element::i32, ov::Shape{ 3 }, { 0, 2, 1 }); auto transpose1 = std::make_shared(input1, transpose_constant1); - auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{ 2, 2 }, { 1 }); - auto transpose_constant2 = ov::opset1::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 1, 0 }); + auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 2, 2 }, { 1 }); + auto transpose_constant2 = ov::opset1::Constant::create(ov::element::i32, ov::Shape{ 3 }, {0, 2, 1 }); auto transpose2 = std::make_shared(input2, transpose_constant2); - auto matmul = std::make_shared(transpose1, transpose2, ov::Rank(3)); + auto matmul = std::make_shared(transpose1, + transpose2, + std::make_shared()); model_ref = std::make_shared(ov::NodeVector{ matmul }, ov::ParameterVector{ input1 }); } @@ -78,7 +81,7 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest3) { { auto input1 = std::make_shared(ov::element::f32, ov::Shape{3, 2, 2}); auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{2, 2}, {1}); - auto matmul = std::make_shared(input1, input2, ov::Rank(3)); + auto matmul = std::make_shared(input1, input2, std::make_shared()); model_ref = std::make_shared(ov::NodeVector{matmul}, ov::ParameterVector{input1}); } @@ -96,7 +99,7 @@ 
@@ -96,7 +99,7 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest4) {
     {
         auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::PartialShape{-1, -1, 2});
         auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{2, 2}, {1});
-        auto matmul = std::make_shared<ov::intel_cpu::FullyConnectedNode>(input1, input2, ov::Rank(3));
+        auto matmul = std::make_shared<ov::op::internal::FullyConnected>(input1, input2, std::make_shared<ov::op::internal::Placeholder>());

         model_ref = std::make_shared<ov::Model>(ov::NodeVector{matmul}, ov::ParameterVector{input1});
     }
@@ -132,7 +135,7 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest7) {
     {
         auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{3, 2, 2});
         auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{3, 2}, {1});
-        auto fc = std::make_shared<ov::intel_cpu::FullyConnectedNode>(input1, input2, ov::Rank(2));
+        auto fc = std::make_shared<ov::op::internal::FullyConnected>(input1, input2, std::make_shared<ov::op::internal::Placeholder>());

         model_ref = std::make_shared<ov::Model>(ov::NodeVector{fc}, ov::ParameterVector{input1});
     }
@@ -151,7 +154,7 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest8) {
         auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::PartialShape{-1, -1, 2});
         auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{3, 2}, {1});

-        auto fc = std::make_shared<ov::intel_cpu::FullyConnectedNode>(input1, input2, ov::Rank(2));
+        auto fc = std::make_shared<ov::op::internal::FullyConnected>(input1, input2, std::make_shared<ov::op::internal::Placeholder>());

         auto a_shape = std::make_shared<ov::opset3::ShapeOf>(input1);
         auto I = ov::op::util::node_to_get_shape_value_of_indices_from_shape_node(a_shape, {0, 1});
@@ -174,7 +177,7 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest9) {
     {
         auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{3, 2, 2});
         auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{2, 2}, {1});
-        auto matmul = std::make_shared<ov::intel_cpu::FullyConnectedNode>(input1, input2, ov::Rank(3));
+        auto matmul = std::make_shared<ov::op::internal::FullyConnected>(input1, input2, std::make_shared<ov::op::internal::Placeholder>());

         model_ref = std::make_shared<ov::Model>(ov::NodeVector{matmul}, ov::ParameterVector{input1});
     }
@@ -218,8 +221,8 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest13) {
     }
     {
         auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::PartialShape{-1, -1, 1});
-        auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{80, 1}, {1});
-        auto matmul = std::make_shared<ov::intel_cpu::FullyConnectedNode>(input1, input2, ov::Rank(3));
+        auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 80, 1}, {1});
+        auto matmul = std::make_shared<ov::op::internal::FullyConnected>(input1, input2, std::make_shared<ov::op::internal::Placeholder>());

         model_ref = std::make_shared<ov::Model>(ov::NodeVector{matmul}, ov::ParameterVector{input1});
     }
@@ -242,8 +245,12 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest14) {
     }
     {
         auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::u8, ov::PartialShape{-1, -1, 1});
-        auto input2 = ov::opset1::Constant::create(ov::element::i8, ov::Shape{80, 1}, {1});
-        auto matmul = std::make_shared<ov::intel_cpu::FullyConnectedNode>(input1, input2, ov::Rank(3), ov::element::f32);
+        auto input2 = ov::opset1::Constant::create(ov::element::i8, ov::Shape{1, 80, 1}, {1});
+
+        auto matmul = std::make_shared<ov::op::internal::FullyConnected>(input1,
+                                                                         input2,
+                                                                         std::make_shared<ov::op::internal::Placeholder>(),
+                                                                         ov::element::f32);

         model_ref = std::make_shared<ov::Model>(ov::NodeVector{matmul}, ov::ParameterVector{input1});
     }
@@ -261,7 +268,11 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest_4d_1) {
     {
         auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{2, 3, 4, 5});
         auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{ 6, 5 }, { 1 });
-        auto fc = std::make_shared<ov::intel_cpu::FullyConnectedNode>(input1, input2, ov::Rank(4), ov::element::f32);
+
+        auto fc = std::make_shared<ov::op::internal::FullyConnected>(input1,
+                                                                     input2,
+                                                                     std::make_shared<ov::op::internal::Placeholder>(),
+                                                                     ov::element::f32);

         model_ref = std::make_shared<ov::Model>(ov::NodeVector{fc}, ov::ParameterVector{input1});
     }
@@ -278,8 +289,10 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest_4d_2) {
     }
     {
        auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::PartialShape{-1, -1, 1, 5});
-        auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{10, 5}, {1});
-        auto fc = std::make_shared<ov::intel_cpu::FullyConnectedNode>(input1, input2, ov::Rank(4));
+        auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 10, 5}, {1});
+        auto fc = std::make_shared<ov::op::internal::FullyConnected>(input1,
+                                                                     input2,
+                                                                     std::make_shared<ov::op::internal::Placeholder>());

         model_ref = std::make_shared<ov::Model>(ov::NodeVector{fc}, ov::ParameterVector{input1});
     }
@@ -296,8 +309,11 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest_4d_3) {
     {
         auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{2, 4});
-        auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{5, 4}, { 1 });
-        auto fc = std::make_shared<ov::intel_cpu::FullyConnectedNode>(input1, input2, ov::Rank(4), ov::element::f32);
+        auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 1, 5, 4}, { 1 });
+        auto fc = std::make_shared<ov::op::internal::FullyConnected>(input1,
+                                                                     input2,
+                                                                     std::make_shared<ov::op::internal::Placeholder>(),
+                                                                     ov::element::f32);

         model_ref = std::make_shared<ov::Model>(ov::NodeVector{fc}, ov::ParameterVector{input1});
     }
@@ -314,8 +330,11 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest_4d_4) {
     }
     {
         auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{3, 2, 4});
-        auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{5, 4}, { 1 });
-        auto fc = std::make_shared<ov::intel_cpu::FullyConnectedNode>(input1, input2, ov::Rank(4), ov::element::f32);
+        auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 1, 5, 4}, { 1 });
+        auto fc = std::make_shared<ov::op::internal::FullyConnected>(input1,
+                                                                     input2,
+                                                                     std::make_shared<ov::op::internal::Placeholder>(),
+                                                                     ov::element::f32);

         model_ref = std::make_shared<ov::Model>(ov::NodeVector{fc}, ov::ParameterVector{input1});
     }
@@ -332,8 +351,11 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest_4d_5) {
     }
     {
         auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{2, 3, 2, 4});
-        auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{5, 4}, { 1 });
-        auto fc = std::make_shared<ov::intel_cpu::FullyConnectedNode>(input1, input2, ov::Rank(4), ov::element::f32);
+        auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 1, 5, 4}, { 1 });
+        auto fc = std::make_shared<ov::op::internal::FullyConnected>(input1,
+                                                                     input2,
+                                                                     std::make_shared<ov::op::internal::Placeholder>(),
+                                                                     ov::element::f32);

         model_ref = std::make_shared<ov::Model>(ov::NodeVector{fc}, ov::ParameterVector{input1});
     }
@@ -350,8 +372,10 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest_second_input_rank_adj_1) {
     }
     {
         auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{5, 2, 3});
-        auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{2, 3}, {1});
-        auto matmul = std::make_shared<ov::intel_cpu::FullyConnectedNode>(input1, input2, ov::Rank(2));
+        auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 2, 3}, {1});
+        auto matmul = std::make_shared<ov::op::internal::FullyConnected>(input1,
+                                                                         input2,
+                                                                         std::make_shared<ov::op::internal::Placeholder>());
         model_ref = std::make_shared<ov::Model>(ov::NodeVector{matmul}, ov::ParameterVector{input1});
     }
 }
@@ -368,7 +392,9 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest_second_input_rank_adj_2) {
     {
         auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{ 2, 3 });
         auto weights = ov::opset1::Constant::create(ov::element::f32, ov::Shape{ 2, 3 }, { 1 });
-        auto matmul = std::make_shared<ov::intel_cpu::FullyConnectedNode>(input1, weights, ov::Rank(2));
+        auto matmul = std::make_shared<ov::op::internal::FullyConnected>(input1,
+                                                                         weights,
+                                                                         std::make_shared<ov::op::internal::Placeholder>());

         model_ref = std::make_shared<ov::Model>(ov::NodeVector{ matmul }, ov::ParameterVector{ input1 });
     }
@@ -386,8 +412,10 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest_second_input_rank_adj_3) {
     {
         auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{ 5, 2, 3 });
-        auto weights = ov::opset1::Constant::create(ov::element::f32, ov::Shape{ 2, 3 }, { 1 });
-        auto matmul = std::make_shared<ov::intel_cpu::FullyConnectedNode>(input1, weights, ov::Rank(3));
+        auto weights = ov::opset1::Constant::create(ov::element::f32, ov::Shape{ 1, 2, 3 }, { 1 });
+        auto matmul = std::make_shared<ov::op::internal::FullyConnected>(input1,
+                                                                         weights,
+                                                                         std::make_shared<ov::op::internal::Placeholder>());
         model_ref = std::make_shared<ov::Model>(ov::NodeVector{ matmul }, ov::ParameterVector{ input1 });
     }
 }
@@ -406,12 +434,14 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest_decompress_convert_0) {
     {
         auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{ 3, 2, 2 });
-        auto input2 = ov::opset1::Constant::create(ov::element::f16, ov::Shape{ 2, 2 }, { 1 });
-        auto transpose_constant = ov::opset1::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 1, 0 });
+        auto input2 = ov::opset1::Constant::create(ov::element::f16, ov::Shape{ 1, 2, 2 }, { 1 });
+        auto transpose_constant = ov::opset1::Constant::create(ov::element::i32, ov::Shape{ 3 }, { 0, 2, 1 });
         auto transpose = std::make_shared<ov::opset1::Transpose>(input2, transpose_constant);
         auto convert = std::make_shared<ov::opset1::Convert>(transpose, ov::element::f32);

-        auto matmul = std::make_shared<ov::intel_cpu::FullyConnectedNode>(input1, convert, ov::Rank(3));
+        auto matmul = std::make_shared<ov::op::internal::FullyConnected>(input1,
+                                                                         convert,
+                                                                         std::make_shared<ov::op::internal::Placeholder>());

         model_ref = std::make_shared<ov::Model>(ov::NodeVector{ matmul }, ov::ParameterVector{ input1 });
     }
@@ -433,12 +463,14 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest_decompress_convert_1) {
         auto transpose_constant1 = ov::opset1::Constant::create(ov::element::i32, ov::Shape{ 3 }, { 0, 2, 1 });
         auto transpose1 = std::make_shared<ov::opset1::Transpose>(input1, transpose_constant1);

-        auto input2 = ov::opset1::Constant::create(ov::element::f16, ov::Shape{ 2, 2 }, { 1 });
-        auto transpose_constant2 = ov::opset1::Constant::create(ov::element::i32, ov::Shape{ 2 }, { 1, 0 });
+        auto input2 = ov::opset1::Constant::create(ov::element::f16, ov::Shape{ 1, 2, 2 }, { 1 });
+        auto transpose_constant2 = ov::opset1::Constant::create(ov::element::i32, ov::Shape{ 3 }, { 0, 2, 1 });
         auto transpose2 = std::make_shared<ov::opset1::Transpose>(input2, transpose_constant2);
         auto convert = std::make_shared<ov::opset1::Convert>(transpose2, ov::element::f32);

-        auto matmul = std::make_shared<ov::intel_cpu::FullyConnectedNode>(transpose1, convert, ov::Rank(3));
+        auto matmul = std::make_shared<ov::op::internal::FullyConnected>(transpose1,
+                                                                         convert,
+                                                                         std::make_shared<ov::op::internal::Placeholder>());

         model_ref = std::make_shared<ov::Model>(ov::NodeVector{ matmul }, ov::ParameterVector{ input1 });
     }
@@ -467,11 +499,13 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest_compressed_u8_weights) {
         auto mul_const = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 1, 2}, {1});
         auto mul = std::make_shared<ov::opset1::Multiply>(sub, mul_const);

-        auto reshape_const = ov::opset1::Constant::create(ov::element::i32, {2}, {2, -1});
-        auto reshape = std::make_shared<ov::opset1::Reshape>(mul, reshape_const, false);
-        auto transpose_const = ov::opset1::Constant::create(ov::element::i32, {2}, {1, 0});
-        auto transpose = std::make_shared<ov::opset1::Transpose>(reshape, transpose_const);
-        auto matmul = std::make_shared<ov::intel_cpu::FullyConnectedNode>(data, transpose, ov::Rank(3));
+        // auto reshape_const = ov::opset1::Constant::create(ov::element::i32, {2}, {2, -1});
+        // auto reshape = std::make_shared<ov::opset1::Reshape>(mul, reshape_const, false);
+        auto transpose_const = ov::opset1::Constant::create(ov::element::i32, {3}, {0, 2, 1});
+        auto transpose = std::make_shared<ov::opset1::Transpose>(mul, transpose_const);
+        auto matmul = std::make_shared<ov::op::internal::FullyConnected>(data,
+                                                                         transpose,
+                                                                         std::make_shared<ov::op::internal::Placeholder>());

         model_ref = std::make_shared<ov::Model>(ov::NodeVector{ matmul }, ov::ParameterVector{ data });
     }
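All of the convert_matmul_test.cpp updates above follow one pattern: the CPU-local FullyConnectedNode, which carried the desired output rank as an ov::Rank argument, is replaced by the common ov::op::internal::FullyConnected, which instead takes an explicit bias input (an ov::op::internal::Placeholder when there is no bias), and reference weight constants keep the rank the matcher now produces, for example {1, N, K} instead of {N, K}. A minimal sketch of building such a reference model, assuming the headers introduced earlier in this patch; the helper name is ours and purely illustrative:

#include <memory>

#include "openvino/core/model.hpp"
#include "openvino/opsets/opset1.hpp"
#include "ov_ops/fully_connected.hpp"
#include "ov_ops/placeholder.hpp"

std::shared_ptr<ov::Model> make_fc_reference() {
    // Activations with a dynamic batch; the innermost dim is the reduction axis K = 2.
    auto data = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::PartialShape{-1, -1, 2});
    // Weights stay rank-3 ({1, N, K}) rather than being squeezed to {N, K}.
    auto weights = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 2, 2}, {1});
    // No bias: a Placeholder marks the unused third input instead of an ov::Rank hint.
    auto fc = std::make_shared<ov::op::internal::FullyConnected>(
        data,
        weights,
        std::make_shared<ov::op::internal::Placeholder>());
    return std::make_shared<ov::Model>(ov::NodeVector{fc}, ov::ParameterVector{data});
}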
diff --git a/src/plugins/intel_cpu/tests/unit/transformations/move_fc_reshape_to_weights.cpp b/src/plugins/intel_cpu/tests/unit/transformations/move_fc_reshape_to_weights.cpp
index 68241c9169bce7..6b8268729d3457 100644
--- a/src/plugins/intel_cpu/tests/unit/transformations/move_fc_reshape_to_weights.cpp
+++ b/src/plugins/intel_cpu/tests/unit/transformations/move_fc_reshape_to_weights.cpp
@@ -11,11 +11,12 @@
 #include
 #include
-#include
+#include "ov_ops/fully_connected.hpp"
 #include
 #include

 #include "common_test_utils/ov_test_utils.hpp"
+#include "ov_ops/placeholder.hpp"

 using namespace testing;
 using namespace ov::intel_cpu;
@@ -115,7 +116,8 @@ class MoveFCReshapeToWeightsTests : public TransformationTestsF, public WithPara
             auto transpose_const = ov::opset1::Constant::create(ov::element::i32, {2}, {1, 0});
             weights_path = std::make_shared<ov::opset1::Transpose>(weights_path, transpose_const);
         }
-        auto fully_connected = std::make_shared<ov::intel_cpu::FullyConnectedNode>(data, weights_path, ov::Rank(3));
+
+        auto fully_connected = std::make_shared<ov::op::internal::FullyConnected>(data, weights_path, std::make_shared<ov::op::internal::Placeholder>());
         return std::make_shared<ov::Model>(ov::NodeVector{fully_connected}, ov::ParameterVector{data});
     }
diff --git a/src/tests/test_utils/common_test_utils/src/ov_test_utils.cpp b/src/tests/test_utils/common_test_utils/src/ov_test_utils.cpp
index f5453b3c536480..fd400d84bfec17 100644
--- a/src/tests/test_utils/common_test_utils/src/ov_test_utils.cpp
+++ b/src/tests/test_utils/common_test_utils/src/ov_test_utils.cpp
@@ -72,6 +72,7 @@ void TransformationTestsF::TearDown() {
     manager.register_pass<ov::pass::CheckUniqueNames>(m_unh, m_soft_names_comparison, m_result_friendly_names_check);
     manager.run_passes(model);

+    // TODO: clarify why the rt_info check for fused names is still needed here
     if (!m_disable_rt_info_check) {
         OV_ASSERT_NO_THROW(check_rt_info(model));
     }
@@ -85,6 +86,7 @@ void TransformationTestsF::TearDown() {
         ASSERT_TRUE(res.valid) << res.message;
         comparator.disable(FunctionsComparator::CmpValues::ACCURACY);
     }
+
     auto res = comparator.compare(model, model_ref);
     ASSERT_TRUE(res.valid) << res.message;
 }
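Since the TearDown() epilogue above does the comparison work for every TransformationTestsF-based test, it helps to keep the fixture's contract in mind: the first scope fills in `model` and registers the pass under test on `manager`, the second scope fills in `model_ref`, and TearDown() then runs the passes, optionally checks rt_info, and asserts the comparator result. A hypothetical example, assuming the same includes as the tests above plus openvino/pass/constant_folding.hpp; the pass is a stand-in, not what these files actually test:

TEST_F(TransformationTestsF, TearDownFlowExample) {
    {
        auto data = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{3, 2, 2});
        auto weights = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 2, 2}, {1});
        auto fc = std::make_shared<ov::op::internal::FullyConnected>(
            data, weights, std::make_shared<ov::op::internal::Placeholder>());
        model = std::make_shared<ov::Model>(ov::NodeVector{fc}, ov::ParameterVector{data});
        manager.register_pass<ov::pass::ConstantFolding>();  // stand-in for the pass under test
    }
    {
        // Expected graph; identical here because ConstantFolding leaves this one untouched.
        auto data = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{3, 2, 2});
        auto weights = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 2, 2}, {1});
        auto fc = std::make_shared<ov::op::internal::FullyConnected>(
            data, weights, std::make_shared<ov::op::internal::Placeholder>());
        model_ref = std::make_shared<ov::Model>(ov::NodeVector{fc}, ov::ParameterVector{data});
    }
    // TearDown() (patched above) then runs `manager` on `model`, optionally checks
    // rt_info, and asserts comparator.compare(model, model_ref).valid.
}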