[GPU] Fix lockable requirement check for shape_of consumer (openvinotoolkit#26723)

### Details:
- Setting a remote tensor used to throw an exception when the input had multiple GPU users plus a ShapeOf node with a CPU implementation. This patch adds special handling for the shape_of node so that it no longer requires lockable inputs, since it never touches the tensor data (a conceptual sketch follows after this list).
- Note: other CPU node types will likely still fail in this scenario, so that needs further investigation.
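
For context, here is a minimal, self-contained sketch of the idea using hypothetical, simplified types (not the plugin's real classes; the actual check lives in `program_node::get_lockable_input_ids()` in the diff below): a node with a CPU implementation normally requires all of its inputs to be lockable (host-mappable), but ShapeOf only reads shape metadata, so it can be exempted.

```cpp
// Hypothetical, simplified sketch; not the real intel_gpu classes.
#include <cstddef>
#include <set>
#include <vector>

enum class NodeType { shape_of, other };

struct Node {
    NodeType type = NodeType::other;
    bool has_cpu_impl = false;      // implementation runs on CPU and reads input buffers
    std::vector<const Node*> deps;  // input nodes

    // Indices of inputs that must be lockable (host-accessible) for this node to run.
    std::set<std::size_t> lockable_input_ids() const {
        std::set<std::size_t> ids;
        // ShapeOf never dereferences input memory, only its shape,
        // so it is excluded even when it has a CPU implementation.
        if (has_cpu_impl && type != NodeType::shape_of) {
            for (std::size_t i = 0; i < deps.size(); ++i)
                ids.insert(i);
        }
        return ids;
    }
};
```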
vladimir-paramuzov authored Sep 23, 2024
1 parent f00ac41 commit c805873
Showing 2 changed files with 44 additions and 1 deletion.
2 changes: 1 addition & 1 deletion src/plugins/intel_gpu/src/graph/program_node.cpp
@@ -528,7 +528,7 @@ bool program_node::is_fused_dep(size_t dep_idx) const {
 std::set<size_t> program_node::get_lockable_input_ids() const {
     const auto impl = get_selected_impl();
     const bool has_cpu_impl = get_preferred_impl_type() == impl_types::cpu || (impl && impl->is_cpu());
-    if (has_cpu_impl) {
+    if (has_cpu_impl && !is_type<shape_of>()) {
         std::set<size_t> dependencies_indexes;
         for (size_t i = 0; i < get_dependencies().size(); i++)
             dependencies_indexes.insert(i);
@@ -3,6 +3,10 @@
 //
 
 #include "openvino/core/preprocess/pre_post_process.hpp"
+#include "openvino/op/add.hpp"
+#include "openvino/op/constant.hpp"
+#include "openvino/op/parameter.hpp"
+#include "openvino/op/shape_of.hpp"
 #include "openvino/runtime/intel_gpu/ocl/ocl.hpp"
 #include "openvino/runtime/intel_gpu/properties.hpp"
 #include "openvino/runtime/remote_tensor.hpp"
@@ -2616,6 +2620,45 @@ ov::RemoteTensor create_tensor(ov::intel_gpu::ocl::ClContext context,
 }
 } // namespace
 
+TEST(RemoteTensor, smoke_LockableHandling) {
+#if defined(ANDROID)
+    GTEST_SKIP();
+#endif
+
+    auto core = ov::Core();
+    auto remote_context = core.get_default_context(ov::test::utils::DEVICE_GPU);
+    auto gpu_context = remote_context.as<ov::intel_gpu::ocl::ClContext>();
+    auto type = ov::element::f32;
+    ov::Shape shape = {4};
+
+    auto remote_tensor = gpu_context.create_tensor(type, shape);
+
+    auto host_tensor_in = ov::Tensor(type, shape);
+    init_tensor(host_tensor_in);
+    remote_tensor.copy_from(host_tensor_in);
+
+    auto param_node = std::make_shared<ov::op::v0::Parameter>(type, ov::PartialShape{-1});
+    auto const_node = std::make_shared<ov::op::v0::Constant>(host_tensor_in);
+    auto add_node = std::make_shared<ov::op::v1::Add>(param_node, const_node);
+    auto shape_of_node = std::make_shared<ov::op::v3::ShapeOf>(param_node);
+    auto res1 = std::make_shared<ov::op::v0::Result>(add_node);
+    auto res2 = std::make_shared<ov::op::v0::Result>(shape_of_node);
+    auto model = std::make_shared<ov::Model>(ov::ResultVector{res1, res2}, ov::ParameterVector{param_node});
+
+    auto compiled_model = core.compile_model(model, ov::test::utils::DEVICE_GPU, ov::hint::inference_precision(ov::element::f32));
+    auto request = compiled_model.create_infer_request();
+    request.set_input_tensor(remote_tensor);
+
+    request.infer();
+    auto res = request.get_output_tensor(0);
+    auto host_res = ov::Tensor(type, shape);
+    res.copy_to(host_res);
+
+    for (size_t i = 0; i < ov::shape_size(host_tensor_in.get_shape()); i++) {
+        ASSERT_EQ(host_res.data<float>()[i], host_tensor_in.data<float>()[i] * 2);
+    }
+}
+
 TEST_P(RemoteTensor, smoke_CopyFrom) {
 #if defined(ANDROID)
     GTEST_SKIP();
