Skip to content

Added new example of Python object exposing __sycl_usm_array_interface__ #596

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 27, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions examples/cython/sycl_buffer/use_sycl_buffer.cpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,32 @@
//=- use_sycl_buffer.cpp - Example of SYCL code to be called from Cython =//
//
// Data Parallel Control (dpctl)
//
// Copyright 2020-2021 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements SYCL code to compute columnwise total of a matrix,
/// provided as host C-contiguous allocation. SYCL kernels access this memory
/// using `sycl::buffer`. Two routines are provided. One solves the task by
/// calling BLAS function GEMV from Intel(R) Math Kernel Library, the other
/// performs the computation using DPC++ reduction group function and atomics.
///
//===----------------------------------------------------------------------===//

#include "use_sycl_buffer.h"
#include "dpctl_sycl_types.h"
#include <CL/sycl.hpp>
Expand Down
28 changes: 28 additions & 0 deletions examples/cython/sycl_direct_linkage/sycl_function.cpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,31 @@
//=- sycl_function.cpp - Example of SYCL code to be called from Cython =//
//
// Data Parallel Control (dpctl)
//
// Copyright 2020-2021 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements SYCL code to compute columnwise total of a matrix,
/// provided as host C-contiguous allocation. SYCL kernels access this memory
/// using `sycl::buffer`. The routine solves the task by calling BLAS function
/// GEMV from Intel(R) Math Kernel Library.
///
//===----------------------------------------------------------------------===//

#include "sycl_function.hpp"
#include "mkl.h"
#include <CL/sycl.hpp>
Expand Down
28 changes: 28 additions & 0 deletions examples/cython/usm_memory/sycl_blackscholes.cpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,31 @@
//=- sycl_blackscholes.cpp - Example of SYCL code to be called from Cython =//
//
// Data Parallel Control (dpctl)
//
// Copyright 2020-2021 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements SYCL code to price European vanilla options using
/// Black-Scholes formula, as well as code to generate option parameters using
/// SYCL device random number generation library from Intel(R) Math Kernel
/// Library.
///
//===----------------------------------------------------------------------===//

#include "sycl_blackscholes.hpp"
#include "dpctl_sycl_types.h"
#include <CL/sycl.hpp>
Expand Down
26 changes: 26 additions & 0 deletions examples/cython/usm_memory/sycl_blackscholes.hpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,29 @@
//=- sycl_blackscholes.hpp - Example of SYCL code to be called from Cython =//
//
// Data Parallel Control (dpctl)
//
// Copyright 2020-2021 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file exports C++ functions to be called from Cython-generated
/// extensions.
///
//===----------------------------------------------------------------------===//

#include "dpctl_sycl_types.h"
#include <CL/sycl.hpp>

Expand Down
29 changes: 29 additions & 0 deletions examples/pybind11/external_usm_allocation/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Exposing USM allocations made by native code to dpctl

This extension demonstrates how a Python object backed by
a native class, which allocates USM memory, can expose it
to dpctl.memory entities using `__sycl_usm_array_interface__`.


# Building extension

```
source /opt/intel/oneapi/compiler/latest/env/vars.sh
CXX=dpcpp CC=dpcpp python setup.py build_ext --inplace
python example.py
```

# Sample output

```
(idp) [12:43:20 ansatnuc04 external_usm_allocation]$ python example.py
<external_usm_alloc.DMatrix object at 0x7f2b98b4cef0>
{'data': [94846745444352, True], 'shape': (5, 5), 'strides': None, 'version': 1, 'typestr': '|f8', 'syclobj': <capsule object "SyclQueueRef" at 0x7f2b9b941d80>}
shared

[1.0, 1.0, 1.0, 2.0, 2.0]
[1.0, 0.0, 1.0, 2.0, 2.0]
[1.0, 1.0, 0.0, 2.0, 2.0]
[0.0, 0.0, 0.0, 3.0, -1.0]
[0.0, 0.0, 0.0, -1.0, 5.0]
```
161 changes: 161 additions & 0 deletions examples/pybind11/external_usm_allocation/_usm_alloc_example.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
//==- _usm_alloc_example.cpp - Example of Pybind11 extension exposing --===//
// native USM allocation to Python in such a way that dpctl.memory
// can form views into it.
//
// Data Parallel Control (dpctl)
//
// Copyright 2020-2021 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements a Pybind11-generated extension that creates a Python
/// type backed by the C++ class DMatrix, which creates a USM allocation
/// associated with a given dpctl.SyclQueue. A Python object of this type
/// implements __sycl_usm_array_interface__, allowing
/// dpctl.memory.as_usm_memory to form a view into this allocation and modify
/// it from Python.
///
/// The DMatrix type object also implements a `.tolist()` method, which copies
/// the content of the object into a list of lists of Python floats.
///
//===----------------------------------------------------------------------===//
#include <CL/sycl.hpp>

// clang-format off
#include "dpctl_sycl_types.h"
#include "../_sycl_queue.h"
#include "../_sycl_queue_api.h"
// clang-format on

#include "pybind11/pybind11.h"
#include "pybind11/stl.h"

namespace py = pybind11;

/// Matrix of doubles stored in a flat std::vector whose allocator is a
/// sycl::usm_allocator of kind usm::alloc::shared bound to a SYCL queue.
///
/// Element (i, j) lives at flat index `i * columns + j`, i.e. the storage is
/// row-major (C-contiguous).
struct DMatrix
{
    using alloc_t = sycl::usm_allocator<double, sycl::usm::alloc::shared>;
    using vec_t = std::vector<double, alloc_t>;

    /// Create a `rows` x `columns` matrix whose storage is allocated through
    /// a USM allocator constructed from queue `q`. A copy of `q` is kept.
    ///
    /// Members are initialized in declaration order, so `n_`, `m_` and
    /// `alloc_` are already valid when `vec_` is constructed from them.
    DMatrix(sycl::queue &q, size_t rows, size_t columns)
        : n_(rows), m_(columns), q_(q), alloc_(q), vec_(n_ * m_, alloc_)
    {
    }
    ~DMatrix() = default;
    DMatrix(const DMatrix &) = default;
    DMatrix(DMatrix &&) = default;

    /// Number of rows.
    size_t get_n() const
    {
        return n_;
    }

    /// Number of columns.
    size_t get_m() const
    {
        return m_;
    }

    /// Mutable access to the underlying flat storage.
    vec_t &get_vector()
    {
        return vec_;
    }

    /// Read-only access to the underlying flat storage.
    const vec_t &get_vector() const
    {
        return vec_;
    }

    /// Copy of the queue this matrix was created with.
    sycl::queue get_queue() const
    {
        return q_;
    }

    /// Value of element (i, j).
    ///
    /// Uses `std::vector::at`, so a flat index past the end of the storage
    /// throws `std::out_of_range` instead of reading out of bounds.
    double get_element(size_t i, size_t j) const
    {
        return vec_.at(i * m_ + j);
    }

private:
    size_t n_;      // number of rows
    size_t m_;      // number of columns
    sycl::queue q_; // queue the allocation is associated with
    alloc_t alloc_; // USM allocator bound to q_
    vec_t vec_;     // flat row-major storage of n_ * m_ doubles
};

/// Factory bound as the Python constructor of DMatrix.
///
/// \param queue Python object expected to be a dpctl.SyclQueue instance.
/// \param n     Number of rows.
/// \param m     Number of columns.
/// \return A DMatrix of shape (n, m) allocated with the given queue.
/// \throws std::runtime_error if `queue` is not a dpctl.SyclQueue.
DMatrix create_matrix(py::object queue, size_t n, size_t m)
{
    PyObject *py_queue = queue.ptr();

    // Reject anything that is not a dpctl.SyclQueue up front.
    if (!PyObject_TypeCheck(py_queue, &PySyclQueueType)) {
        throw std::runtime_error("expected dpctl.SyclQueue as argument");
    }

    // Review note from the dpctl maintainers: get_queue_ref returns the
    // reference stored inside the Python object, so the resources behind it
    // stay managed by that object and must NOT be freed here. DMatrix copies
    // the sycl::queue in its constructor, so it does not depend on the
    // lifetime of this reference afterwards.
    DPCTLSyclQueueRef queue_ref =
        get_queue_ref(reinterpret_cast<PySyclQueueObject *>(py_queue));
    sycl::queue &sycl_q = *reinterpret_cast<sycl::queue *>(queue_ref);

    return DMatrix(sycl_q, n, m);
}

py::dict construct_sua_iface(DMatrix &m)
{
// need "version", "data", "shape", "typestr", "syclobj"
py::tuple shape = py::make_tuple(m.get_n(), m.get_m());
py::list data_entry(2);
data_entry[0] = reinterpret_cast<size_t>(m.get_vector().data());
data_entry[1] = true;
auto syclobj = py::capsule(
reinterpret_cast<void *>(new sycl::queue(m.get_queue())),
"SyclQueueRef", [](PyObject *cap) {
if (cap) {
auto name = PyCapsule_GetName(cap);
std::string name_s(name);
if (name_s == "SyclQueueRef" or name_s == "used_SyclQueueRef") {
void *p = PyCapsule_GetPointer(cap, name);
delete reinterpret_cast<sycl::queue *>(p);
}
}
});
py::dict iface;
iface["data"] = data_entry;
iface["shape"] = shape;
iface["strides"] = py::none();
iface["version"] = 1;
iface["typestr"] = "|f8";
iface["syclobj"] = syclobj;

return iface;
}

/// Copy the matrix into a Python list of lists of floats.
///
/// The outer list has one entry per row; each inner list has one entry per
/// column, read through DMatrix::get_element.
///
/// \param m Matrix to copy from.
/// \return A list of `m.get_n()` lists, each of length `m.get_m()`.
py::list tolist(DMatrix &m)
{
    const size_t n_rows = m.get_n();
    const size_t n_cols = m.get_m();

    py::list result(n_rows);
    for (size_t r = 0; r != n_rows; ++r) {
        py::list current_row(n_cols);
        for (size_t c = 0; c != n_cols; ++c) {
            current_row[c] = m.get_element(r, c);
        }
        result[r] = current_row;
    }

    return result;
}

/// Module definition for `external_usm_alloc`.
///
/// Exposes the DMatrix type to Python with:
///   - a constructor `DMatrix(dpctl.SyclQueue, n_rows, n_cols)`,
///   - a read-only `__sycl_usm_array_interface__` property,
///   - a `tolist()` method.
PYBIND11_MODULE(external_usm_alloc, m)
{
    // Import the dpctl._sycl_queue extension so that PySyclQueueType and
    // get_queue_ref are usable from this module.
    // NOTE(review): assumes the Cython-generated C-API convention for this
    // function (returns 0 on success, non-zero with a Python exception set on
    // failure) -- confirm against _sycl_queue_api.h.
    if (import_dpctl___sycl_queue() != 0) {
        throw py::error_already_set();
    }

    py::class_<DMatrix> dm(m, "DMatrix");
    dm.def(py::init(&create_matrix),
           "DMatrix(dpctl.SyclQueue, n_rows, n_cols)");
    // Getter only: the interface dictionary is rebuilt on every access and
    // cannot be assigned from Python.
    dm.def_property_readonly("__sycl_usm_array_interface__",
                             &construct_sua_iface);
    dm.def("tolist", &tolist, "Return matrix as a Python list of lists");
}
52 changes: 52 additions & 0 deletions examples/pybind11/external_usm_allocation/example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Data Parallel Control (dpctl)
#
# Copyright 2020-2021 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# coding: utf-8

import external_usm_alloc as eua
import numpy as np

import dpctl
import dpctl.memory as dpm

# Create a 5x5 DMatrix whose storage is allocated by the native extension
# using a queue targeting a GPU device.
queue = dpctl.SyclQueue("gpu")
matrix = eua.DMatrix(queue, 5, 5)

print(matrix)
print(matrix.__sycl_usm_array_interface__)

# View the native allocation as a dpctl memory object; this relies on
# DMatrix exposing __sycl_usm_array_interface__.
usm_mem = dpm.as_usm_memory(matrix)

print(usm_mem.get_usm_type())

# Host-side data to copy into the allocation.
host_rows = [
    [1, 1, 1, 2, 2],
    [1, 0, 1, 2, 2],
    [1, 1, 0, 2, 2],
    [0, 0, 0, 3, -1],
    [0, 0, 0, -1, 5],
]
host_matrix = np.array(host_rows, dtype="d")

# copy_from_host is fed a flat view of the raw bytes of the host array.
flat_host = host_matrix.reshape(-1)
raw_bytes = flat_host.view(np.ubyte)

usm_mem.copy_from_host(raw_bytes)

# The data written through the memory view is visible through the DMatrix
# object itself.
print("")
for matrix_row in matrix.tolist():
    print(matrix_row)
Loading