IntelPython
diff --git a/dpctl/tensor/__init__.py b/dpctl/tensor/__init__.py
Lines changed: 3 additions & 1 deletion
diff --git a/dpctl/tensor/_elementwise_funcs.py b/dpctl/tensor/_elementwise_funcs.py
Lines changed: 21 additions & 1 deletion
diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/abs.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/abs.hpp
Lines changed: 25 additions & 131 deletions
@@ -90,7 +90,7 @@
 from dpctl.tensor._usmarray import usm_ndarray
 
 from ._constants import e, inf, nan, newaxis, pi
-from ._elementwise_funcs import abs, add, cos, isnan
+from ._elementwise_funcs import abs, add, cos, isfinite, isinf, isnan
 
 __all__ = [
     "Device",
@@ -168,5 +168,7 @@
     "abs",
     "add",
     "cos",
+    "isinf",
     "isnan",
+    "isfinite",
 ]
@@ -42,12 +42,32 @@
 
 cos = UnaryElementwiseFunc("cos", ti._cos_result_type, ti._cos, _cos_docstring)
 
+# ISFINITE
+
+_isfinite_docstring_ = """
+Computes, for each element of the input array, whether it is a finite number.
+"""
+
+isfinite = UnaryElementwiseFunc(
+    "isfinite", ti._isfinite_result_type, ti._isfinite, _isfinite_docstring_
+)
+
 # ISNAN
 
 _isnan_docstring_ = """
-Computes if ever element of input array is a NaN.
+Computes, for each element of the input array, whether it is a NaN.
 """
 
 isnan = UnaryElementwiseFunc(
     "isnan", ti._isnan_result_type, ti._isnan, _isnan_docstring_
 )
+
+# ISINF
+
+_isinf_docstring_ = """
+Computes, for each element of the input array, whether it is an infinity.
+"""
+
+isinf = UnaryElementwiseFunc(
+    "isinf", ti._isinf_result_type, ti._isinf, _isinf_docstring_
+)
@@ -1,11 +1,15 @@
 #pragma once
 #include <CL/sycl.hpp>
 
+#include "kernels/elementwise_functions/common.hpp"
+
 #include "utils/offset_utils.hpp"
 #include "utils/type_dispatch.hpp"
 #include "utils/type_utils.hpp"
 #include <pybind11/pybind11.h>
 
+#include <iostream>
+
 namespace dpctl
 {
 namespace tensor
@@ -18,120 +22,40 @@ namespace abs
 namespace py = pybind11;
 namespace td_ns = dpctl::tensor::type_dispatch;
 
-template <typename argT,
-          typename resT = argT,
-          unsigned int vec_sz = 4,
-          unsigned int n_vecs = 2>
-struct AbsContigFunctor
+using dpctl::tensor::type_utils::is_complex;
+
+template <typename argT, typename resT> struct AbsFunctor
 {
-private:
-    const argT *in = nullptr;
-    resT *out = nullptr;
-    const size_t nelems_;
 
-public:
-    AbsContigFunctor(const argT *inp, resT *res, const size_t n_elems)
-        : in(inp), out(res), nelems_(n_elems)
-    {
-    }
+    using is_constant = typename std::false_type;
+    // constexpr resT constant_value = resT{};
+    using supports_vec = typename std::false_type;
+    using supports_sg_loadstore = typename std::negation<
+        std::disjunction<is_complex<resT>, is_complex<argT>>>;
 
-    void operator()(sycl::nd_item<1> ndit) const
+    resT operator()(const argT &x)
     {
-        /* Each work-item processes vec_sz elements, contiguous in memory */
-        /* NOTE: vec_sz must divide sg.max_local_range()[0] */
 
         if constexpr (std::is_same_v<argT, bool> ||
                       (std::is_integral<argT>::value &&
                        std::is_unsigned<argT>::value))
         {
             static_assert(std::is_same_v<resT, argT>);
-
-            auto sg = ndit.get_sub_group();
-            std::uint8_t sgSize = sg.get_local_range()[0];
-            std::uint8_t max_sgSize = sg.get_max_local_range()[0];
-            size_t base = n_vecs * vec_sz *
-                          (ndit.get_group(0) * ndit.get_local_range(0) +
-                           sg.get_group_id()[0] * max_sgSize);
-
-            if (base + n_vecs * vec_sz * sgSize < nelems_ &&
-                sgSize == max_sgSize) {
-                using in_ptrT =
-                    sycl::multi_ptr<const argT,
-                                    sycl::access::address_space::global_space>;
-                using out_ptrT =
-                    sycl::multi_ptr<resT,
-                                    sycl::access::address_space::global_space>;
-                sycl::vec<argT, vec_sz> arg_vec;
-
-#pragma unroll
-                for (std::uint8_t it = 0; it < n_vecs * vec_sz; it += vec_sz) {
-                    arg_vec = sg.load<vec_sz>(in_ptrT(&in[base + it * sgSize]));
-                    sg.store<vec_sz>(out_ptrT(&out[base + it * sgSize]),
-                                     arg_vec);
-                }
-            }
-            else {
-                for (size_t k = base + sg.get_local_id()[0]; k < nelems_;
-                     k += sgSize) {
-                    out[k] = in[k];
-                }
-            }
+            return x;
         }
         else {
-            using dpctl::tensor::type_utils::is_complex;
-            if constexpr (is_complex<argT>::value) {
-                std::uint8_t sgSize = ndit.get_sub_group().get_local_range()[0];
-                size_t base = ndit.get_global_linear_id();
-
-                base = (base / sgSize) * sgSize * n_vecs * vec_sz +
-                       (base % sgSize);
-                for (size_t offset = base;
-                     offset <
-                     std::min(nelems_, base + sgSize * (n_vecs * vec_sz));
-                     offset += sgSize)
-                {
-                    out[offset] = std::abs(in[offset]);
-                }
-            }
-            else {
-                auto sg = ndit.get_sub_group();
-                std::uint8_t sgSize = sg.get_local_range()[0];
-                std::uint8_t maxsgSize = sg.get_max_local_range()[0];
-                size_t base = n_vecs * vec_sz *
-                              (ndit.get_group(0) * ndit.get_local_range(0) +
-                               sg.get_group_id()[0] * maxsgSize);
-
-                if (base + n_vecs * vec_sz < nelems_) {
-                    using in_ptrT = sycl::multi_ptr<
-                        const argT, sycl::access::address_space::global_space>;
-                    using out_ptrT = sycl::multi_ptr<
-                        resT, sycl::access::address_space::global_space>;
-                    sycl::vec<argT, vec_sz> arg_vec;
-
-#pragma unroll
-                    for (std::uint8_t it = 0; it < n_vecs * vec_sz;
-                         it += vec_sz) {
-                        arg_vec =
-                            sg.load<vec_sz>(in_ptrT(&in[base + it * sgSize]));
-#pragma unroll
-                        for (std::uint8_t k = 0; k < vec_sz; ++k) {
-                            arg_vec[k] = std::abs(arg_vec[k]);
-                        }
-                        sg.store<vec_sz>(out_ptrT(&out[base + it * sgSize]),
-                                         arg_vec);
-                    }
-                }
-                else {
-                    for (size_t k = base + sg.get_local_id()[0]; k < nelems_;
-                         k += sgSize) {
-                        out[k] = std::abs(in[k]);
-                    }
-                }
-            }
+            return std::abs(x);
         }
     }
 };
 
+template <typename argT,
+          typename resT = argT,
+          unsigned int vec_sz = 4,
+          unsigned int n_vecs = 2>
+using AbsContigFunctor = elementwise_common::
+    UnaryContigFunctor<argT, resT, AbsFunctor<argT, resT>, vec_sz, n_vecs>;
+
 template <typename T> struct AbsOutputType
 {
     using value_type = typename std::disjunction< // disjunction is C++17
@@ -220,39 +144,9 @@ template <typename fnT, typename T> struct AbsTypeMapFactory
     }
 };
 
-template <typename argT, typename resT, typename IndexerT>
-struct AbsStridedFunctor
-{
-private:
-    const argT *in = nullptr;
-    resT *out = nullptr;
-    IndexerT inp_res_indexer_;
-
-public:
-    AbsStridedFunctor(const argT *inp_p,
-                      resT *res_p,
-                      IndexerT two_offsets_indexer)
-        : in(inp_p), out(res_p), inp_res_indexer_(two_offsets_indexer)
-    {
-    }
-
-    void operator()(sycl::id<1> wid) const
-    {
-        auto offsets_ = inp_res_indexer_(static_cast<py::ssize_t>(wid[0]));
-        const auto &inp_offset = offsets_.get_first_offset();
-        const auto &out_offset = offsets_.get_second_offset();
-
-        if constexpr (std::is_same_v<argT, bool> ||
-                      (std::is_integral<argT>::value &&
-                       std::is_unsigned<argT>::value))
-        {
-            out[out_offset] = in[inp_offset];
-        }
-        else {
-            out[out_offset] = std::abs(in[inp_offset]);
-        }
-    }
-};
+template <typename argTy, typename resTy, typename IndexerT>
+using AbsStridedFunctor = elementwise_common::
+    UnaryStridedFunctor<argTy, resTy, IndexerT, AbsFunctor<argTy, resTy>>;
 
 template <typename T1, typename T2, typename T3> class abs_strided_kernel;