40
40
]
41
41
42
42
43
+ def _typestr_has_fp64 (arr_typestr ):
44
+ return arr_typestr in ["f8" , "c16" ]
45
+
46
+
47
+ def _typestr_has_fp16 (arr_typestr ):
48
+ return arr_typestr in ["f2" ]
49
+
50
+
43
51
@pytest.fixture(params=_usm_types_list)
def usm_type(request):
    """Parametrized fixture yielding one entry of ``_usm_types_list``.

    pytest runs each dependent test once per element of
    ``_usm_types_list``; the current element is exposed on
    ``request.param`` and handed back unchanged.
    """
    selected = request.param
    return selected
@@ -95,6 +103,14 @@ def test_copy1d_c_contig(src_typestr, dst_typestr):
95
103
q = dpctl .SyclQueue ()
96
104
except dpctl .SyclQueueCreationError :
97
105
pytest .skip ("Queue could not be created" )
106
+ if not q .sycl_device .has_aspect_fp64 and (
107
+ _typestr_has_fp64 (src_typestr ) or _typestr_has_fp64 (dst_typestr )
108
+ ):
109
+ pytest .skip ("Device does not support double precision" )
110
+ if not q .sycl_device .has_aspect_fp16 and (
111
+ _typestr_has_fp16 (src_typestr ) or _typestr_has_fp16 (dst_typestr )
112
+ ):
113
+ pytest .skip ("Device does not support half precision" )
98
114
src_dt = np .dtype (src_typestr )
99
115
dst_dt = np .dtype (dst_typestr )
100
116
Xnp = _random_vector (4096 , src_dt )
@@ -113,6 +129,14 @@ def test_copy1d_strided(src_typestr, dst_typestr):
113
129
q = dpctl .SyclQueue ()
114
130
except dpctl .SyclQueueCreationError :
115
131
pytest .skip ("Queue could not be created" )
132
+ if not q .sycl_device .has_aspect_fp64 and (
133
+ _typestr_has_fp64 (src_typestr ) or _typestr_has_fp64 (dst_typestr )
134
+ ):
135
+ pytest .skip ("Device does not support double precision" )
136
+ if not q .sycl_device .has_aspect_fp16 and (
137
+ _typestr_has_fp16 (src_typestr ) or _typestr_has_fp16 (dst_typestr )
138
+ ):
139
+ pytest .skip ("Device does not support half precision" )
116
140
src_dt = np .dtype (src_typestr )
117
141
dst_dt = np .dtype (dst_typestr )
118
142
Xnp = _random_vector (4096 , src_dt )
@@ -131,7 +155,12 @@ def test_copy1d_strided(src_typestr, dst_typestr):
131
155
assert are_close (Ynp , dpt .asnumpy (Y ))
132
156
133
157
# now 0-strided source
134
- X = dpt .usm_ndarray ((4096 ,), dtype = src_typestr , strides = (0 ,))
158
+ X = dpt .usm_ndarray (
159
+ (4096 ,),
160
+ dtype = src_typestr ,
161
+ strides = (0 ,),
162
+ buffer_ctor_kwargs = {"queue" : q },
163
+ )
135
164
X [0 ] = Xnp [0 ]
136
165
Y = dpt .empty (X .shape , dtype = dst_typestr , sycl_queue = q )
137
166
hev , ev = ti ._copy_usm_ndarray_into_usm_ndarray (src = X , dst = Y , sycl_queue = q )
@@ -145,6 +174,14 @@ def test_copy1d_strided2(src_typestr, dst_typestr):
145
174
q = dpctl .SyclQueue ()
146
175
except dpctl .SyclQueueCreationError :
147
176
pytest .skip ("Queue could not be created" )
177
+ if not q .sycl_device .has_aspect_fp64 and (
178
+ _typestr_has_fp64 (src_typestr ) or _typestr_has_fp64 (dst_typestr )
179
+ ):
180
+ pytest .skip ("Device does not support double precision" )
181
+ if not q .sycl_device .has_aspect_fp16 and (
182
+ _typestr_has_fp16 (src_typestr ) or _typestr_has_fp16 (dst_typestr )
183
+ ):
184
+ pytest .skip ("Device does not support half precision" )
148
185
src_dt = np .dtype (src_typestr )
149
186
dst_dt = np .dtype (dst_typestr )
150
187
Xnp = _random_vector (4096 , src_dt )
@@ -172,6 +209,14 @@ def test_copy2d(src_typestr, dst_typestr, st1, sgn1, st2, sgn2):
172
209
q = dpctl .SyclQueue ()
173
210
except dpctl .SyclQueueCreationError :
174
211
pytest .skip ("Queue could not be created" )
212
+ if not q .sycl_device .has_aspect_fp64 and (
213
+ _typestr_has_fp64 (src_typestr ) or _typestr_has_fp64 (dst_typestr )
214
+ ):
215
+ pytest .skip ("Device does not support double precision" )
216
+ if not q .sycl_device .has_aspect_fp16 and (
217
+ _typestr_has_fp16 (src_typestr ) or _typestr_has_fp16 (dst_typestr )
218
+ ):
219
+ pytest .skip ("Device does not support half precision" )
175
220
176
221
src_dt = np .dtype (src_typestr )
177
222
dst_dt = np .dtype (dst_typestr )
@@ -188,16 +233,16 @@ def test_copy2d(src_typestr, dst_typestr, st1, sgn1, st2, sgn2):
188
233
slice (None , None , st1 * sgn1 ),
189
234
slice (None , None , st2 * sgn2 ),
190
235
]
191
- Y = dpt .empty ((n1 , n2 ), dtype = dst_dt )
236
+ Y = dpt .empty ((n1 , n2 ), dtype = dst_dt , device = X . device )
192
237
hev , ev = ti ._copy_usm_ndarray_into_usm_ndarray (src = X , dst = Y , sycl_queue = q )
193
238
Ynp = _force_cast (Xnp , dst_dt )
194
239
hev .wait ()
195
240
assert are_close (Ynp , dpt .asnumpy (Y ))
196
- Yst = dpt .empty ((2 * n1 , n2 ), dtype = dst_dt )[::2 , ::- 1 ]
241
+ Yst = dpt .empty ((2 * n1 , n2 ), dtype = dst_dt , device = X . device )[::2 , ::- 1 ]
197
242
hev , ev = ti ._copy_usm_ndarray_into_usm_ndarray (
198
243
src = X , dst = Yst , sycl_queue = q
199
244
)
200
- Y = dpt .empty ((n1 , n2 ), dtype = dst_dt )
245
+ Y = dpt .empty ((n1 , n2 ), dtype = dst_dt , device = X . device )
201
246
hev2 , ev2 = ti ._copy_usm_ndarray_into_usm_ndarray (
202
247
src = Yst , dst = Y , sycl_queue = q , depends = [ev ]
203
248
)
@@ -220,6 +265,14 @@ def test_copy3d(src_typestr, dst_typestr, st1, sgn1, st2, sgn2, st3, sgn3):
220
265
except dpctl .SyclQueueCreationError :
221
266
pytest .skip ("Queue could not be created" )
222
267
268
+ if not q .sycl_device .has_aspect_fp64 and (
269
+ _typestr_has_fp64 (src_typestr ) or _typestr_has_fp64 (dst_typestr )
270
+ ):
271
+ pytest .skip ("Device does not support double precision" )
272
+ if not q .sycl_device .has_aspect_fp16 and (
273
+ _typestr_has_fp16 (src_typestr ) or _typestr_has_fp16 (dst_typestr )
274
+ ):
275
+ pytest .skip ("Device does not support half precision" )
223
276
src_dt = np .dtype (src_typestr )
224
277
dst_dt = np .dtype (dst_typestr )
225
278
n1 , n2 , n3 = 5 , 4 , 6
@@ -237,16 +290,16 @@ def test_copy3d(src_typestr, dst_typestr, st1, sgn1, st2, sgn2, st3, sgn3):
237
290
slice (None , None , st2 * sgn2 ),
238
291
slice (None , None , st3 * sgn3 ),
239
292
]
240
- Y = dpt .empty ((n1 , n2 , n3 ), dtype = dst_dt )
293
+ Y = dpt .empty ((n1 , n2 , n3 ), dtype = dst_dt , device = X . device )
241
294
hev , ev = ti ._copy_usm_ndarray_into_usm_ndarray (src = X , dst = Y , sycl_queue = q )
242
295
Ynp = _force_cast (Xnp , dst_dt )
243
296
hev .wait ()
244
297
assert are_close (Ynp , dpt .asnumpy (Y )), "1"
245
- Yst = dpt .empty ((2 * n1 , n2 , n3 ), dtype = dst_dt )[::2 , ::- 1 ]
298
+ Yst = dpt .empty ((2 * n1 , n2 , n3 ), dtype = dst_dt , device = X . device )[::2 , ::- 1 ]
246
299
hev2 , ev2 = ti ._copy_usm_ndarray_into_usm_ndarray (
247
300
src = X , dst = Yst , sycl_queue = q
248
301
)
249
- Y2 = dpt .empty ((n1 , n2 , n3 ), dtype = dst_dt )
302
+ Y2 = dpt .empty ((n1 , n2 , n3 ), dtype = dst_dt , device = X . device )
250
303
hev3 , ev3 = ti ._copy_usm_ndarray_into_usm_ndarray (
251
304
src = Yst , dst = Y2 , sycl_queue = q , depends = [ev2 ]
252
305
)
0 commit comments