Closed
Description
# Reproducer (IPython session: `%timeit` is an IPython magic, not plain Python).
# Times a broadcasted element-wise divide with dpctl.tensor on a CPU device and
# on a GPU device, then times the equivalent NumPy call for comparison.
import dpctl.tensor as dpt
# 1-D array of 8192 ones, dtype 'f4' (4-byte float), created with device='cpu'.
c = dpt.ones(8192, device='cpu', dtype='f4')
# c has shape (8192,) and c[:, None] has shape (8192, 1), so the two operands
# broadcast against each other and the result has shape (8192, 8192).
%timeit dpt.divide(c, c[:, None])
# Reported timing for the device='cpu' run:
#435 ms ± 777 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)
# Same setup and same broadcasted divide, with the array created with device='gpu'.
c_gpu = dpt.ones(8192, device='gpu', dtype='f4')
%timeit dpt.divide(c_gpu, c_gpu[:, None])
# Reported timing for the device='gpu' run:
#1.17 s ± 4.41 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
# NumPy baseline: same length, dtype and broadcast pattern as the runs above.
import numpy
c_np = numpy.ones(8192, dtype='f4')
%timeit numpy.divide(c_np, c_np[:, None])
# Reported NumPy timing; by these numbers it is roughly 6.5x faster than the
# dpctl device='cpu' run and roughly 17x faster than the device='gpu' run.
#67.2 ms ± 3.02 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
Metadata
Metadata
Assignees
Labels
No labels