-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathsop.pyx
133 lines (117 loc) · 3.86 KB
/
sop.pyx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import numpy as np
cimport numpy as np
cimport cython
def sum_of_prod(ops, op_axes, order='K', itdump=False, **kwargs):
cdef int nop
nop = len(ops)
if nop==2:
return sum_product_cy(ops, op_axes, order=order)
if nop==3:
return sum_product_cy3(ops, op_axes, order=order)
ops.append(None)
flags = ['reduce_ok','buffered', 'external_loop',
'delay_bufalloc', 'grow_inner',
'zerosize_ok', 'refs_ok']
op_flags = [['readonly']]*nop + [['allocate','readwrite']]
it = np.nditer(ops, flags, op_flags, op_axes=op_axes,
order=order)
it.operands[nop][...] = 0
it.reset()
cnt = 0
if itdump:
it.debug_print()
if nop==1:
# a sum without multiply
for (x,w) in it:
w[...] += x
cnt += 1
elif nop==2:
for (x,y,w) in it:
w[...] += x*y
cnt += 1
elif nop==3:
for (x,y,z,w) in it:
w[...] += x*y*z
cnt += 1
else:
raise ValueError('calc for more than 3 nop not implemented')
return it.operands[nop]
@cython.boundscheck(False)
def sum_product_cy(ops, op_axes, order='K'):
#(arr, axis=None, out=None):
cdef np.ndarray[double] x
cdef np.ndarray[double] y
cdef np.ndarray[double] w
cdef int size
cdef int nop
nop = len(ops)
ops.append(None)
"""
axeslist = axis_to_axeslist(axis, arr.ndim)
it = np.nditer([arr, out], flags=['reduce_ok', 'external_loop',
'buffered', 'delay_bufalloc'],
op_flags=[['readonly'], ['readwrite', 'allocate']],
op_axes=[None, axeslist],
op_dtypes=['float64', 'float64'])
"""
flags = ['reduce_ok','buffered', 'external_loop',
'delay_bufalloc', 'grow_inner',
'zerosize_ok', 'refs_ok']
op_flags = [['readonly']]*nop + [['allocate','readwrite']]
it = np.nditer(ops, flags, op_flags, op_axes=op_axes, order=order)
it.operands[nop][...] = 0
it.reset()
for xarr, yarr, warr in it:
x = xarr
y = yarr
w = warr
size = x.shape[0]
for i in range(size):
w[i] = w[i] + x[i] * y[i]
return it.operands[nop]
@cython.boundscheck(False)
def sum_product_cy3(ops, op_axes, order='K'):
#(arr, axis=None, out=None):
cdef np.ndarray[double] x
cdef np.ndarray[double] y
cdef np.ndarray[double] z
cdef np.ndarray[double] w
cdef int size
cdef int nop
nop = len(ops)
ops.append(None)
"""
axeslist = axis_to_axeslist(axis, arr.ndim)
it = np.nditer([arr, out], flags=['reduce_ok', 'external_loop',
'buffered', 'delay_bufalloc'],
op_flags=[['readonly'], ['readwrite', 'allocate']],
op_axes=[None, axeslist],
op_dtypes=['float64', 'float64'])
"""
flags = ['reduce_ok','buffered', 'external_loop',
'delay_bufalloc', 'grow_inner',
'zerosize_ok', 'refs_ok']
op_flags = [['readonly']]*nop + [['allocate','readwrite']]
it = np.nditer(ops, flags, op_flags, op_axes=op_axes, order=order)
it.operands[nop][...] = 0
it.reset()
for xarr, yarr, zarr, warr in it:
x = xarr
y = yarr
z = zarr
w = warr
size = x.shape[0]
for i in range(size):
w[i] = w[i] + x[i] * y[i] * z[i]
return it.operands[nop]
"""
# ops need to be double to match (for now) dtype in code
x=np.arange(1000,dtype=np.double).reshape(10,10,10)
y=np.arange(1000,dtype=np.double).reshape(10,10,10)
einsum_py.myeinsum('ijk,ijk->ijk',x,y,debug=True).shape
# use op_axes from this
op_axes=[[1, 2, 0], [1, 2, 0], [1, 2, 0]]
ops=[x,y];w=sop.sum_product_cy(ops,op_axes)
# cython sop.pyx
# gcc -shared -pthread -fPIC -fwrapv -O2 -Wall -fno-strict-aliasing -I/usr/local/include/python2.7 -o sop.so sop.c
"""