Skip to content

Commit d1c6d94

Browse files
committed
Add jupyter notebook on 3D Poisson using CTF. Make some fixes to get it working, add python speye and vecnorm routines. Permit contractions like sparse = dense*sparse by sparsifying dense matrix. Some issues involving scaling need to be addressed.
1 parent 987f852 commit d1c6d94

File tree

7 files changed

+396
-21
lines changed

7 files changed

+396
-21
lines changed
Lines changed: 315 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,315 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"import ctf"
10+
]
11+
},
12+
{
13+
"cell_type": "markdown",
14+
"metadata": {},
15+
"source": [
16+
"Construct 1D Poisson stiffness matrix $A$."
17+
]
18+
},
19+
{
20+
"cell_type": "code",
21+
"execution_count": 2,
22+
"metadata": {},
23+
"outputs": [
24+
{
25+
"data": {
26+
"text/plain": [
27+
"array([[-2., 1., 0., 0.],\n",
28+
" [ 1., -2., 1., 0.],\n",
29+
" [ 0., 1., -2., 1.],\n",
30+
" [ 0., 0., 1., -2.]])"
31+
]
32+
},
33+
"execution_count": 2,
34+
"metadata": {},
35+
"output_type": "execute_result"
36+
}
37+
],
38+
"source": [
39+
"n = 4\n",
40+
"A = (-2.)*ctf.eye(n,n,sp=True) + ctf.eye(n,n,1,sp=True) + ctf.eye(n,n,-1,sp=True)\n",
41+
"A"
42+
]
43+
},
44+
{
45+
"cell_type": "markdown",
46+
"metadata": {},
47+
"source": [
48+
"Construct 3D Poisson stiffness matrix $T = A \\otimes I \\otimes I + I \\otimes A \\otimes I + I \\otimes I \\otimes A$ as an order 6 tensor."
49+
]
50+
},
51+
{
52+
"cell_type": "code",
53+
"execution_count": 3,
54+
"metadata": {},
55+
"outputs": [],
56+
"source": [
57+
"I = ctf.eye(n,n,sp=True) # sparse identity matrix\n",
58+
"T = ctf.tensor((n,n,n,n,n,n),sp=True) # sparse tensor\n",
59+
"T.i(\"aixbjy\") << A.i(\"ab\")*I.i(\"ij\")*I.i(\"xy\") + I.i(\"ab\")*A.i(\"ij\")*I.i(\"xy\") + I.i(\"ab\")*I.i(\"ij\")*A.i(\"xy\")"
60+
]
61+
},
62+
{
63+
"cell_type": "markdown",
64+
"metadata": {},
65+
"source": [
66+
"The 3D Poisson stiffness matrix is full rank."
67+
]
68+
},
69+
{
70+
"cell_type": "code",
71+
"execution_count": 4,
72+
"metadata": {},
73+
"outputs": [
74+
{
75+
"data": {
76+
"text/plain": [
77+
"array([10.85410197, 9.85410197, 9.85410197, 9.85410197, 8.85410197,\n",
78+
" 8.85410197, 8.85410197, 8.61803399, 8.61803399, 8.61803399,\n",
79+
" 7.85410197, 7.61803399, 7.61803399, 7.61803399, 7.61803399,\n",
80+
" 7.61803399, 7.61803399, 7.61803399, 7.61803399, 7.61803399,\n",
81+
" 6.61803399, 6.61803399, 6.61803399, 6.61803399, 6.61803399,\n",
82+
" 6.61803399, 6.61803399, 6.61803399, 6.61803399, 6.38196601,\n",
83+
" 6.38196601, 6.38196601, 5.61803399, 5.61803399, 5.61803399,\n",
84+
" 5.38196601, 5.38196601, 5.38196601, 5.38196601, 5.38196601,\n",
85+
" 5.38196601, 5.38196601, 5.38196601, 5.38196601, 4.38196601,\n",
86+
" 4.38196601, 4.38196601, 4.38196601, 4.38196601, 4.38196601,\n",
87+
" 4.38196601, 4.38196601, 4.38196601, 4.14589803, 3.38196601,\n",
88+
" 3.38196601, 3.38196601, 3.14589803, 3.14589803, 3.14589803,\n",
89+
" 2.14589803, 2.14589803, 2.14589803, 1.14589803])"
90+
]
91+
},
92+
"execution_count": 4,
93+
"metadata": {},
94+
"output_type": "execute_result"
95+
}
96+
],
97+
"source": [
98+
"[U,S,V] = ctf.svd(T.reshape((n*n*n,n*n*n)))\n",
99+
"S"
100+
]
101+
},
102+
{
103+
"cell_type": "markdown",
104+
"metadata": {},
105+
"source": [
106+
"However, if we transpose the tensor modes, the Kronecker product gives a rank-2 form."
107+
]
108+
},
109+
{
110+
"cell_type": "code",
111+
"execution_count": 5,
112+
"metadata": {},
113+
"outputs": [
114+
{
115+
"name": "stdout",
116+
"output_type": "stream",
117+
"text": [
118+
"6.871137023129482e-14\n"
119+
]
120+
}
121+
],
122+
"source": [
123+
"T2 = ctf.tensor((n,n,n,n,n,n),sp=True)\n",
124+
"T2.i(\"abijxy\") << T.i(\"aixbjy\") # transpose tensor\n",
125+
"[U,S,V] = ctf.svd(T2.reshape((n*n, n*n*n*n)),2) # compute rank-2 SVD on unfolded tensor\n",
126+
"print(ctf.vecnorm(T2.reshape((n*n, n*n*n*n))-U@ctf.diag(S,sp=True)@V)) # compute norm of error"
127+
]
128+
},
129+
{
130+
"cell_type": "markdown",
131+
"metadata": {},
132+
"source": [
133+
"In fact, there are two low-rank matrix unfoldings."
134+
]
135+
},
136+
{
137+
"cell_type": "code",
138+
"execution_count": 6,
139+
"metadata": {},
140+
"outputs": [
141+
{
142+
"name": "stdout",
143+
"output_type": "stream",
144+
"text": [
145+
"7.495317009386868e-14\n"
146+
]
147+
}
148+
],
149+
"source": [
150+
"[U,S,V] = ctf.svd(T2.reshape((n*n*n*n, n*n)),2) # compute rank-2 SVD on unfolded tensor\n",
151+
"print(ctf.vecnorm(T2.reshape((n*n*n*n, n*n))-U@ctf.diag(S,sp=True)@V)) # compute norm of error"
152+
]
153+
},
154+
{
155+
"cell_type": "markdown",
156+
"metadata": {},
157+
"source": [
158+
"We can construct a tensor train factorization to exploit both unfoldings. The tensor train ranks are $2\\times 2$."
159+
]
160+
},
161+
{
162+
"cell_type": "code",
163+
"execution_count": 7,
164+
"metadata": {},
165+
"outputs": [],
166+
"source": [
167+
"[U1,S1,V1] = ctf.svd(T2.reshape((n*n, n*n*n*n)),2) # compute rank-2 SVD on unfolded tensor\n",
168+
"[U2,S2,V2] = ctf.svd((ctf.diag(S1,sp=True) @ V1).reshape((2*n*n, n*n)),2)\n",
169+
"V2 = ctf.diag(S2,sp=True) @ V2\n",
170+
"W1 = U1.reshape((n,n,2))\n",
171+
"W2 = U2.reshape((2,n,n,2))\n",
172+
"W3 = V2.reshape((2,n,n))"
173+
]
174+
},
175+
{
176+
"cell_type": "markdown",
177+
"metadata": {},
178+
"source": [
179+
"The tensor train factorization requires $O(n^2)$ storage for this tensor, which is $n\\times n\\times n\\times n\\times n\\times n$ and has $O(n^3)$ nonzeros."
180+
]
181+
},
182+
{
183+
"cell_type": "code",
184+
"execution_count": 8,
185+
"metadata": {},
186+
"outputs": [
187+
{
188+
"data": {
189+
"text/plain": [
190+
"5.709367875808552e-14"
191+
]
192+
},
193+
"execution_count": 8,
194+
"metadata": {},
195+
"output_type": "execute_result"
196+
}
197+
],
198+
"source": [
199+
"E = ctf.tensor((n,n,n,n,n,n))\n",
200+
"E.i(\"aixbjy\") << T.i(\"aixbjy\") - W1.i(\"abu\")*W2.i(\"uijv\")*W3.i(\"vxy\")\n",
201+
"ctf.vecnorm(E)"
202+
]
203+
},
204+
{
205+
"cell_type": "markdown",
206+
"metadata": {},
207+
"source": [
208+
"The CP decomposition of this tensor should be rank 2 and provides further compression."
209+
]
210+
},
211+
{
212+
"cell_type": "code",
213+
"execution_count": null,
214+
"metadata": {},
215+
"outputs": [],
216+
"source": [
217+
"from ctf import random\n",
218+
"ctf.random.seed(42)\n",
219+
"Z1 = ctf.random.random((n,n,2))\n",
220+
"Z2 = ctf.random.random((n,n,2))\n",
221+
"Z3 = ctf.random.random((n,n,2))\n",
222+
"lmbda = ctf.random.random((2))\n",
223+
"\n",
224+
"niter = 0\n",
225+
"\n",
226+
"def normalize(Z):\n",
227+
" norms = ctf.tensor(2)\n",
228+
" norms.i(\"u\") << Z.i(\"pqu\")*Z.i(\"pqu\")\n",
229+
" norms = 1./norms**.5\n",
230+
" X = ctf.tensor(copy=Z)\n",
231+
" Z.set_zero()\n",
232+
" Z.i(\"pqu\") << X.i(\"pqu\")*norms.i(\"u\")\n",
233+
" return 1./norms\n",
234+
"\n",
235+
"normalize(Z1)\n",
236+
"normalize(Z2)\n",
237+
"normalize(Z3)\n",
238+
"\n",
239+
"E = ctf.tensor((n,n,n,n,n,n))\n",
240+
"E.i(\"aixbjy\") << T.i(\"aixbjy\") - lmbda.i(\"u\")*Z1.i(\"abu\")*Z2.i(\"iju\")*Z3.i(\"xyu\")\n",
241+
"\n",
242+
"while (ctf.vecnorm(E) > 1.e-6 and niter < 100):\n",
243+
" if niter % 10 == 0:\n",
244+
" print(ctf.vecnorm(E))\n",
245+
" M = ctf.tensor((n,n,n,n,2))\n",
246+
" M.i(\"ijxyu\") << Z2.i(\"iju\")*Z3.i(\"xyu\")\n",
247+
" [U,S,V] = ctf.svd(M.reshape((n*n*n*n,2)),2)\n",
248+
" S = 1./S\n",
249+
" Z1.set_zero()\n",
250+
" Z1.i(\"abu\") << V.i(\"vu\")*S.i(\"v\")*U.reshape((n,n,n,n,2)).i(\"ijxyv\")*T.i(\"aixbjy\")\n",
251+
" \n",
252+
" normalize(Z1)\n",
253+
" \n",
254+
" M.set_zero()\n",
255+
" M.i(\"abxyu\") << Z1.i(\"abu\")*Z3.i(\"xyu\")\n",
256+
" [U,S,V] = ctf.svd(M.reshape((n*n*n*n,2)),2)\n",
257+
" S = 1./S\n",
258+
" Z2.set_zero()\n",
259+
" Z2.i(\"iju\") << V.i(\"vu\")*S.i(\"v\")*U.reshape((n,n,n,n,2)).i(\"abxyv\")*T.i(\"aixbjy\")\n",
260+
" \n",
261+
" normalize(Z2)\n",
262+
" \n",
263+
" M.set_zero()\n",
264+
" M.i(\"abiju\") << Z1.i(\"abu\")*Z2.i(\"iju\")\n",
265+
" [U,S,V] = ctf.svd(M.reshape((n*n*n*n,2)),2)\n",
266+
" S = 1./S\n",
267+
" Z3.set_zero()\n",
268+
" Z3.i(\"xyu\") << V.i(\"vu\")*S.i(\"v\")*U.reshape((n,n,n,n,2)).i(\"abijv\")*T.i(\"aixbjy\")\n",
269+
"\n",
270+
" lmbda = normalize(Z3)\n",
271+
" \n",
272+
" E.set_zero()\n",
273+
" E.i(\"aixbjy\") << T.i(\"aixbjy\") - lmbda.i(\"u\")*Z1.i(\"abu\")*Z2.i(\"iju\")*Z3.i(\"xyu\")\n",
274+
" niter+=1\n",
275+
"\n",
276+
"E.i(\"aixbjy\") << T.i(\"aixbjy\") - lmbda.i(\"u\")*Z1.i(\"abu\")*Z2.i(\"iju\")*Z3.i(\"xyu\")"
277+
]
278+
},
279+
{
280+
"cell_type": "code",
281+
"execution_count": null,
282+
"metadata": {},
283+
"outputs": [],
284+
"source": []
285+
},
286+
{
287+
"cell_type": "code",
288+
"execution_count": null,
289+
"metadata": {},
290+
"outputs": [],
291+
"source": []
292+
}
293+
],
294+
"metadata": {
295+
"kernelspec": {
296+
"display_name": "Python 3",
297+
"language": "python",
298+
"name": "python3"
299+
},
300+
"language_info": {
301+
"codemirror_mode": {
302+
"name": "ipython",
303+
"version": 3
304+
},
305+
"file_extension": ".py",
306+
"mimetype": "text/x-python",
307+
"name": "python",
308+
"nbconvert_exporter": "python",
309+
"pygments_lexer": "ipython3",
310+
"version": "3.5.2"
311+
}
312+
},
313+
"nbformat": 4,
314+
"nbformat_minor": 2
315+
}

src/contraction/contraction.cxx

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4814,6 +4814,7 @@ namespace CTF_int {
48144814
return SUCCESS;
48154815
}
48164816

4817+
48174818
CTF_int::contract_mst();
48184819

48194820
//if (stype->tid_A == stype->tid_B || stype->tid_A == stype->tid_C){
@@ -4836,6 +4837,16 @@ namespace CTF_int {
48364837
//CTF_ctr_type_t ntype = *stype;
48374838
contraction new_ctr = contraction(*this);
48384839

4840+
if (C->is_sparse && !A->is_sparse){
4841+
new_ctr.A = new tensor(A, 1, 1);
4842+
new_ctr.A->sparsify();
4843+
}
4844+
4845+
if (C->is_sparse && !B->is_sparse){
4846+
new_ctr.B = new tensor(B, 1, 1);
4847+
new_ctr.B->sparsify();
4848+
}
4849+
48394850
was_home_A = A->is_home;
48404851
was_home_B = B->is_home;
48414852
was_home_C = C->is_home;

src/interface/matrix.cxx

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,7 @@ namespace CTF {
191191
int rblk = (rank+pr-rsrc)%pr;
192192
for (int64_t j=0; j<nmyr; j++){
193193
pairs[i*nmyr+j].k = (cblk*nb+(i%nb))*nrow+rblk*mb+(j%mb);
194+
// pairs[i*nmyr+j].d = *(dtype*)this->sr->addid();
194195
//printf("RANK = %d, pairs[%ld].k=%ld\m",rank,i*nmyr+j,pairs[i*nmyr+j].k);
195196
if ((j+1)%mb == 0) rblk += pr;
196197
}
@@ -251,7 +252,8 @@ namespace CTF {
251252
int csrc,
252253
int lda,
253254
dtype * data_){
254-
if (mb==1 && nb==1 && nrow%pr==0 && ncol%pc==0 && rsrc==0 && csrc==0){
255+
//FIXME: (1) can optimize sparse for this case (mapping cyclic), (2) can use permute to avoid sparse redistribution always
256+
if (!this->is_sparse && (mb==1 && nb==1 && nrow%pr==0 && ncol%pc==0 && rsrc==0 && csrc==0)){
255257
if (this->edge_map[0].np == pr && this->edge_map[1].np == pc){
256258
if (lda == nrow/pc){
257259
memcpy((char*)data_, this->data, sizeof(dtype)*this->size);
@@ -276,11 +278,13 @@ namespace CTF {
276278
if (lda == nmyr){
277279
for (int64_t i=0; i<nmyr*nmyc; i++){
278280
data_[i] = pairs[i].d;
281+
//printf("data %ld = %lf\n",i,data_[i]);
279282
}
280283
} else {
281284
for (int64_t i=0; i<nmyc; i++){
282285
for (int64_t j=0; j<nmyr; j++){
283286
data_[i*lda+j] = pairs[i*nmyr+j].d;
287+
//printf("data %ld %ld = %lf\n",i,j,data_[i*lda+j]);
284288
}
285289
}
286290
}

src/interface/term.cxx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,8 +94,8 @@ namespace CTF_int {
9494
sym_C[idx] = NS;
9595
idx++;
9696
}
97-
98-
tensor * tsr_C = new tensor(A.parent->sr, order_C, len_C, sym_C, A.parent->wrld, 1);
97+
bool is_sparse_C = A.parent->is_sparse && B.parent->is_sparse;
98+
tensor * tsr_C = new tensor(A.parent->sr, order_C, len_C, sym_C, A.parent->wrld, true, NULL, 1, is_sparse_C);
9999
Idx_Tensor * out = new Idx_Tensor(tsr_C, idx_C);
100100
//printf("A_inds =");
101101
//for (int i=0; i<A.parent->order; i++){

src/interface/term.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -159,9 +159,6 @@ namespace CTF_int {
159159
* allows a scalar output
160160
*/
161161
operator int() const;
162-
163-
164-
165162
};
166163

167164
class Sum_Term : public Term {

0 commit comments

Comments
 (0)