Add jupyter notebook on 3D Poisson using CTF. Make some fixes to get it working, add python speye and vecnorm routines. Permit contractions like sparse = dense*sparse by sparsifying dense matrix. Some issues involving scaling need to be addressed.

solomonik · solomonik · commit d1c6d943941d · 2018-06-28T08:29:27.000+02:00
diff --git a/doc/Tensor Decompositions for 3D Poisson using Cyclops.ipynb b/doc/Tensor Decompositions for 3D Poisson using Cyclops.ipynb
@@ -0,0 +1,315 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import ctf"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Construct 1D Poisson stiffness matrix $A$."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([[-2.,  1.,  0.,  0.],\n",
+       "       [ 1., -2.,  1.,  0.],\n",
+       "       [ 0.,  1., -2.,  1.],\n",
+       "       [ 0.,  0.,  1., -2.]])"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "n = 4\n",
+    "A = (-2.)*ctf.eye(n,n,sp=True) + ctf.eye(n,n,1,sp=True) + ctf.eye(n,n,-1,sp=True)\n",
+    "A"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Construct 3D Poisson stiffness matrix $T = A \\otimes I \\otimes I + I \\otimes A \\otimes I + I \\otimes I \\otimes A$ as an order 6 tensor."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "I = ctf.eye(n,n,sp=True) # sparse identity matrix\n",
+    "T = ctf.tensor((n,n,n,n,n,n),sp=True) # sparse tensor\n",
+    "T.i(\"aixbjy\") << A.i(\"ab\")*I.i(\"ij\")*I.i(\"xy\") + I.i(\"ab\")*A.i(\"ij\")*I.i(\"xy\") + I.i(\"ab\")*I.i(\"ij\")*A.i(\"xy\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The 3D Poisson stiffness matrix is full rank."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([10.85410197,  9.85410197,  9.85410197,  9.85410197,  8.85410197,\n",
+       "        8.85410197,  8.85410197,  8.61803399,  8.61803399,  8.61803399,\n",
+       "        7.85410197,  7.61803399,  7.61803399,  7.61803399,  7.61803399,\n",
+       "        7.61803399,  7.61803399,  7.61803399,  7.61803399,  7.61803399,\n",
+       "        6.61803399,  6.61803399,  6.61803399,  6.61803399,  6.61803399,\n",
+       "        6.61803399,  6.61803399,  6.61803399,  6.61803399,  6.38196601,\n",
+       "        6.38196601,  6.38196601,  5.61803399,  5.61803399,  5.61803399,\n",
+       "        5.38196601,  5.38196601,  5.38196601,  5.38196601,  5.38196601,\n",
+       "        5.38196601,  5.38196601,  5.38196601,  5.38196601,  4.38196601,\n",
+       "        4.38196601,  4.38196601,  4.38196601,  4.38196601,  4.38196601,\n",
+       "        4.38196601,  4.38196601,  4.38196601,  4.14589803,  3.38196601,\n",
+       "        3.38196601,  3.38196601,  3.14589803,  3.14589803,  3.14589803,\n",
+       "        2.14589803,  2.14589803,  2.14589803,  1.14589803])"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "[U,S,V] = ctf.svd(T.reshape((n*n*n,n*n*n)))\n",
+    "S"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "However, if we transpose the tensor modes, the Kronecker product gives a rank-2 form."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "6.871137023129482e-14\n"
+     ]
+    }
+   ],
+   "source": [
+    "T2 = ctf.tensor((n,n,n,n,n,n),sp=True)\n",
+    "T2.i(\"abijxy\") << T.i(\"aixbjy\") # transpose tensor\n",
+    "[U,S,V] = ctf.svd(T2.reshape((n*n, n*n*n*n)),2) # compute rank-2 SVD on unfolded tensor\n",
+    "print(ctf.vecnorm(T2.reshape((n*n, n*n*n*n))-U@ctf.diag(S,sp=True)@V)) # compute norm of error"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "In fact, there are two low-rank matrix unfoldings."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "7.495317009386868e-14\n"
+     ]
+    }
+   ],
+   "source": [
+    "[U,S,V] = ctf.svd(T2.reshape((n*n*n*n, n*n)),2) # compute rank-2 SVD on unfolded tensor\n",
+    "print(ctf.vecnorm(T2.reshape((n*n*n*n, n*n))-U@ctf.diag(S,sp=True)@V)) # compute norm of error"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We can construct a tensor train factorization to exploit both unfoldings. Both tensor train ranks are $2$."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "[U1,S1,V1] = ctf.svd(T2.reshape((n*n, n*n*n*n)),2) # compute rank-2 SVD on unfolded tensor\n",
+    "[U2,S2,V2] = ctf.svd((ctf.diag(S1,sp=True) @ V1).reshape((2*n*n, n*n)),2)\n",
+    "V2 = ctf.diag(S2,sp=True) @ V2\n",
+    "W1 = U1.reshape((n,n,2))\n",
+    "W2 = U2.reshape((2,n,n,2))\n",
+    "W3 = V2.reshape((2,n,n))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The tensor train factorization requires $O(n^2)$ storage for this tensor, which is $n\\times n\\times n\\times n\\times n\\times n$ and has $O(n^3)$ nonzeros."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "5.709367875808552e-14"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "E = ctf.tensor((n,n,n,n,n,n))\n",
+    "E.i(\"aixbjy\") << T.i(\"aixbjy\") - W1.i(\"abu\")*W2.i(\"uijv\")*W3.i(\"vxy\")\n",
+    "ctf.vecnorm(E)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
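+    "A rank-2 CP decomposition of this tensor provides further compression. Viewed as an order 3 tensor, $A \\otimes I \\otimes I + I \\otimes A \\otimes I + I \\otimes I \\otimes A$ has CP rank 3 but border rank 2, so a rank-2 approximation can be made arbitrarily accurate, but only as the norms of the factors grow."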
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from ctf import random\n",
+    "ctf.random.seed(42)\n",
+    "Z1 = ctf.random.random((n,n,2))\n",
+    "Z2 = ctf.random.random((n,n,2))\n",
+    "Z3 = ctf.random.random((n,n,2))\n",
+    "lmbda = ctf.random.random((2))\n",
+    "niter = 0\n",
+    "\n",
+    "def normalize(Z):\n",
+    "    \"\"\"Rescale each Z[:,:,u] to unit Frobenius norm in place and return the vector of original norms.\"\"\"\n",
+    "    norms = ctf.tensor(2)\n",
+    "    norms.i(\"u\") << Z.i(\"pqu\")*Z.i(\"pqu\") # squared norm of each of the 2 components\n",
+    "    norms = 1./norms**.5\n",
+    "    X = ctf.tensor(copy=Z)\n",
+    "    Z.set_zero() # << accumulates, so clear Z before writing the scaled copy\n",
+    "    Z.i(\"pqu\") << X.i(\"pqu\")*norms.i(\"u\")\n",
+    "    return 1./norms\n",
+    "\n",
+    "normalize(Z1)\n",
+    "normalize(Z2)\n",
+    "normalize(Z3)\n",
+    "\n",
+    "E = ctf.tensor((n,n,n,n,n,n))\n",
+    "E.i(\"aixbjy\") << T.i(\"aixbjy\") - lmbda.i(\"u\")*Z1.i(\"abu\")*Z2.i(\"iju\")*Z3.i(\"xyu\")\n",
+    "# ALS sweeps: solve for each factor via the SVD-based pseudoinverse of the Khatri-Rao product of the other two\n",
+    "while (ctf.vecnorm(E) > 1.e-6 and niter < 100):\n",
+    "    if niter % 10 == 0:\n",
+    "        print(ctf.vecnorm(E))\n",
+    "    M = ctf.tensor((n,n,n,n,2)) # update Z1 with Z2, Z3 fixed\n",
+    "    M.i(\"ijxyu\") << Z2.i(\"iju\")*Z3.i(\"xyu\")\n",
+    "    [U,S,V] = ctf.svd(M.reshape((n*n*n*n,2)),2)\n",
+    "    S = 1./S\n",
+    "    Z1.set_zero()\n",
+    "    Z1.i(\"abu\") << V.i(\"vu\")*S.i(\"v\")*U.reshape((n,n,n,n,2)).i(\"ijxyv\")*T.i(\"aixbjy\")\n",
+    "    \n",
+    "    normalize(Z1)\n",
+    "    # update Z2 with Z1, Z3 fixed\n",
+    "    M.set_zero()\n",
+    "    M.i(\"abxyu\") << Z1.i(\"abu\")*Z3.i(\"xyu\")\n",
+    "    [U,S,V] = ctf.svd(M.reshape((n*n*n*n,2)),2)\n",
+    "    S = 1./S\n",
+    "    Z2.set_zero()\n",
+    "    Z2.i(\"iju\") << V.i(\"vu\")*S.i(\"v\")*U.reshape((n,n,n,n,2)).i(\"abxyv\")*T.i(\"aixbjy\")\n",
+    "    \n",
+    "    normalize(Z2)\n",
+    "    # update Z3 with Z1, Z2 fixed\n",
+    "    M.set_zero()\n",
+    "    M.i(\"abiju\") << Z1.i(\"abu\")*Z2.i(\"iju\")\n",
+    "    [U,S,V] = ctf.svd(M.reshape((n*n*n*n,2)),2)\n",
+    "    S = 1./S\n",
+    "    Z3.set_zero()\n",
+    "    Z3.i(\"xyu\") << V.i(\"vu\")*S.i(\"v\")*U.reshape((n,n,n,n,2)).i(\"abijv\")*T.i(\"aixbjy\")\n",
+    "\n",
+    "    lmbda = normalize(Z3)\n",
+    "    # recompute the residual; << accumulates, so E must be cleared first\n",
+    "    E.set_zero()\n",
+    "    E.i(\"aixbjy\") << T.i(\"aixbjy\") - lmbda.i(\"u\")*Z1.i(\"abu\")*Z2.i(\"iju\")*Z3.i(\"xyu\")\n",
+    "    niter+=1\n",
+    "\n",
+    "ctf.vecnorm(E) # E already holds the final residual (accumulating it again with << would double it); display its norm"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.5.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/src/contraction/contraction.cxx b/src/contraction/contraction.cxx
@@ -4814,6 +4814,7 @@ namespace CTF_int {
       return SUCCESS;
     }
 
+
     CTF_int::contract_mst();
 
     //if (stype->tid_A == stype->tid_B || stype->tid_A == stype->tid_C){
@@ -4836,6 +4837,16 @@ namespace CTF_int {
     //CTF_ctr_type_t ntype = *stype;
     contraction new_ctr = contraction(*this);
 
+    if (C->is_sparse && !A->is_sparse){
+      new_ctr.A = new tensor(A, 1, 1);
+      new_ctr.A->sparsify(); 
+    }
+
+    if (C->is_sparse && !B->is_sparse){
+      new_ctr.B = new tensor(B, 1, 1);
+      new_ctr.B->sparsify(); 
+    }
+
     was_home_A = A->is_home;
     was_home_B = B->is_home;
     was_home_C = C->is_home;
diff --git a/src/interface/matrix.cxx b/src/interface/matrix.cxx
@@ -191,6 +191,7 @@ namespace CTF {
       int rblk = (rank+pr-rsrc)%pr;
       for (int64_t j=0; j<nmyr;  j++){
         pairs[i*nmyr+j].k = (cblk*nb+(i%nb))*nrow+rblk*mb+(j%mb);
+    //    pairs[i*nmyr+j].d = *(dtype*)this->sr->addid();
         //printf("RANK = %d, pairs[%ld].k=%ld\m",rank,i*nmyr+j,pairs[i*nmyr+j].k);
         if ((j+1)%mb == 0) rblk += pr;
       }
@@ -251,7 +252,8 @@ namespace CTF {
                                int     csrc,
                                int     lda,
                                dtype * data_){
-    if (mb==1 && nb==1 && nrow%pr==0 && ncol%pc==0 && rsrc==0 && csrc==0){
+    //FIXME: (1) can optimize sparse for this case (mapping cyclic), (2) can use permute to avoid sparse redistribution always
+    if (!this->is_sparse && (mb==1 && nb==1 && nrow%pr==0 && ncol%pc==0 && rsrc==0 && csrc==0)){
       if (this->edge_map[0].np == pr && this->edge_map[1].np == pc){
         if (lda == nrow/pc){
           memcpy((char*)data_, this->data, sizeof(dtype)*this->size);
@@ -276,11 +278,13 @@ namespace CTF {
       if (lda == nmyr){
         for (int64_t i=0; i<nmyr*nmyc; i++){
           data_[i] = pairs[i].d;
+          //printf("data %ld = %lf\n",i,data_[i]);
         }
       } else {
         for (int64_t i=0; i<nmyc; i++){
           for (int64_t j=0; j<nmyr; j++){
             data_[i*lda+j] = pairs[i*nmyr+j].d;
+            //printf("data %ld %ld = %lf\n",i,j,data_[i*lda+j]);
           }
         }
       }
diff --git a/src/interface/term.cxx b/src/interface/term.cxx
@@ -94,8 +94,8 @@ namespace CTF_int {
       sym_C[idx] = NS;
       idx++;
     }
-
-    tensor * tsr_C = new tensor(A.parent->sr, order_C, len_C, sym_C, A.parent->wrld, 1);
+    bool is_sparse_C = A.parent->is_sparse && B.parent->is_sparse;
+    tensor * tsr_C = new tensor(A.parent->sr, order_C, len_C, sym_C, A.parent->wrld, true, NULL, 1, is_sparse_C);
     Idx_Tensor * out = new Idx_Tensor(tsr_C, idx_C);
     //printf("A_inds =");
     //for (int i=0; i<A.parent->order; i++){
diff --git a/src/interface/term.h b/src/interface/term.h
@@ -159,9 +159,6 @@ namespace CTF_int {
        *        allows a scalar output
        */     
       operator int() const;
-
-
-
   };
 
   class Sum_Term : public Term {
diff --git a/src/redistribution/sparse_rw.cxx b/src/redistribution/sparse_rw.cxx
diff --git a/src_python/ctf/core.pyx b/src_python/ctf/core.pyx