diff --git a/aten/src/ATen/native/sparse/SparseTensor.cpp b/aten/src/ATen/native/sparse/SparseTensor.cpp
index 091ec2ce431e6..c96e577ba9d47 100644
--- a/aten/src/ATen/native/sparse/SparseTensor.cpp
+++ b/aten/src/ATen/native/sparse/SparseTensor.cpp
@@ -125,7 +125,7 @@ SparseTensor new_with_dims_and_size_sparse(const SparseType& dtype, int64_t spar
   SparseTensor self = new_sparse(dtype);
   AT_CHECK(size.size() != 0, "cannot construct sparse tensor with 0 dimensions and no values; you must specify at least 1 dimension if you want to create a sparse tensor with no elements, \
-or you must provide a single-element `values` tensor (e.g. x=torch.sparse_coo_tensor(torch.zeros(0,1), 12.3, [])) if you want to create a scalar sparse tensor");
+or you must provide a single-element `values` tensor (e.g. x = torch.sparse_coo_tensor(torch.zeros(0, 1), 12.3, [])) if you want to create a scalar sparse tensor");
   _get_sparse_impl(self)->resize_and_clear_(sparseDims, denseDims, size);
   return self;
 }
@@ -173,17 +173,24 @@ SparseTensor new_with_tensor_and_size_sparse(const LongTensor& indices, const Te
   // Check to make sure all indices are within the boundaries of `sizes`
   if (indices.numel() > 0) {
+    LongTensor min_indices = std::get<0>(indices.min(/* dim */ 1, /* keepdim */ false));
     LongTensor max_indices = std::get<0>(indices.max(/* dim */ 1, /* keepdim */ false));
-    LongTensor cpu_max_indices;
-    if (max_indices.is_cuda()) {
+    LongTensor cpu_min_indices, cpu_max_indices;
+    if (indices.is_cuda()) {
+      cpu_min_indices = at::CPU(kLong).copy(min_indices);
       cpu_max_indices = at::CPU(kLong).copy(max_indices);
     } else {
+      cpu_min_indices = min_indices;
       cpu_max_indices = max_indices;
     }
+    auto cpu_min_indices_accessor = cpu_min_indices.accessor<int64_t, 1>();
     auto cpu_max_indices_accessor = cpu_max_indices.accessor<int64_t, 1>();
     for (int64_t d = 0; d < sparseDims; d++) {
       // NB: This used to sync ndim times to access each entry; now we copy
       // everything to CPU first and then access it.
+      int64_t min_index_in_dim = cpu_min_indices_accessor[d];
+      AT_CHECK(min_index_in_dim >= 0,
+               "found negative index ", min_index_in_dim, " for dim ", d);
       int64_t max_index_in_dim = cpu_max_indices_accessor[d];
       int64_t dim_size = sizes[static_cast<size_t>(d)];
       AT_CHECK(max_index_in_dim < dim_size,
diff --git a/aten/src/ATen/native/sparse/SparseUtils.h b/aten/src/ATen/native/sparse/SparseUtils.h
index d48b837bab7c7..3ce0eee53353e 100644
--- a/aten/src/ATen/native/sparse/SparseUtils.h
+++ b/aten/src/ATen/native/sparse/SparseUtils.h
@@ -108,16 +108,9 @@ inline LongTensor _newFlattenedIndices(const SparseTensor& self, bool forceClone
 // TODO: Expose this for real in ATen, some day?
 // NB: Doesn't preserve data.
 inline Tensor _new_values_with_size_of(const Tensor& values, int64_t nnz) {
-  if (values.numel() == 0) { // values tensor uninitialized
-    // TODO: This logic looks bogus; if we have an uninitialized
-    // values tensor, why should we believe that denseDims == 0?
-    // That's the assumption this code makes.
-    return values.type().tensor({nnz});
-  } else {
-    std::vector<int64_t> size = values.sizes().vec();
-    size[0] = nnz;
-    return values.type().tensor(size);
-  }
+  std::vector<int64_t> size = values.sizes().vec();
+  size[0] = nnz;
+  return values.type().tensor(size);
 }
diff --git a/test/expect/TestCudaSparse.test_print.expect b/test/expect/TestCudaSparse.test_print.expect
new file mode 100644
index 0000000000000..cae534bd16e8d
--- /dev/null
+++ b/test/expect/TestCudaSparse.test_print.expect
@@ -0,0 +1,294 @@
+# shape: torch.Size([])
+# nnz: 2
+# sparseDim: 0
+# indices shape: torch.Size([0, 2])
+# values shape: torch.Size([2])
+########## torch.int32 ##########
+# sparse tensor
+tensor(indices=tensor([], size=(0, 2)),
+       values=tensor([0, 1]),
+       device='cuda:0', size=(), nnz=2, dtype=torch.int32,
+       layout=torch.sparse_coo)
+# _indices
+tensor([], device='cuda:0', size=(0, 2), dtype=torch.int64)
+# _values
+tensor([0, 1], device='cuda:0', dtype=torch.int32)
+########## torch.float32 ##########
+# sparse tensor
+tensor(indices=tensor([], size=(0, 2)),
+       values=tensor([0., 1.]),
+       device='cuda:0', size=(), nnz=2, dtype=torch.float32,
+       layout=torch.sparse_coo)
+# after requires_grad_
+tensor(indices=tensor([], size=(0, 2)),
+       values=tensor([0., 1.]),
+       device='cuda:0', size=(), nnz=2, dtype=torch.float32,
+       layout=torch.sparse_coo, requires_grad=True)
+# after addition
+tensor(indices=tensor([], size=(0, 4)),
+       values=tensor([0., 1., 0., 1.]),
+       device='cuda:0', size=(), nnz=4, dtype=torch.float32,
+       layout=torch.sparse_coo, grad_fn=)
+# _indices
+tensor([], device='cuda:0', size=(0, 2), dtype=torch.int64,
+       grad_fn=)
+# _values
+tensor([0., 1.], device='cuda:0', dtype=torch.float32,
+       grad_fn=)
+
+# shape: torch.Size([0])
+# nnz: 10
+# sparseDim: 0
+# indices shape: torch.Size([0, 10])
+# values shape: torch.Size([10, 0])
+########## torch.int32 ##########
+# sparse tensor
+tensor(indices=tensor([], size=(0, 10)),
+       values=tensor([], size=(10, 0)),
+       device='cuda:0', size=(0,), nnz=10, dtype=torch.int32,
+       layout=torch.sparse_coo)
+# _indices
+tensor([], device='cuda:0', size=(0, 10), dtype=torch.int64)
+# _values
+tensor([], device='cuda:0', size=(10, 0), dtype=torch.int32)
+########## torch.float32 ##########
+# sparse tensor
+tensor(indices=tensor([], size=(0, 10)),
+       values=tensor([], size=(10, 0)),
+       device='cuda:0', size=(0,), nnz=10, dtype=torch.float32,
+       layout=torch.sparse_coo)
+# after requires_grad_
+tensor(indices=tensor([], size=(0, 10)),
+       values=tensor([], size=(10, 0)),
+       device='cuda:0', size=(0,), nnz=10, dtype=torch.float32,
+       layout=torch.sparse_coo, requires_grad=True)
+# after addition
+tensor(indices=tensor([], size=(0, 20)),
+       values=tensor([], size=(20, 0)),
+       device='cuda:0', size=(0,), nnz=20, dtype=torch.float32,
+       layout=torch.sparse_coo, grad_fn=)
+# _indices
+tensor([], device='cuda:0', size=(0, 10), dtype=torch.int64,
+       grad_fn=)
+# _values
+tensor([], device='cuda:0', size=(10, 0), dtype=torch.float32,
+       grad_fn=)
+
+# shape: torch.Size([2])
+# nnz: 3
+# sparseDim: 0
+# indices shape: torch.Size([0, 3])
+# values shape: torch.Size([3, 2])
+########## torch.int32 ##########
+# sparse tensor
+tensor(indices=tensor([], size=(0, 3)),
+       values=tensor([[0, 0],
+                      [0, 1],
+                      [1, 1]]),
+       device='cuda:0', size=(2,), nnz=3, dtype=torch.int32,
+       layout=torch.sparse_coo)
+# _indices
+tensor([], device='cuda:0', size=(0, 3), dtype=torch.int64)
+# _values
+tensor([[0, 0],
+        [0, 1],
+        [1, 1]], device='cuda:0', dtype=torch.int32)
+########## torch.float32 ##########
+# sparse tensor
+tensor(indices=tensor([], size=(0, 3)), + values=tensor([[0.0000, 0.3333], + [0.6667, 1.0000], + [1.3333, 1.6667]]), + device='cuda:0', size=(2,), nnz=3, dtype=torch.float32, + layout=torch.sparse_coo) +# after requires_grad_ +tensor(indices=tensor([], size=(0, 3)), + values=tensor([[0.0000, 0.3333], + [0.6667, 1.0000], + [1.3333, 1.6667]]), + device='cuda:0', size=(2,), nnz=3, dtype=torch.float32, + layout=torch.sparse_coo, requires_grad=True) +# after addition +tensor(indices=tensor([], size=(0, 6)), + values=tensor([[0.0000, 0.3333], + [0.6667, 1.0000], + [1.3333, 1.6667], + [0.0000, 0.3333], + [0.6667, 1.0000], + [1.3333, 1.6667]]), + device='cuda:0', size=(2,), nnz=6, dtype=torch.float32, + layout=torch.sparse_coo, grad_fn=) +# _indices +tensor([], device='cuda:0', size=(0, 3), dtype=torch.int64, + grad_fn=) +# _values +tensor([[0.0000, 0.3333], + [0.6667, 1.0000], + [1.3333, 1.6667]], device='cuda:0', dtype=torch.float32, + grad_fn=) + +# shape: torch.Size([100, 3]) +# nnz: 3 +# sparseDim: 1 +# indices shape: torch.Size([1, 3]) +# values shape: torch.Size([3, 3]) +########## torch.int32 ########## +# sparse tensor +tensor(indices=tensor([[0, 1, 2]]), + values=tensor([[0, 0, 0], + [0, 0, 1], + [1, 1, 1]]), + device='cuda:0', size=(100, 3), nnz=3, dtype=torch.int32, + layout=torch.sparse_coo) +# _indices +tensor([[0, 1, 2]], device='cuda:0') +# _values +tensor([[0, 0, 0], + [0, 0, 1], + [1, 1, 1]], device='cuda:0', dtype=torch.int32) +########## torch.float32 ########## +# sparse tensor +tensor(indices=tensor([[0, 1, 2]]), + values=tensor([[0.0000, 0.2222, 0.4444], + [0.6667, 0.8889, 1.1111], + [1.3333, 1.5556, 1.7778]]), + device='cuda:0', size=(100, 3), nnz=3, dtype=torch.float32, + layout=torch.sparse_coo) +# after requires_grad_ +tensor(indices=tensor([[0, 1, 2]]), + values=tensor([[0.0000, 0.2222, 0.4444], + [0.6667, 0.8889, 1.1111], + [1.3333, 1.5556, 1.7778]]), + device='cuda:0', size=(100, 3), nnz=3, dtype=torch.float32, + layout=torch.sparse_coo, requires_grad=True) +# after addition +tensor(indices=tensor([[0, 1, 2, 0, 1, 2]]), + values=tensor([[0.0000, 0.2222, 0.4444], + [0.6667, 0.8889, 1.1111], + [1.3333, 1.5556, 1.7778], + [0.0000, 0.2222, 0.4444], + [0.6667, 0.8889, 1.1111], + [1.3333, 1.5556, 1.7778]]), + device='cuda:0', size=(100, 3), nnz=6, dtype=torch.float32, + layout=torch.sparse_coo, grad_fn=) +# _indices +tensor([[0, 1, 2]], device='cuda:0', grad_fn=) +# _values +tensor([[0.0000, 0.2222, 0.4444], + [0.6667, 0.8889, 1.1111], + [1.3333, 1.5556, 1.7778]], device='cuda:0', dtype=torch.float32, + grad_fn=) + +# shape: torch.Size([100, 20, 3]) +# nnz: 0 +# sparseDim: 2 +# indices shape: torch.Size([2, 0]) +# values shape: torch.Size([0, 3]) +########## torch.int32 ########## +# sparse tensor +tensor(indices=tensor([], size=(2, 0)), + values=tensor([], size=(0, 3)), + device='cuda:0', size=(100, 20, 3), nnz=0, dtype=torch.int32, + layout=torch.sparse_coo) +# _indices +tensor([], device='cuda:0', size=(2, 0), dtype=torch.int64) +# _values +tensor([], device='cuda:0', size=(0, 3), dtype=torch.int32) +########## torch.float32 ########## +# sparse tensor +tensor(indices=tensor([], size=(2, 0)), + values=tensor([], size=(0, 3)), + device='cuda:0', size=(100, 20, 3), nnz=0, dtype=torch.float32, + layout=torch.sparse_coo) +# after requires_grad_ +tensor(indices=tensor([], size=(2, 0)), + values=tensor([], size=(0, 3)), + device='cuda:0', size=(100, 20, 3), nnz=0, dtype=torch.float32, + layout=torch.sparse_coo, requires_grad=True) +# after addition +tensor(indices=tensor([], 
size=(2, 0)), + values=tensor([], size=(0, 3)), + device='cuda:0', size=(100, 20, 3), nnz=0, dtype=torch.float32, + layout=torch.sparse_coo, grad_fn=) +# _indices +tensor([], device='cuda:0', size=(2, 0), dtype=torch.int64, + grad_fn=) +# _values +tensor([], device='cuda:0', size=(0, 3), dtype=torch.float32, + grad_fn=) + +# shape: torch.Size([10, 0, 3]) +# nnz: 3 +# sparseDim: 0 +# indices shape: torch.Size([0, 3]) +# values shape: torch.Size([3, 10, 0, 3]) +########## torch.int32 ########## +# sparse tensor +tensor(indices=tensor([], size=(0, 3)), + values=tensor([], size=(3, 10, 0, 3)), + device='cuda:0', size=(10, 0, 3), nnz=3, dtype=torch.int32, + layout=torch.sparse_coo) +# _indices +tensor([], device='cuda:0', size=(0, 3), dtype=torch.int64) +# _values +tensor([], device='cuda:0', size=(3, 10, 0, 3), dtype=torch.int32) +########## torch.float32 ########## +# sparse tensor +tensor(indices=tensor([], size=(0, 3)), + values=tensor([], size=(3, 10, 0, 3)), + device='cuda:0', size=(10, 0, 3), nnz=3, dtype=torch.float32, + layout=torch.sparse_coo) +# after requires_grad_ +tensor(indices=tensor([], size=(0, 3)), + values=tensor([], size=(3, 10, 0, 3)), + device='cuda:0', size=(10, 0, 3), nnz=3, dtype=torch.float32, + layout=torch.sparse_coo, requires_grad=True) +# after addition +tensor(indices=tensor([], size=(0, 6)), + values=tensor([], size=(6, 10, 0, 3)), + device='cuda:0', size=(10, 0, 3), nnz=6, dtype=torch.float32, + layout=torch.sparse_coo, grad_fn=) +# _indices +tensor([], device='cuda:0', size=(0, 3), dtype=torch.int64, + grad_fn=) +# _values +tensor([], device='cuda:0', size=(3, 10, 0, 3), dtype=torch.float32, + grad_fn=) + +# shape: torch.Size([10, 0, 3]) +# nnz: 0 +# sparseDim: 0 +# indices shape: torch.Size([0, 0]) +# values shape: torch.Size([0, 10, 0, 3]) +########## torch.int32 ########## +# sparse tensor +tensor(indices=tensor([], size=(0, 0)), + values=tensor([], size=(0, 10, 0, 3)), + device='cuda:0', size=(10, 0, 3), nnz=0, dtype=torch.int32, + layout=torch.sparse_coo) +# _indices +tensor([], device='cuda:0', size=(0, 0), dtype=torch.int64) +# _values +tensor([], device='cuda:0', size=(0, 10, 0, 3), dtype=torch.int32) +########## torch.float32 ########## +# sparse tensor +tensor(indices=tensor([], size=(0, 0)), + values=tensor([], size=(0, 10, 0, 3)), + device='cuda:0', size=(10, 0, 3), nnz=0, dtype=torch.float32, + layout=torch.sparse_coo) +# after requires_grad_ +tensor(indices=tensor([], size=(0, 0)), + values=tensor([], size=(0, 10, 0, 3)), + device='cuda:0', size=(10, 0, 3), nnz=0, dtype=torch.float32, + layout=torch.sparse_coo, requires_grad=True) +# after addition +tensor(indices=tensor([], size=(0, 0)), + values=tensor([], size=(0, 10, 0, 3)), + device='cuda:0', size=(10, 0, 3), nnz=0, dtype=torch.float32, + layout=torch.sparse_coo, grad_fn=) +# _indices +tensor([], device='cuda:0', size=(0, 0), dtype=torch.int64, + grad_fn=) +# _values +tensor([], device='cuda:0', size=(0, 10, 0, 3), dtype=torch.float32, + grad_fn=) diff --git a/test/expect/TestCudaUncoalescedSparse.test_print.expect b/test/expect/TestCudaUncoalescedSparse.test_print.expect new file mode 100644 index 0000000000000..e6207c6da3d27 --- /dev/null +++ b/test/expect/TestCudaUncoalescedSparse.test_print.expect @@ -0,0 +1,294 @@ +# shape: torch.Size([]) +# nnz: 2 +# sparseDim: 0 +# indices shape: torch.Size([0, 2]) +# values shape: torch.Size([2]) +########## torch.int32 ########## +# sparse tensor +tensor(indices=tensor([], size=(0, 2)), + values=tensor([0, 1]), + device='cuda:0', size=(), nnz=2, 
dtype=torch.int32, + layout=torch.sparse_coo) +# _indices +tensor([], device='cuda:0', size=(0, 2), dtype=torch.int64) +# _values +tensor([0, 1], device='cuda:0', dtype=torch.int32) +########## torch.float32 ########## +# sparse tensor +tensor(indices=tensor([], size=(0, 2)), + values=tensor([0., 1.]), + device='cuda:0', size=(), nnz=2, dtype=torch.float32, + layout=torch.sparse_coo) +# after requires_grad_ +tensor(indices=tensor([], size=(0, 2)), + values=tensor([0., 1.]), + device='cuda:0', size=(), nnz=2, dtype=torch.float32, + layout=torch.sparse_coo, requires_grad=True) +# after addition +tensor(indices=tensor([], size=(0, 4)), + values=tensor([0., 1., 0., 1.]), + device='cuda:0', size=(), nnz=4, dtype=torch.float32, + layout=torch.sparse_coo, grad_fn=) +# _indices +tensor([], device='cuda:0', size=(0, 2), dtype=torch.int64, + grad_fn=) +# _values +tensor([0., 1.], device='cuda:0', dtype=torch.float32, + grad_fn=) + +# shape: torch.Size([0]) +# nnz: 10 +# sparseDim: 0 +# indices shape: torch.Size([0, 10]) +# values shape: torch.Size([10, 0]) +########## torch.int32 ########## +# sparse tensor +tensor(indices=tensor([], size=(0, 10)), + values=tensor([], size=(10, 0)), + device='cuda:0', size=(0,), nnz=10, dtype=torch.int32, + layout=torch.sparse_coo) +# _indices +tensor([], device='cuda:0', size=(0, 10), dtype=torch.int64) +# _values +tensor([], device='cuda:0', size=(10, 0), dtype=torch.int32) +########## torch.float32 ########## +# sparse tensor +tensor(indices=tensor([], size=(0, 10)), + values=tensor([], size=(10, 0)), + device='cuda:0', size=(0,), nnz=10, dtype=torch.float32, + layout=torch.sparse_coo) +# after requires_grad_ +tensor(indices=tensor([], size=(0, 10)), + values=tensor([], size=(10, 0)), + device='cuda:0', size=(0,), nnz=10, dtype=torch.float32, + layout=torch.sparse_coo, requires_grad=True) +# after addition +tensor(indices=tensor([], size=(0, 20)), + values=tensor([], size=(20, 0)), + device='cuda:0', size=(0,), nnz=20, dtype=torch.float32, + layout=torch.sparse_coo, grad_fn=) +# _indices +tensor([], device='cuda:0', size=(0, 10), dtype=torch.int64, + grad_fn=) +# _values +tensor([], device='cuda:0', size=(10, 0), dtype=torch.float32, + grad_fn=) + +# shape: torch.Size([2]) +# nnz: 3 +# sparseDim: 0 +# indices shape: torch.Size([0, 3]) +# values shape: torch.Size([3, 2]) +########## torch.int32 ########## +# sparse tensor +tensor(indices=tensor([], size=(0, 3)), + values=tensor([[0, 0], + [0, 1], + [1, 1]]), + device='cuda:0', size=(2,), nnz=3, dtype=torch.int32, + layout=torch.sparse_coo) +# _indices +tensor([], device='cuda:0', size=(0, 3), dtype=torch.int64) +# _values +tensor([[0, 0], + [0, 1], + [1, 1]], device='cuda:0', dtype=torch.int32) +########## torch.float32 ########## +# sparse tensor +tensor(indices=tensor([], size=(0, 3)), + values=tensor([[0.0000, 0.3333], + [0.6667, 1.0000], + [1.3333, 1.6667]]), + device='cuda:0', size=(2,), nnz=3, dtype=torch.float32, + layout=torch.sparse_coo) +# after requires_grad_ +tensor(indices=tensor([], size=(0, 3)), + values=tensor([[0.0000, 0.3333], + [0.6667, 1.0000], + [1.3333, 1.6667]]), + device='cuda:0', size=(2,), nnz=3, dtype=torch.float32, + layout=torch.sparse_coo, requires_grad=True) +# after addition +tensor(indices=tensor([], size=(0, 6)), + values=tensor([[0.0000, 0.3333], + [0.6667, 1.0000], + [1.3333, 1.6667], + [0.0000, 0.3333], + [0.6667, 1.0000], + [1.3333, 1.6667]]), + device='cuda:0', size=(2,), nnz=6, dtype=torch.float32, + layout=torch.sparse_coo, grad_fn=) +# _indices +tensor([], device='cuda:0', 
size=(0, 3), dtype=torch.int64, + grad_fn=) +# _values +tensor([[0.0000, 0.3333], + [0.6667, 1.0000], + [1.3333, 1.6667]], device='cuda:0', dtype=torch.float32, + grad_fn=) + +# shape: torch.Size([100, 3]) +# nnz: 3 +# sparseDim: 1 +# indices shape: torch.Size([1, 3]) +# values shape: torch.Size([3, 3]) +########## torch.int32 ########## +# sparse tensor +tensor(indices=tensor([[0, 1, 0]]), + values=tensor([[0, 0, 0], + [0, 0, 1], + [1, 1, 1]]), + device='cuda:0', size=(100, 3), nnz=3, dtype=torch.int32, + layout=torch.sparse_coo) +# _indices +tensor([[0, 1, 0]], device='cuda:0') +# _values +tensor([[0, 0, 0], + [0, 0, 1], + [1, 1, 1]], device='cuda:0', dtype=torch.int32) +########## torch.float32 ########## +# sparse tensor +tensor(indices=tensor([[0, 1, 0]]), + values=tensor([[0.0000, 0.2222, 0.4444], + [0.6667, 0.8889, 1.1111], + [1.3333, 1.5556, 1.7778]]), + device='cuda:0', size=(100, 3), nnz=3, dtype=torch.float32, + layout=torch.sparse_coo) +# after requires_grad_ +tensor(indices=tensor([[0, 1, 0]]), + values=tensor([[0.0000, 0.2222, 0.4444], + [0.6667, 0.8889, 1.1111], + [1.3333, 1.5556, 1.7778]]), + device='cuda:0', size=(100, 3), nnz=3, dtype=torch.float32, + layout=torch.sparse_coo, requires_grad=True) +# after addition +tensor(indices=tensor([[0, 1, 0, 0, 1, 0]]), + values=tensor([[0.0000, 0.2222, 0.4444], + [0.6667, 0.8889, 1.1111], + [1.3333, 1.5556, 1.7778], + [0.0000, 0.2222, 0.4444], + [0.6667, 0.8889, 1.1111], + [1.3333, 1.5556, 1.7778]]), + device='cuda:0', size=(100, 3), nnz=6, dtype=torch.float32, + layout=torch.sparse_coo, grad_fn=) +# _indices +tensor([[0, 1, 0]], device='cuda:0', grad_fn=) +# _values +tensor([[0.0000, 0.2222, 0.4444], + [0.6667, 0.8889, 1.1111], + [1.3333, 1.5556, 1.7778]], device='cuda:0', dtype=torch.float32, + grad_fn=) + +# shape: torch.Size([100, 20, 3]) +# nnz: 0 +# sparseDim: 2 +# indices shape: torch.Size([2, 0]) +# values shape: torch.Size([0, 3]) +########## torch.int32 ########## +# sparse tensor +tensor(indices=tensor([], size=(2, 0)), + values=tensor([], size=(0, 3)), + device='cuda:0', size=(100, 20, 3), nnz=0, dtype=torch.int32, + layout=torch.sparse_coo) +# _indices +tensor([], device='cuda:0', size=(2, 0), dtype=torch.int64) +# _values +tensor([], device='cuda:0', size=(0, 3), dtype=torch.int32) +########## torch.float32 ########## +# sparse tensor +tensor(indices=tensor([], size=(2, 0)), + values=tensor([], size=(0, 3)), + device='cuda:0', size=(100, 20, 3), nnz=0, dtype=torch.float32, + layout=torch.sparse_coo) +# after requires_grad_ +tensor(indices=tensor([], size=(2, 0)), + values=tensor([], size=(0, 3)), + device='cuda:0', size=(100, 20, 3), nnz=0, dtype=torch.float32, + layout=torch.sparse_coo, requires_grad=True) +# after addition +tensor(indices=tensor([], size=(2, 0)), + values=tensor([], size=(0, 3)), + device='cuda:0', size=(100, 20, 3), nnz=0, dtype=torch.float32, + layout=torch.sparse_coo, grad_fn=) +# _indices +tensor([], device='cuda:0', size=(2, 0), dtype=torch.int64, + grad_fn=) +# _values +tensor([], device='cuda:0', size=(0, 3), dtype=torch.float32, + grad_fn=) + +# shape: torch.Size([10, 0, 3]) +# nnz: 3 +# sparseDim: 0 +# indices shape: torch.Size([0, 3]) +# values shape: torch.Size([3, 10, 0, 3]) +########## torch.int32 ########## +# sparse tensor +tensor(indices=tensor([], size=(0, 3)), + values=tensor([], size=(3, 10, 0, 3)), + device='cuda:0', size=(10, 0, 3), nnz=3, dtype=torch.int32, + layout=torch.sparse_coo) +# _indices +tensor([], device='cuda:0', size=(0, 3), dtype=torch.int64) +# _values +tensor([], 
device='cuda:0', size=(3, 10, 0, 3), dtype=torch.int32) +########## torch.float32 ########## +# sparse tensor +tensor(indices=tensor([], size=(0, 3)), + values=tensor([], size=(3, 10, 0, 3)), + device='cuda:0', size=(10, 0, 3), nnz=3, dtype=torch.float32, + layout=torch.sparse_coo) +# after requires_grad_ +tensor(indices=tensor([], size=(0, 3)), + values=tensor([], size=(3, 10, 0, 3)), + device='cuda:0', size=(10, 0, 3), nnz=3, dtype=torch.float32, + layout=torch.sparse_coo, requires_grad=True) +# after addition +tensor(indices=tensor([], size=(0, 6)), + values=tensor([], size=(6, 10, 0, 3)), + device='cuda:0', size=(10, 0, 3), nnz=6, dtype=torch.float32, + layout=torch.sparse_coo, grad_fn=) +# _indices +tensor([], device='cuda:0', size=(0, 3), dtype=torch.int64, + grad_fn=) +# _values +tensor([], device='cuda:0', size=(3, 10, 0, 3), dtype=torch.float32, + grad_fn=) + +# shape: torch.Size([10, 0, 3]) +# nnz: 0 +# sparseDim: 0 +# indices shape: torch.Size([0, 0]) +# values shape: torch.Size([0, 10, 0, 3]) +########## torch.int32 ########## +# sparse tensor +tensor(indices=tensor([], size=(0, 0)), + values=tensor([], size=(0, 10, 0, 3)), + device='cuda:0', size=(10, 0, 3), nnz=0, dtype=torch.int32, + layout=torch.sparse_coo) +# _indices +tensor([], device='cuda:0', size=(0, 0), dtype=torch.int64) +# _values +tensor([], device='cuda:0', size=(0, 10, 0, 3), dtype=torch.int32) +########## torch.float32 ########## +# sparse tensor +tensor(indices=tensor([], size=(0, 0)), + values=tensor([], size=(0, 10, 0, 3)), + device='cuda:0', size=(10, 0, 3), nnz=0, dtype=torch.float32, + layout=torch.sparse_coo) +# after requires_grad_ +tensor(indices=tensor([], size=(0, 0)), + values=tensor([], size=(0, 10, 0, 3)), + device='cuda:0', size=(10, 0, 3), nnz=0, dtype=torch.float32, + layout=torch.sparse_coo, requires_grad=True) +# after addition +tensor(indices=tensor([], size=(0, 0)), + values=tensor([], size=(0, 10, 0, 3)), + device='cuda:0', size=(10, 0, 3), nnz=0, dtype=torch.float32, + layout=torch.sparse_coo, grad_fn=) +# _indices +tensor([], device='cuda:0', size=(0, 0), dtype=torch.int64, + grad_fn=) +# _values +tensor([], device='cuda:0', size=(0, 10, 0, 3), dtype=torch.float32, + grad_fn=) diff --git a/test/expect/TestSparse.test_print.expect b/test/expect/TestSparse.test_print.expect new file mode 100644 index 0000000000000..c0223c1483741 --- /dev/null +++ b/test/expect/TestSparse.test_print.expect @@ -0,0 +1,262 @@ +# shape: torch.Size([]) +# nnz: 2 +# sparseDim: 0 +# indices shape: torch.Size([0, 2]) +# values shape: torch.Size([2]) +########## torch.int32 ########## +# sparse tensor +tensor(indices=tensor([], size=(0, 2)), + values=tensor([0, 1]), + size=(), nnz=2, dtype=torch.int32, layout=torch.sparse_coo) +# _indices +tensor([], size=(0, 2), dtype=torch.int64) +# _values +tensor([0, 1], dtype=torch.int32) +########## torch.float32 ########## +# sparse tensor +tensor(indices=tensor([], size=(0, 2)), + values=tensor([0., 1.]), + size=(), nnz=2, dtype=torch.float32, layout=torch.sparse_coo) +# after requires_grad_ +tensor(indices=tensor([], size=(0, 2)), + values=tensor([0., 1.]), + size=(), nnz=2, dtype=torch.float32, layout=torch.sparse_coo, + requires_grad=True) +# after addition +tensor(indices=tensor([], size=(0, 2)), + values=tensor([0., 2.]), + size=(), nnz=2, dtype=torch.float32, layout=torch.sparse_coo, + grad_fn=) +# _indices +tensor([], size=(0, 2), dtype=torch.int64, grad_fn=) +# _values +tensor([0., 1.], dtype=torch.float32, grad_fn=) + +# shape: torch.Size([0]) +# nnz: 10 +# 
sparseDim: 0 +# indices shape: torch.Size([0, 10]) +# values shape: torch.Size([10, 0]) +########## torch.int32 ########## +# sparse tensor +tensor(indices=tensor([], size=(0, 10)), + values=tensor([], size=(10, 0)), + size=(0,), nnz=10, dtype=torch.int32, layout=torch.sparse_coo) +# _indices +tensor([], size=(0, 10), dtype=torch.int64) +# _values +tensor([], size=(10, 0), dtype=torch.int32) +########## torch.float32 ########## +# sparse tensor +tensor(indices=tensor([], size=(0, 10)), + values=tensor([], size=(10, 0)), + size=(0,), nnz=10, dtype=torch.float32, layout=torch.sparse_coo) +# after requires_grad_ +tensor(indices=tensor([], size=(0, 10)), + values=tensor([], size=(10, 0)), + size=(0,), nnz=10, dtype=torch.float32, layout=torch.sparse_coo, + requires_grad=True) +# after addition +tensor(indices=tensor([], size=(0, 10)), + values=tensor([], size=(10, 0)), + size=(0,), nnz=10, dtype=torch.float32, layout=torch.sparse_coo, + grad_fn=) +# _indices +tensor([], size=(0, 10), dtype=torch.int64, grad_fn=) +# _values +tensor([], size=(10, 0), dtype=torch.float32, grad_fn=) + +# shape: torch.Size([2]) +# nnz: 3 +# sparseDim: 0 +# indices shape: torch.Size([0, 3]) +# values shape: torch.Size([3, 2]) +########## torch.int32 ########## +# sparse tensor +tensor(indices=tensor([], size=(0, 3)), + values=tensor([[0, 0], + [0, 1], + [1, 1]]), + size=(2,), nnz=3, dtype=torch.int32, layout=torch.sparse_coo) +# _indices +tensor([], size=(0, 3), dtype=torch.int64) +# _values +tensor([[0, 0], + [0, 1], + [1, 1]], dtype=torch.int32) +########## torch.float32 ########## +# sparse tensor +tensor(indices=tensor([], size=(0, 3)), + values=tensor([[0.0000, 0.3333], + [0.6667, 1.0000], + [1.3333, 1.6667]]), + size=(2,), nnz=3, dtype=torch.float32, layout=torch.sparse_coo) +# after requires_grad_ +tensor(indices=tensor([], size=(0, 3)), + values=tensor([[0.0000, 0.3333], + [0.6667, 1.0000], + [1.3333, 1.6667]]), + size=(2,), nnz=3, dtype=torch.float32, layout=torch.sparse_coo, + requires_grad=True) +# after addition +tensor(indices=tensor([], size=(0, 3)), + values=tensor([[0.0000, 0.6667], + [1.3333, 2.0000], + [2.6667, 3.3333]]), + size=(2,), nnz=3, dtype=torch.float32, layout=torch.sparse_coo, + grad_fn=) +# _indices +tensor([], size=(0, 3), dtype=torch.int64, grad_fn=) +# _values +tensor([[0.0000, 0.3333], + [0.6667, 1.0000], + [1.3333, 1.6667]], dtype=torch.float32, grad_fn=) + +# shape: torch.Size([100, 3]) +# nnz: 3 +# sparseDim: 1 +# indices shape: torch.Size([1, 3]) +# values shape: torch.Size([3, 3]) +########## torch.int32 ########## +# sparse tensor +tensor(indices=tensor([[0, 1, 2]]), + values=tensor([[0, 0, 0], + [0, 0, 1], + [1, 1, 1]]), + size=(100, 3), nnz=3, dtype=torch.int32, layout=torch.sparse_coo) +# _indices +tensor([[0, 1, 2]]) +# _values +tensor([[0, 0, 0], + [0, 0, 1], + [1, 1, 1]], dtype=torch.int32) +########## torch.float32 ########## +# sparse tensor +tensor(indices=tensor([[0, 1, 2]]), + values=tensor([[0.0000, 0.2222, 0.4444], + [0.6667, 0.8889, 1.1111], + [1.3333, 1.5556, 1.7778]]), + size=(100, 3), nnz=3, dtype=torch.float32, layout=torch.sparse_coo) +# after requires_grad_ +tensor(indices=tensor([[0, 1, 2]]), + values=tensor([[0.0000, 0.2222, 0.4444], + [0.6667, 0.8889, 1.1111], + [1.3333, 1.5556, 1.7778]]), + size=(100, 3), nnz=3, dtype=torch.float32, layout=torch.sparse_coo, + requires_grad=True) +# after addition +tensor(indices=tensor([[0, 1, 2]]), + values=tensor([[0.0000, 0.4444, 0.8889], + [1.3333, 1.7778, 2.2222], + [2.6667, 3.1111, 3.5556]]), + size=(100, 3), 
nnz=3, dtype=torch.float32, layout=torch.sparse_coo, + grad_fn=) +# _indices +tensor([[0, 1, 2]], grad_fn=) +# _values +tensor([[0.0000, 0.2222, 0.4444], + [0.6667, 0.8889, 1.1111], + [1.3333, 1.5556, 1.7778]], dtype=torch.float32, + grad_fn=) + +# shape: torch.Size([100, 20, 3]) +# nnz: 0 +# sparseDim: 2 +# indices shape: torch.Size([2, 0]) +# values shape: torch.Size([0, 3]) +########## torch.int32 ########## +# sparse tensor +tensor(indices=tensor([], size=(2, 0)), + values=tensor([], size=(0, 3)), + size=(100, 20, 3), nnz=0, dtype=torch.int32, layout=torch.sparse_coo) +# _indices +tensor([], size=(2, 0), dtype=torch.int64) +# _values +tensor([], size=(0, 3), dtype=torch.int32) +########## torch.float32 ########## +# sparse tensor +tensor(indices=tensor([], size=(2, 0)), + values=tensor([], size=(0, 3)), + size=(100, 20, 3), nnz=0, dtype=torch.float32, layout=torch.sparse_coo) +# after requires_grad_ +tensor(indices=tensor([], size=(2, 0)), + values=tensor([], size=(0, 3)), + size=(100, 20, 3), nnz=0, dtype=torch.float32, layout=torch.sparse_coo, + requires_grad=True) +# after addition +tensor(indices=tensor([], size=(2, 0)), + values=tensor([], size=(0, 3)), + size=(100, 20, 3), nnz=0, dtype=torch.float32, layout=torch.sparse_coo, + grad_fn=) +# _indices +tensor([], size=(2, 0), dtype=torch.int64, grad_fn=) +# _values +tensor([], size=(0, 3), dtype=torch.float32, grad_fn=) + +# shape: torch.Size([10, 0, 3]) +# nnz: 3 +# sparseDim: 0 +# indices shape: torch.Size([0, 3]) +# values shape: torch.Size([3, 10, 0, 3]) +########## torch.int32 ########## +# sparse tensor +tensor(indices=tensor([], size=(0, 3)), + values=tensor([], size=(3, 10, 0, 3)), + size=(10, 0, 3), nnz=3, dtype=torch.int32, layout=torch.sparse_coo) +# _indices +tensor([], size=(0, 3), dtype=torch.int64) +# _values +tensor([], size=(3, 10, 0, 3), dtype=torch.int32) +########## torch.float32 ########## +# sparse tensor +tensor(indices=tensor([], size=(0, 3)), + values=tensor([], size=(3, 10, 0, 3)), + size=(10, 0, 3), nnz=3, dtype=torch.float32, layout=torch.sparse_coo) +# after requires_grad_ +tensor(indices=tensor([], size=(0, 3)), + values=tensor([], size=(3, 10, 0, 3)), + size=(10, 0, 3), nnz=3, dtype=torch.float32, layout=torch.sparse_coo, + requires_grad=True) +# after addition +tensor(indices=tensor([], size=(0, 3)), + values=tensor([], size=(3, 10, 0, 3)), + size=(10, 0, 3), nnz=3, dtype=torch.float32, layout=torch.sparse_coo, + grad_fn=) +# _indices +tensor([], size=(0, 3), dtype=torch.int64, grad_fn=) +# _values +tensor([], size=(3, 10, 0, 3), dtype=torch.float32, grad_fn=) + +# shape: torch.Size([10, 0, 3]) +# nnz: 0 +# sparseDim: 0 +# indices shape: torch.Size([0, 0]) +# values shape: torch.Size([0, 10, 0, 3]) +########## torch.int32 ########## +# sparse tensor +tensor(indices=tensor([], size=(0, 0)), + values=tensor([], size=(0, 10, 0, 3)), + size=(10, 0, 3), nnz=0, dtype=torch.int32, layout=torch.sparse_coo) +# _indices +tensor([], size=(0, 0), dtype=torch.int64) +# _values +tensor([], size=(0, 10, 0, 3), dtype=torch.int32) +########## torch.float32 ########## +# sparse tensor +tensor(indices=tensor([], size=(0, 0)), + values=tensor([], size=(0, 10, 0, 3)), + size=(10, 0, 3), nnz=0, dtype=torch.float32, layout=torch.sparse_coo) +# after requires_grad_ +tensor(indices=tensor([], size=(0, 0)), + values=tensor([], size=(0, 10, 0, 3)), + size=(10, 0, 3), nnz=0, dtype=torch.float32, layout=torch.sparse_coo, + requires_grad=True) +# after addition +tensor(indices=tensor([], size=(0, 0)), + values=tensor([], size=(0, 
10, 0, 3)), + size=(10, 0, 3), nnz=0, dtype=torch.float32, layout=torch.sparse_coo, + grad_fn=) +# _indices +tensor([], size=(0, 0), dtype=torch.int64, grad_fn=) +# _values +tensor([], size=(0, 10, 0, 3), dtype=torch.float32, grad_fn=) diff --git a/test/expect/TestUncoalescedSparse.test_print.expect b/test/expect/TestUncoalescedSparse.test_print.expect new file mode 100644 index 0000000000000..244442de0cc73 --- /dev/null +++ b/test/expect/TestUncoalescedSparse.test_print.expect @@ -0,0 +1,262 @@ +# shape: torch.Size([]) +# nnz: 2 +# sparseDim: 0 +# indices shape: torch.Size([0, 2]) +# values shape: torch.Size([2]) +########## torch.int32 ########## +# sparse tensor +tensor(indices=tensor([], size=(0, 2)), + values=tensor([0, 1]), + size=(), nnz=2, dtype=torch.int32, layout=torch.sparse_coo) +# _indices +tensor([], size=(0, 2), dtype=torch.int64) +# _values +tensor([0, 1], dtype=torch.int32) +########## torch.float32 ########## +# sparse tensor +tensor(indices=tensor([], size=(0, 2)), + values=tensor([0., 1.]), + size=(), nnz=2, dtype=torch.float32, layout=torch.sparse_coo) +# after requires_grad_ +tensor(indices=tensor([], size=(0, 2)), + values=tensor([0., 1.]), + size=(), nnz=2, dtype=torch.float32, layout=torch.sparse_coo, + requires_grad=True) +# after addition +tensor(indices=tensor([], size=(0, 2)), + values=tensor([0., 2.]), + size=(), nnz=2, dtype=torch.float32, layout=torch.sparse_coo, + grad_fn=) +# _indices +tensor([], size=(0, 2), dtype=torch.int64, grad_fn=) +# _values +tensor([0., 1.], dtype=torch.float32, grad_fn=) + +# shape: torch.Size([0]) +# nnz: 10 +# sparseDim: 0 +# indices shape: torch.Size([0, 10]) +# values shape: torch.Size([10, 0]) +########## torch.int32 ########## +# sparse tensor +tensor(indices=tensor([], size=(0, 10)), + values=tensor([], size=(10, 0)), + size=(0,), nnz=10, dtype=torch.int32, layout=torch.sparse_coo) +# _indices +tensor([], size=(0, 10), dtype=torch.int64) +# _values +tensor([], size=(10, 0), dtype=torch.int32) +########## torch.float32 ########## +# sparse tensor +tensor(indices=tensor([], size=(0, 10)), + values=tensor([], size=(10, 0)), + size=(0,), nnz=10, dtype=torch.float32, layout=torch.sparse_coo) +# after requires_grad_ +tensor(indices=tensor([], size=(0, 10)), + values=tensor([], size=(10, 0)), + size=(0,), nnz=10, dtype=torch.float32, layout=torch.sparse_coo, + requires_grad=True) +# after addition +tensor(indices=tensor([], size=(0, 10)), + values=tensor([], size=(10, 0)), + size=(0,), nnz=10, dtype=torch.float32, layout=torch.sparse_coo, + grad_fn=) +# _indices +tensor([], size=(0, 10), dtype=torch.int64, grad_fn=) +# _values +tensor([], size=(10, 0), dtype=torch.float32, grad_fn=) + +# shape: torch.Size([2]) +# nnz: 3 +# sparseDim: 0 +# indices shape: torch.Size([0, 3]) +# values shape: torch.Size([3, 2]) +########## torch.int32 ########## +# sparse tensor +tensor(indices=tensor([], size=(0, 3)), + values=tensor([[0, 0], + [0, 1], + [1, 1]]), + size=(2,), nnz=3, dtype=torch.int32, layout=torch.sparse_coo) +# _indices +tensor([], size=(0, 3), dtype=torch.int64) +# _values +tensor([[0, 0], + [0, 1], + [1, 1]], dtype=torch.int32) +########## torch.float32 ########## +# sparse tensor +tensor(indices=tensor([], size=(0, 3)), + values=tensor([[0.0000, 0.3333], + [0.6667, 1.0000], + [1.3333, 1.6667]]), + size=(2,), nnz=3, dtype=torch.float32, layout=torch.sparse_coo) +# after requires_grad_ +tensor(indices=tensor([], size=(0, 3)), + values=tensor([[0.0000, 0.3333], + [0.6667, 1.0000], + [1.3333, 1.6667]]), + size=(2,), nnz=3, 
dtype=torch.float32, layout=torch.sparse_coo, + requires_grad=True) +# after addition +tensor(indices=tensor([], size=(0, 3)), + values=tensor([[0.0000, 0.6667], + [1.3333, 2.0000], + [2.6667, 3.3333]]), + size=(2,), nnz=3, dtype=torch.float32, layout=torch.sparse_coo, + grad_fn=) +# _indices +tensor([], size=(0, 3), dtype=torch.int64, grad_fn=) +# _values +tensor([[0.0000, 0.3333], + [0.6667, 1.0000], + [1.3333, 1.6667]], dtype=torch.float32, grad_fn=) + +# shape: torch.Size([100, 3]) +# nnz: 3 +# sparseDim: 1 +# indices shape: torch.Size([1, 3]) +# values shape: torch.Size([3, 3]) +########## torch.int32 ########## +# sparse tensor +tensor(indices=tensor([[0, 1, 0]]), + values=tensor([[0, 0, 0], + [0, 0, 1], + [1, 1, 1]]), + size=(100, 3), nnz=3, dtype=torch.int32, layout=torch.sparse_coo) +# _indices +tensor([[0, 1, 0]]) +# _values +tensor([[0, 0, 0], + [0, 0, 1], + [1, 1, 1]], dtype=torch.int32) +########## torch.float32 ########## +# sparse tensor +tensor(indices=tensor([[0, 1, 0]]), + values=tensor([[0.0000, 0.2222, 0.4444], + [0.6667, 0.8889, 1.1111], + [1.3333, 1.5556, 1.7778]]), + size=(100, 3), nnz=3, dtype=torch.float32, layout=torch.sparse_coo) +# after requires_grad_ +tensor(indices=tensor([[0, 1, 0]]), + values=tensor([[0.0000, 0.2222, 0.4444], + [0.6667, 0.8889, 1.1111], + [1.3333, 1.5556, 1.7778]]), + size=(100, 3), nnz=3, dtype=torch.float32, layout=torch.sparse_coo, + requires_grad=True) +# after addition +tensor(indices=tensor([[0, 1, 0]]), + values=tensor([[0.0000, 0.4444, 0.8889], + [1.3333, 1.7778, 2.2222], + [2.6667, 3.1111, 3.5556]]), + size=(100, 3), nnz=3, dtype=torch.float32, layout=torch.sparse_coo, + grad_fn=) +# _indices +tensor([[0, 1, 0]], grad_fn=) +# _values +tensor([[0.0000, 0.2222, 0.4444], + [0.6667, 0.8889, 1.1111], + [1.3333, 1.5556, 1.7778]], dtype=torch.float32, + grad_fn=) + +# shape: torch.Size([100, 20, 3]) +# nnz: 0 +# sparseDim: 2 +# indices shape: torch.Size([2, 0]) +# values shape: torch.Size([0, 3]) +########## torch.int32 ########## +# sparse tensor +tensor(indices=tensor([], size=(2, 0)), + values=tensor([], size=(0, 3)), + size=(100, 20, 3), nnz=0, dtype=torch.int32, layout=torch.sparse_coo) +# _indices +tensor([], size=(2, 0), dtype=torch.int64) +# _values +tensor([], size=(0, 3), dtype=torch.int32) +########## torch.float32 ########## +# sparse tensor +tensor(indices=tensor([], size=(2, 0)), + values=tensor([], size=(0, 3)), + size=(100, 20, 3), nnz=0, dtype=torch.float32, layout=torch.sparse_coo) +# after requires_grad_ +tensor(indices=tensor([], size=(2, 0)), + values=tensor([], size=(0, 3)), + size=(100, 20, 3), nnz=0, dtype=torch.float32, layout=torch.sparse_coo, + requires_grad=True) +# after addition +tensor(indices=tensor([], size=(2, 0)), + values=tensor([], size=(0, 3)), + size=(100, 20, 3), nnz=0, dtype=torch.float32, layout=torch.sparse_coo, + grad_fn=) +# _indices +tensor([], size=(2, 0), dtype=torch.int64, grad_fn=) +# _values +tensor([], size=(0, 3), dtype=torch.float32, grad_fn=) + +# shape: torch.Size([10, 0, 3]) +# nnz: 3 +# sparseDim: 0 +# indices shape: torch.Size([0, 3]) +# values shape: torch.Size([3, 10, 0, 3]) +########## torch.int32 ########## +# sparse tensor +tensor(indices=tensor([], size=(0, 3)), + values=tensor([], size=(3, 10, 0, 3)), + size=(10, 0, 3), nnz=3, dtype=torch.int32, layout=torch.sparse_coo) +# _indices +tensor([], size=(0, 3), dtype=torch.int64) +# _values +tensor([], size=(3, 10, 0, 3), dtype=torch.int32) +########## torch.float32 ########## +# sparse tensor +tensor(indices=tensor([], size=(0, 
3)),
+       values=tensor([], size=(3, 10, 0, 3)),
+       size=(10, 0, 3), nnz=3, dtype=torch.float32, layout=torch.sparse_coo)
+# after requires_grad_
+tensor(indices=tensor([], size=(0, 3)),
+       values=tensor([], size=(3, 10, 0, 3)),
+       size=(10, 0, 3), nnz=3, dtype=torch.float32, layout=torch.sparse_coo,
+       requires_grad=True)
+# after addition
+tensor(indices=tensor([], size=(0, 3)),
+       values=tensor([], size=(3, 10, 0, 3)),
+       size=(10, 0, 3), nnz=3, dtype=torch.float32, layout=torch.sparse_coo,
+       grad_fn=)
+# _indices
+tensor([], size=(0, 3), dtype=torch.int64, grad_fn=)
+# _values
+tensor([], size=(3, 10, 0, 3), dtype=torch.float32, grad_fn=)
+
+# shape: torch.Size([10, 0, 3])
+# nnz: 0
+# sparseDim: 0
+# indices shape: torch.Size([0, 0])
+# values shape: torch.Size([0, 10, 0, 3])
+########## torch.int32 ##########
+# sparse tensor
+tensor(indices=tensor([], size=(0, 0)),
+       values=tensor([], size=(0, 10, 0, 3)),
+       size=(10, 0, 3), nnz=0, dtype=torch.int32, layout=torch.sparse_coo)
+# _indices
+tensor([], size=(0, 0), dtype=torch.int64)
+# _values
+tensor([], size=(0, 10, 0, 3), dtype=torch.int32)
+########## torch.float32 ##########
+# sparse tensor
+tensor(indices=tensor([], size=(0, 0)),
+       values=tensor([], size=(0, 10, 0, 3)),
+       size=(10, 0, 3), nnz=0, dtype=torch.float32, layout=torch.sparse_coo)
+# after requires_grad_
+tensor(indices=tensor([], size=(0, 0)),
+       values=tensor([], size=(0, 10, 0, 3)),
+       size=(10, 0, 3), nnz=0, dtype=torch.float32, layout=torch.sparse_coo,
+       requires_grad=True)
+# after addition
+tensor(indices=tensor([], size=(0, 0)),
+       values=tensor([], size=(0, 10, 0, 3)),
+       size=(10, 0, 3), nnz=0, dtype=torch.float32, layout=torch.sparse_coo,
+       grad_fn=)
+# _indices
+tensor([], size=(0, 0), dtype=torch.int64, grad_fn=)
+# _values
+tensor([], size=(0, 10, 0, 3), dtype=torch.float32, grad_fn=)
diff --git a/test/test_sparse.py b/test/test_sparse.py
index 01a382f3901ff..fab6ad978176b 100644
--- a/test/test_sparse.py
+++ b/test/test_sparse.py
@@ -107,6 +107,61 @@ def randn(self, *args, **kwargs):
         # TODO: Put this in torch.cuda.randn
         return self.ValueTensor(*args, **kwargs).normal_()
 
+    @skipIfRocm  # ROCm stack doesn't like the x + x call
+    def test_print(self):
+        shape_sparseDim_nnz = [
+            ((), 0, 2),
+            ((0,), 0, 10),
+            ((2,), 0, 3),
+            ((100, 3), 1, 3),
+            ((100, 20, 3), 2, 0),
+            ((10, 0, 3), 0, 3),
+            ((10, 0, 3), 0, 0),
+        ]
+
+        printed = []
+        for shape, sparseDim, nnz in shape_sparseDim_nnz:
+            indices_shape = torch.Size((sparseDim, nnz))
+            values_shape = torch.Size((nnz,) + shape[sparseDim:])
+            printed.append("# shape: {}".format(torch.Size(shape)))
+            printed.append("# nnz: {}".format(nnz))
+            printed.append("# sparseDim: {}".format(sparseDim))
+            printed.append("# indices shape: {}".format(indices_shape))
+            printed.append("# values shape: {}".format(values_shape))
+
+            indices = torch.arange(indices_shape.numel(), dtype=self.IndexTensor.dtype,
+                                   device=self.device).view(indices_shape)
+            for d in range(sparseDim):
+                indices[d].clamp_(max=(shape[d] - 1))  # make it a valid index
+            if self.is_uncoalesced and indices.numel() > 0:
+                indices[:, -1] = indices[:, 0]  # make it uncoalesced
+            values_numel = values_shape.numel()
+            values = torch.arange(values_numel, dtype=self.ValueTensor.dtype,
+                                  device=self.device).view(values_shape).div_(values_numel / 2.)
+            sp_tensor = self.SparseTensor(indices, values, shape)
+
+            dtypes = [torch.int32]
+            if values.dtype == torch.double:
+                dtypes.append(torch.float)
+            else:
+                dtypes.append(torch.double)
+            for dtype in dtypes:
+                printed.append("########## {} ##########".format(dtype))
+                x = sp_tensor.detach().to(dtype)
+                printed.append("# sparse tensor")
+                printed.append(str(x))
+                if x.dtype.is_floating_point:
+                    printed.append("# after requires_grad_")
+                    printed.append(str(x.requires_grad_()))
+                    printed.append("# after addition")
+                    printed.append(str(x + x))
+                printed.append("# _indices")
+                printed.append(str(x._indices()))
+                printed.append("# _values")
+                printed.append(str(x._values()))
+            printed.append('')
+        self.assertExpected('\n'.join(printed))
+
     @skipIfRocm
     def test_basic(self):
         x, i, v = self._gen_sparse(3, 10, 100)
@@ -997,6 +1052,10 @@ def test_factory_size_check(self):
         with self.assertRaisesRegex(RuntimeError, "sizes is inconsistent with indices"):
             torch.sparse_coo_tensor(indices, values, sizes)
 
+        indices.fill_(-1)
+        with self.assertRaisesRegex(RuntimeError, "found negative index"):
+            torch.sparse_coo_tensor(indices, values, sizes)
+
         indices = self.IndexTensor([[1, 2], [0, 2]])
         values = self.ValueTensor([[1, 1, 1], [1, 1, 1]])
         sizes = torch.Size([3, 3, 2])
diff --git a/tools/autograd/gen_variable_type.py b/tools/autograd/gen_variable_type.py
index 1688828669ded..49b0944566187 100644
--- a/tools/autograd/gen_variable_type.py
+++ b/tools/autograd/gen_variable_type.py
@@ -302,16 +302,12 @@ def is_differentiable(arg):
             print('WARNING: derivative ignored for {}'.format(name), file=sys.stderr)
 
     def setup_derivative():
-        def error_msg():
-            name = declaration['api_name']
-            return '"the derivative for {} is not implemented"'.format(name)
-
         args_with_derivatives = find_args_with_derivatives()
 
         env = {}
         env['args_with_derivatives'] = reference_args(args_with_derivatives)
-        env['op'] = func['op'] if func is not None else 'Error'
-        env['op_ctor'] = '' if func is not None else error_msg()
+        env['op'] = func['op'] if func is not None else 'NotImplemented'
+        env['op_ctor'] = '' if func is not None else '"{}"'.format(declaration['api_name'])
 
         if is_out_fn:
             setup = ['throw_error_out_requires_grad("{}");'.format(base_name)]
diff --git a/torch/_tensor_str.py b/torch/_tensor_str.py
index 71f2afb45b49f..3d0c6b0c1ae1e 100644
--- a/torch/_tensor_str.py
+++ b/torch/_tensor_str.py
@@ -174,7 +174,7 @@ def _vector_str(self, indent, formatter, summarize):
     return '[' + (',' + '\n' + ' ' * (indent + 1)).join(lines) + ']'
 
 
-def _tensor_str(self, indent, formatter, summarize):
+def _tensor_str_with_formatter(self, indent, formatter, summarize):
     dim = self.dim()
 
     if dim == 0:
@@ -183,24 +183,42 @@
         return _vector_str(self, indent, formatter, summarize)
 
     if summarize and self.size(0) > 2 * PRINT_OPTS.edgeitems:
-        slices = ([_tensor_str(self[i], indent + 1, formatter, summarize)
+        slices = ([_tensor_str_with_formatter(self[i], indent + 1, formatter, summarize)
                    for i in range(0, PRINT_OPTS.edgeitems)] +
                   ['...'] +
-                  [_tensor_str(self[i], indent + 1, formatter, summarize)
+                  [_tensor_str_with_formatter(self[i], indent + 1, formatter, summarize)
                    for i in range(len(self) - PRINT_OPTS.edgeitems, len(self))])
     else:
-        slices = [_tensor_str(self[i], indent + 1, formatter, summarize) for i in range(0, self.size(0))]
+        slices = [_tensor_str_with_formatter(self[i], indent + 1, formatter, summarize)
+                  for i in range(0, self.size(0))]
 
     tensor_str = (',' + '\n' * (dim - 1) + ' ' * (indent + 1)).join(slices)
     return '[' + tensor_str + ']'
 
 
-def _maybe_wrap_suffix(suffix, indent, tensor_str):
-    suffix_len = len(suffix)
+def _tensor_str(self, indent):
+    if self.numel() == 0:
+        return '[]'
+
+    summarize = self.numel() > PRINT_OPTS.threshold
+    formatter = _Formatter(get_summarized_data(self) if summarize else self)
+    return _tensor_str_with_formatter(self, indent, formatter, summarize)
+
+
+def _add_suffixes(tensor_str, suffixes, indent, force_newline):
+    tensor_strs = [tensor_str]
     last_line_len = len(tensor_str) - tensor_str.rfind('\n') + 1
-    if suffix_len > 2 and last_line_len + suffix_len > PRINT_OPTS.linewidth:
-        return ',\n' + ' ' * indent + suffix[2:]
-    return suffix
+    for suffix in suffixes:
+        suffix_len = len(suffix)
+        if force_newline or last_line_len + suffix_len + 2 > PRINT_OPTS.linewidth:
+            tensor_strs.append(',\n' + ' ' * indent + suffix)
+            last_line_len = indent + suffix_len
+            force_newline = False
+        else:
+            tensor_strs.append(', ' + suffix)
+            last_line_len += suffix_len + 2
+    tensor_strs.append(')')
+    return ''.join(tensor_strs)
 
 
 def get_summarized_data(self):
@@ -222,50 +240,60 @@ def get_summarized_data(self):
 
 
 def _str(self):
-    if self.is_sparse:
-        size_str = str(tuple(self.shape)).replace(' ', '')
-        return '{} of size {} with indices:\n{}\nand values:\n{}'.format(
-            self.type(), size_str, self._indices(), self._values())
-
     prefix = 'tensor('
     indent = len(prefix)
-    summarize = self.numel() > PRINT_OPTS.threshold
 
-    suffix = ''
+    suffixes = []
     if not torch._C._is_default_type_cuda():
         if self.device.type == 'cuda':
-            suffix += ', device=\'' + str(self.device) + '\''
+            suffixes.append('device=\'' + str(self.device) + '\'')
     else:
         if self.device.type == 'cpu' or torch.cuda.current_device() != self.device.index:
-            suffix += ', device=\'' + str(self.device) + '\''
+            suffixes.append('device=\'' + str(self.device) + '\'')
 
-    if self.numel() == 0:
-        # Explicitly print the shape if it is not (0,), to match NumPy behavior
-        if self.dim() != 1:
-            suffix += ', size=' + str(tuple(self.shape))
-
-        # In an empty tensor, there are no elements to infer if the dtype should be int64,
-        # so it must be shown explicitly.
-        if self.dtype != torch.get_default_dtype():
-            suffix += ', dtype=' + str(self.dtype)
-        tensor_str = '[]'
+    has_default_dtype = self.dtype == torch.get_default_dtype() or self.dtype == torch.int64
+
+    if self.is_sparse:
+        suffixes.append('size=' + str(tuple(self.shape)))
+        suffixes.append('nnz=' + str(self._nnz()))
+        if not has_default_dtype:
+            suffixes.append('dtype=' + str(self.dtype))
+        indices_prefix = 'indices=tensor('
+        indices = self._indices().detach()
+        indices_str = _tensor_str(indices, indent + len(indices_prefix))
+        if indices.numel() == 0:
+            indices_str += ', size=' + str(tuple(indices.shape))
+        values_prefix = 'values=tensor('
+        values = self._values().detach()
+        values_str = _tensor_str(values, indent + len(values_prefix))
+        if values.numel() == 0:
+            values_str += ', size=' + str(tuple(values.shape))
+        tensor_str = indices_prefix + indices_str + '),\n' + ' ' * indent + values_prefix + values_str + ')'
     else:
-        if self.dtype != torch.get_default_dtype() and self.dtype != torch.int64:
-            suffix += ', dtype=' + str(self.dtype)
+        if self.numel() == 0 and not self.is_sparse:
+            # Explicitly print the shape if it is not (0,), to match NumPy behavior
+            if self.dim() != 1:
+                suffixes.append('size=' + str(tuple(self.shape)))
+
+            # In an empty tensor, there are no elements to infer if the dtype
+            # should be int64, so it must be shown explicitly.
+            if self.dtype != torch.get_default_dtype():
+                suffixes.append('dtype=' + str(self.dtype))
+            tensor_str = '[]'
+        else:
+            if not has_default_dtype:
+                suffixes.append('dtype=' + str(self.dtype))
+            tensor_str = _tensor_str(self, indent)
 
-    formatter = _Formatter(get_summarized_data(self) if summarize else self)
-    tensor_str = _tensor_str(self, indent, formatter, summarize)
+    if self.layout != torch.strided:
+        suffixes.append('layout=' + str(self.layout))
 
     if self.grad_fn is not None:
         name = type(self.grad_fn).__name__
         if name == 'CppFunction':
             name = self.grad_fn.name().rsplit('::', maxsplit=1)[-1]
-        suffix += ', grad_fn=<{}>'.format(name)
+        suffixes.append('grad_fn=<{}>'.format(name))
     elif self.requires_grad:
-        suffix += ', requires_grad=True'
-
-    suffix += ')'
-
-    suffix = _maybe_wrap_suffix(suffix, indent, tensor_str)
+        suffixes.append('requires_grad=True')
 
-    return prefix + tensor_str + suffix
+    return _add_suffixes(prefix + tensor_str, suffixes, indent, force_newline=self.is_sparse)
diff --git a/torch/csrc/Size.cpp b/torch/csrc/Size.cpp
index 6bc458592f957..d11a718634473 100644
--- a/torch/csrc/Size.cpp
+++ b/torch/csrc/Size.cpp
@@ -135,6 +135,22 @@ static PyMappingMethods THPSize_as_mapping = {
   0
 };
 
+static PyObject *THPSize_numel(THPSize *self)
+{
+  HANDLE_TH_ERRORS
+  int64_t numel = 1;
+  for (Py_ssize_t i = 0; i < PyTuple_Size((PyObject*)self); ++i) {
+    numel *= PyLong_AsLong(PyTuple_GET_ITEM(self, i));
+  }
+  return THPUtils_packInt64(numel);
+  END_HANDLE_TH_ERRORS
+}
+
+static PyMethodDef THPSize_methods[] = {
+  {"numel", (PyCFunction)THPSize_numel, METH_NOARGS, nullptr},
+  {nullptr}
+};
+
 
 PyTypeObject THPSizeType = {
   PyVarObject_HEAD_INIT(nullptr, 0)
@@ -157,14 +173,14 @@ PyTypeObject THPSizeType = {
   0,                                     /* tp_setattro */
   0,                                     /* tp_as_buffer */
   Py_TPFLAGS_DEFAULT,                    /* tp_flags */
-  nullptr,                                     /* tp_doc */
+  nullptr,                               /* tp_doc */
   0,                                     /* tp_traverse */
   0,                                     /* tp_clear */
   0,                                     /* tp_richcompare */
   0,                                     /* tp_weaklistoffset */
   0,                                     /* tp_iter */
   0,                                     /* tp_iternext */
-  0,                                     /* tp_methods */
+  THPSize_methods,                       /* tp_methods */
   0,                                     /* tp_members */
   0,                                     /* tp_getset */
   &PyTuple_Type,                         /* tp_base */
diff --git a/torch/csrc/autograd/functions/basic_ops.h b/torch/csrc/autograd/functions/basic_ops.h
index ad7a9e52d3757..c8812c23f3a7c 100644
--- a/torch/csrc/autograd/functions/basic_ops.h
+++ b/torch/csrc/autograd/functions/basic_ops.h
@@ -24,6 +24,18 @@ struct TORCH_API Error : public Function {
   std::string msg;
 };
 
+// We print grad_fn names in tensor printing. For functions with backward
+// NYI, grad_fn=<Error> will be printed if we use Error, which is confusing. So
+// special case with a new NotImplemented function here.
+struct TORCH_API NotImplemented : public Error {
+  NotImplemented(std::string forward_fn, edge_list&& next_edges)
+    : Error("derivative for " + forward_fn + " is not implemented",
+            std::move(next_edges)) {}
+
+  NotImplemented(std::string forward_fn)
+    : Error("derivative for " + forward_fn + " is not implemented") {}
+};
+
 // Identity in forward, Error in backward. Used to implement @once_differentiable
 struct TORCH_API DelayedError : public Function {
   DelayedError(std::string msg, int num_inputs)
diff --git a/torch/csrc/autograd/functions/init.cpp b/torch/csrc/autograd/functions/init.cpp
index 6988e650f888b..b25df7542d675 100644
--- a/torch/csrc/autograd/functions/init.cpp
+++ b/torch/csrc/autograd/functions/init.cpp
@@ -93,6 +93,9 @@ void THPAutograd_initFunctions()
   static PyTypeObject ErrorClass;
   addClass<Error, NoCtor>(module, ErrorClass, "Error");
 
+  static PyTypeObject NotImplementedClass;
+  addClass<NotImplemented, NoCtor>(module, NotImplementedClass, "NotImplemented");
+
   static PyTypeObject DelayedErrorClass;
   addClass<DelayedError, DelayedErrorCtor>(module, DelayedErrorClass, "DelayedError");
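
The user-visible effects of the patch can be sanity-checked end to end with a short Python sketch. This assumes a build that includes the patch; the tensor values below are illustrative and are not taken from the expect files above, and the exact grad_fn names depend on the generated autograd classes.

import torch

# Sparse COO tensors now print indices and values inline, followed by
# size/nnz/layout suffixes (compare the expect files above).
i = torch.tensor([[0, 1, 2]])
v = torch.tensor([3., 4., 5.])
x = torch.sparse_coo_tensor(i, v, torch.Size([100]))
print(x)

# Out-of-range indices were already rejected; negative indices are now
# caught at construction time with the message added in SparseTensor.cpp.
neg = torch.tensor([[-1, 1, 2]])
try:
    torch.sparse_coo_tensor(neg, v, torch.Size([100]))
except RuntimeError as e:
    print(e)  # found negative index -1 for dim 0

# torch.Size.numel(), added in Size.cpp, multiplies the dimensions; the
# new test_print uses it to size the generated indices/values tensors.
assert torch.Size([2, 3]).numel() == 6
assert torch.Size([10, 0, 3]).numel() == 0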