
Commit 846d0b5

Add sparsity option to random hypervectors (#47)
* Add sparsity option to random hypervectors
* Update random hypervector creation
* Remove unused import
1 parent 789509e · commit 846d0b5

2 files changed: +31 -100 lines changed

torchhd/functional.py

Lines changed: 21 additions & 30 deletions
@@ -37,7 +37,6 @@ def identity_hv(
     num_embeddings: int,
     embedding_dim: int,
     *,
-    out=None,
     dtype=None,
     device=None,
     requires_grad=False,
@@ -49,7 +48,6 @@ def identity_hv(
     Args:
         num_embeddings (int): the number of hypervectors to generate.
        embedding_dim (int): the dimensionality of the hypervectors.
-        out (Tensor, optional): the output tensor.
         dtype (``torch.dtype``, optional): the desired data type of returned tensor. Default: if ``None``, uses a global default (see ``torch.set_default_tensor_type()``).
         device (``torch.device``, optional): the desired device of returned tensor. Default: if ``None``, uses the current device for the default tensor type (see torch.set_default_tensor_type()). ``device`` will be the CPU for CPU tensor types and the current CUDA device for CUDA tensor types.
         requires_grad (bool, optional): If autograd should record operations on the returned tensor. Default: ``False``.
@@ -67,7 +65,6 @@ def identity_hv(
     return torch.ones(
         num_embeddings,
         embedding_dim,
-        out=out,
         dtype=dtype,
         device=device,
         requires_grad=requires_grad,
@@ -78,8 +75,8 @@ def random_hv(
     num_embeddings: int,
     embedding_dim: int,
     *,
+    sparsity=0.5,
     generator=None,
-    out=None,
     dtype=None,
     device=None,
     requires_grad=False,
@@ -91,38 +88,38 @@ def random_hv(
     Args:
         num_embeddings (int): the number of hypervectors to generate.
         embedding_dim (int): the dimensionality of the hypervectors.
+        sparsity (float, optional): the expected fraction of elements to be +1. Default: ``0.5``.
         generator (``torch.Generator``, optional): a pseudorandom number generator for sampling.
-        out (Tensor, optional): the output tensor.
         dtype (``torch.dtype``, optional): the desired data type of returned tensor. Default: if ``None``, uses a global default (see ``torch.set_default_tensor_type()``).
         device (``torch.device``, optional): the desired device of returned tensor. Default: if ``None``, uses the current device for the default tensor type (see torch.set_default_tensor_type()). ``device`` will be the CPU for CPU tensor types and the current CUDA device for CUDA tensor types.
         requires_grad (bool, optional): If autograd should record operations on the returned tensor. Default: ``False``.
 
     Examples::
 
-        >>> functional.random_hv(2, 3)
-        tensor([[ 1., -1., -1.],
-                [-1.,  1., -1.]])
+        >>> functional.random_hv(2, 5)
+        tensor([[-1.,  1., -1., -1.,  1.],
+                [ 1., -1., -1., -1., -1.]])
+        >>> functional.random_hv(2, 5, sparsity=0.9)
+        tensor([[ 1.,  1.,  1., -1.,  1.],
+                [ 1.,  1.,  1.,  1.,  1.]])
+        >>> functional.random_hv(2, 5, dtype=torch.long)
+        tensor([[ 1, -1,  1,  1,  1],
+                [ 1,  1, -1, -1,  1]])
 
     """
     if dtype is None:
         dtype = torch.get_default_dtype()
 
-    selection = torch.randint(
-        0,
-        2,
-        size=(num_embeddings * embedding_dim,),
-        generator=generator,
-        dtype=torch.long,
-        device=device,
-    )
-
-    if out is not None:
-        out = out.view(num_embeddings * embedding_dim)
-
-    options = torch.tensor([1, -1], dtype=dtype, device=device)
-    hv = torch.index_select(options, 0, selection, out=out)
-    hv.requires_grad = requires_grad
-    return hv.view(num_embeddings, embedding_dim)
+    select = torch.empty(
+        (
+            num_embeddings,
+            embedding_dim,
+        ),
+        dtype=torch.bool,
+    ).bernoulli_(1.0 - sparsity, generator=generator)
+    result = torch.where(select, -1, +1).to(dtype=dtype, device=device)
+    result.requires_grad = requires_grad
+    return result
 
 
 def level_hv(
@@ -131,7 +128,6 @@ def level_hv(
     *,
     randomness=0.0,
     generator=None,
-    out=None,
     dtype=None,
     device=None,
     requires_grad=False,
@@ -146,7 +142,6 @@ def level_hv(
         embedding_dim (int): the dimensionality of the hypervectors.
         randomness (float, optional): r-value to interpolate between level at ``0.0`` and random-hypervectors at ``1.0``. Default: ``0.0``.
         generator (``torch.Generator``, optional): a pseudorandom number generator for sampling.
-        out (Tensor, optional): the output tensor.
         dtype (``torch.dtype``, optional): the desired data type of returned tensor. Default: if ``None``, uses a global default (see ``torch.set_default_tensor_type()``).
         device (``torch.device``, optional): the desired device of returned tensor. Default: if ``None``, uses the current device for the default tensor type (see torch.set_default_tensor_type()). ``device`` will be the CPU for CPU tensor types and the current CUDA device for CUDA tensor types.
         requires_grad (bool, optional): If autograd should record operations on the returned tensor. Default: ``False``.
@@ -164,7 +159,6 @@ def level_hv(
     hv = torch.zeros(
         num_embeddings,
         embedding_dim,
-        out=out,
         dtype=dtype,
         device=device,
     )
@@ -219,7 +213,6 @@ def circular_hv(
     *,
     randomness=0.0,
     generator=None,
-    out=None,
     dtype=None,
     device=None,
     requires_grad=False,
@@ -234,7 +227,6 @@ def circular_hv(
         embedding_dim (int): the dimensionality of the hypervectors.
         randomness (float, optional): r-value to interpolate between circular at ``0.0`` and random-hypervectors at ``1.0``. Default: ``0.0``.
         generator (``torch.Generator``, optional): a pseudorandom number generator for sampling.
-        out (Tensor, optional): the output tensor.
         dtype (``torch.dtype``, optional): the desired data type of returned tensor. Default: if ``None``, uses a global default (see ``torch.set_default_tensor_type()``).
         device (``torch.device``, optional): the desired device of returned tensor. Default: if ``None``, uses the current device for the default tensor type (see torch.set_default_tensor_type()). ``device`` will be the CPU for CPU tensor types and the current CUDA device for CUDA tensor types.
         requires_grad (bool, optional): If autograd should record operations on the returned tensor. Default: ``False``.
@@ -252,7 +244,6 @@ def circular_hv(
     hv = torch.zeros(
         num_embeddings,
         embedding_dim,
-        out=out,
         dtype=dtype,
         device=device,
    )
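The rewritten random_hv draws a Boolean Bernoulli mask and maps it to ±1 with torch.where, so +1 appears with probability sparsity. A standalone sketch of that scheme in plain PyTorch (not part of the commit; the shape and sparsity value are chosen for illustration only):

import torch

# The mask selects which elements become -1 with probability
# 1 - sparsity, so +1 appears with probability `sparsity`.
sparsity = 0.9
select = torch.empty((100, 10000), dtype=torch.bool).bernoulli_(1.0 - sparsity)
hv = torch.where(select, -1, +1).to(dtype=torch.get_default_dtype())

print((hv == 1).float().mean())  # empirical fraction of +1 elements, ~0.9

Note that sampling the whole mask in one call also avoids the flatten/reshape round-trip of the old index_select implementation.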

torchhd/tests/test_functional.py

Lines changed: 10 additions & 70 deletions
@@ -16,15 +16,6 @@ def test_value(self):
         hv = functional.identity_hv(4, 85)
         assert (hv == 1).min().item()
 
-    def test_out(self):
-        buffer = torch.empty(3, 52)
-        hv = functional.identity_hv(3, 52, out=buffer)
-
-        assert buffer.data_ptr() == hv.data_ptr()
-        assert hv.dim() == 2
-        assert hv.size(0) == 3
-        assert hv.size(1) == 52
-
     def test_device(self):
         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         hv = functional.identity_hv(3, 52, device=device)
@@ -53,23 +44,17 @@ def test_requires_grad(self):
 
     def test_integration(self):
         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        buffer = torch.empty(6, 10000, dtype=torch.float16)
         hv = functional.identity_hv(
-            6, 10000, out=buffer, dtype=torch.float16, requires_grad=True, device=device
+            6, 10000, dtype=torch.float16, requires_grad=True, device=device
         )
 
-        assert buffer.data_ptr() == hv.data_ptr()
         assert hv.dim() == 2
         assert hv.size(0) == 6
         assert hv.size(1) == 10000
         assert hv.requires_grad == True
         assert hv.dtype == torch.float16
         assert hv.device == device
 
-        with pytest.raises(RuntimeError):
-            buffer = torch.empty(6, 10000, dtype=torch.float)
-            hv = functional.identity_hv(6, 10000, out=buffer, dtype=torch.float16)
-
 
 class TestRandom_hv:
     def test_shape(self):
@@ -102,15 +87,6 @@ def test_generator(self):
 
         assert (hv1 == hv2).min().item()
 
-    def test_out(self):
-        buffer = torch.empty(3, 52)
-        hv = functional.random_hv(3, 52, out=buffer)
-
-        assert buffer.data_ptr() == hv.data_ptr()
-        assert hv.dim() == 2
-        assert hv.size(0) == 3
-        assert hv.size(1) == 52
-
     def test_device(self):
         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         hv = functional.random_hv(3, 52, device=device)
@@ -142,23 +118,17 @@ def test_requires_grad(self):
 
     def test_integration(self):
         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        buffer = torch.empty(6, 10000, dtype=torch.float)
         hv = functional.random_hv(
-            6, 10000, out=buffer, dtype=torch.float, requires_grad=True, device=device
+            6, 10000, dtype=torch.float, requires_grad=True, device=device
         )
 
-        assert buffer.data_ptr() == hv.data_ptr()
         assert hv.dim() == 2
         assert hv.size(0) == 6
         assert hv.size(1) == 10000
         assert hv.requires_grad == True
         assert hv.dtype == torch.float
         assert hv.device == device
 
-        with pytest.raises(RuntimeError):
-            buffer = torch.empty(6, 10000, dtype=torch.float)
-            hv = functional.random_hv(6, 10000, out=buffer, dtype=torch.float16)
-
 
 class TestLevel_hv:
     def test_shape(self):
@@ -180,9 +150,9 @@ def test_value(self):
         sim = functional.cosine_similarity(hv[0], hv[1].unsqueeze(0))
         assert sim.abs().item() > 0.98
         sim = functional.cosine_similarity(hv[0], hv[24].unsqueeze(0))
-        assert sim.abs().item() > 0.49
+        assert sim.abs().item() > 0.47
         sim = functional.cosine_similarity(hv[0], hv[24].unsqueeze(0))
-        assert sim.abs().item() < 0.51
+        assert sim.abs().item() < 0.52
         sim = functional.cosine_similarity(hv[40], hv[41].unsqueeze(0))
         assert sim.abs().item() > 0.98
 
@@ -197,15 +167,6 @@ def test_generator(self):
 
         assert (hv1 == hv2).min().item()
 
-    def test_out(self):
-        buffer = torch.empty(20, 52)
-        hv = functional.level_hv(20, 52, out=buffer)
-
-        assert buffer.data_ptr() == hv.data_ptr()
-        assert hv.dim() == 2
-        assert hv.size(0) == 20
-        assert hv.size(1) == 52
-
     def test_device(self):
         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         hv = functional.level_hv(3, 52, device=device)
@@ -237,23 +198,17 @@ def test_requires_grad(self):
 
     def test_integration(self):
         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        buffer = torch.empty(6, 10000, dtype=torch.float)
         hv = functional.level_hv(
-            6, 10000, out=buffer, dtype=torch.float, requires_grad=True, device=device
+            6, 10000, dtype=torch.float, requires_grad=True, device=device
         )
 
-        assert buffer.data_ptr() == hv.data_ptr()
         assert hv.dim() == 2
         assert hv.size(0) == 6
         assert hv.size(1) == 10000
         assert hv.requires_grad == True
         assert hv.dtype == torch.float
         assert hv.device == device
 
-        with pytest.raises(RuntimeError):
-            buffer = torch.empty(6, 10000, dtype=torch.float)
-            hv = functional.level_hv(6, 10000, out=buffer, dtype=torch.float16)
-
 
 class TestCircular_hv:
     def test_shape(self):
@@ -277,13 +232,13 @@ def test_value(self):
         sim = functional.cosine_similarity(hv[0], hv[49].unsqueeze(0))
         assert sim.abs().item() > 0.95
         sim = functional.cosine_similarity(hv[0], hv[12].unsqueeze(0))
-        assert sim.abs().item() > 0.49
+        assert sim.abs().item() > 0.47
         sim = functional.cosine_similarity(hv[0], hv[37].unsqueeze(0))
-        assert sim.abs().item() > 0.49
+        assert sim.abs().item() > 0.47
         sim = functional.cosine_similarity(hv[0], hv[12].unsqueeze(0))
-        assert sim.abs().item() < 0.52
+        assert sim.abs().item() < 0.54
         sim = functional.cosine_similarity(hv[0], hv[37].unsqueeze(0))
-        assert sim.abs().item() < 0.52
+        assert sim.abs().item() < 0.54
         sim = functional.cosine_similarity(hv[40], hv[41].unsqueeze(0))
         assert sim.abs().item() > 0.96
 
@@ -298,15 +253,6 @@ def test_generator(self):
 
         assert (hv1 == hv2).min().item()
 
-    def test_out(self):
-        buffer = torch.empty(20, 52)
-        hv = functional.circular_hv(20, 52, out=buffer)
-
-        assert buffer.data_ptr() == hv.data_ptr()
-        assert hv.dim() == 2
-        assert hv.size(0) == 20
-        assert hv.size(1) == 52
-
     def test_device(self):
         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         hv = functional.circular_hv(3, 52, device=device)
@@ -338,23 +284,17 @@ def test_requires_grad(self):
 
     def test_integration(self):
         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        buffer = torch.empty(6, 10000, dtype=torch.float)
         hv = functional.circular_hv(
-            6, 10000, out=buffer, dtype=torch.float, requires_grad=True, device=device
+            6, 10000, dtype=torch.float, requires_grad=True, device=device
        )
 
-        assert buffer.data_ptr() == hv.data_ptr()
         assert hv.dim() == 2
         assert hv.size(0) == 6
         assert hv.size(1) == 10000
         assert hv.requires_grad == True
         assert hv.dtype == torch.float
         assert hv.device == device
 
-        with pytest.raises(RuntimeError):
-            buffer = torch.empty(6, 10000, dtype=torch.float)
-            hv = functional.circular_hv(6, 10000, out=buffer, dtype=torch.float16)
-
 
 class TestBind:
     def test_value(self):
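The commit removes the obsolete out-parameter tests but adds no dedicated test for sparsity. A minimal pytest sketch in the style of the suite above (hypothetical: the class name, import path, seed, and tolerance are assumptions, not part of the commit):

import torch
from torchhd import functional


class TestRandomHvSparsity:
    def test_sparsity(self):
        generator = torch.Generator()
        generator.manual_seed(0)

        hv = functional.random_hv(10, 10000, sparsity=0.9, generator=generator)

        # sparsity is documented as the expected fraction of +1 elements,
        # so over 100,000 samples the empirical fraction should land
        # within a small tolerance of 0.9.
        frac_pos = (hv == 1).float().mean().item()
        assert abs(frac_pos - 0.9) < 0.01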
