hyperdimensional-computing · mikeheddes · Jan 26, 2023
diff --git a/docs/index.rst b/docs/index.rst
@@ -18,6 +18,7 @@ Torchhd is a Python library dedicated to *Hyperdimensional Computing* (also know
    torchhd
    embeddings
    structures
+   models
    datasets
    utils
 

diff --git a/docs/models.rst b/docs/models.rst
@@ -0,0 +1,13 @@
+.. _models:
+
+torchhd.models
+==================
+
+.. currentmodule:: torchhd.models
+
+.. autosummary::
+    :toctree: generated/
+    :template: class.rst
+
+    Centroid
+
diff --git a/examples/emg_hand_gestures.py b/examples/emg_hand_gestures.py
@@ -9,6 +9,7 @@
 
 import torchhd
 from torchhd import embeddings
+from torchhd.models import Centroid
 from torchhd.datasets import EMGHandGestures
 
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -27,31 +28,23 @@ def transform(x):
     return x[SUBSAMPLES]
 
 
-class Model(nn.Module):
-    def __init__(self, num_classes, timestamps, channels):
-        super(Model, self).__init__()
+class Encoder(nn.Module):
+    def __init__(self, out_features, timestamps, channels):
+        super(Encoder, self).__init__()
 
-        self.channels = embeddings.Random(channels, DIMENSIONS)
-        self.timestamps = embeddings.Random(timestamps, DIMENSIONS)
-        self.signals = embeddings.Level(NUM_LEVELS, DIMENSIONS, high=20)
+        self.channels = embeddings.Random(channels, out_features)
+        self.timestamps = embeddings.Random(timestamps, out_features)
+        self.signals = embeddings.Level(NUM_LEVELS, out_features, high=20)
 
-        self.classify = nn.Linear(DIMENSIONS, num_classes, bias=False)
-        self.classify.weight.data.fill_(0.0)
-
-    def encode(self, x: torch.Tensor) -> torch.Tensor:
-        signal = self.signals(x)
+    def forward(self, input: torch.Tensor) -> torch.Tensor:
+        signal = self.signals(input)
         samples = torchhd.bind(signal, self.channels.weight.unsqueeze(0))
         samples = torchhd.bind(signal, self.timestamps.weight.unsqueeze(1))
 
         samples = torchhd.multiset(samples)
         sample_hv = torchhd.ngrams(samples, n=N_GRAM_SIZE)
         return torchhd.hard_quantize(sample_hv)
 
-    def forward(self, x: torch.Tensor) -> torch.Tensor:
-        enc = self.encode(x)
-        logit = self.classify(enc)
-        return logit
-
 
 def experiment(subjects=[0]):
     print("List of subjects " + str(subjects))
@@ -66,29 +59,32 @@ def experiment(subjects=[0]):
     train_ld = data.DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
     test_ld = data.DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=False)
 
+    encode = Encoder(DIMENSIONS, ds[0][0].size(-2), ds[0][0].size(-1))
+    encode = encode.to(device)
+
     num_classes = len(ds.classes)
-    model = Model(num_classes, ds[0][0].size(-2), ds[0][0].size(-1))
+    model = Centroid(DIMENSIONS, num_classes)
     model = model.to(device)
 
     with torch.no_grad():
-        for samples, labels in tqdm(train_ld, desc="Training"):
+        for samples, targets in tqdm(train_ld, desc="Training"):
             samples = samples.to(device)
-            labels = labels.to(device)
+            targets = targets.to(device)
 
-            samples_hv = model.encode(samples)
-            model.classify.weight[labels] += samples_hv
-
-        model.classify.weight[:] = F.normalize(model.classify.weight)
+            sample_hv = encode(samples)
+            model.add(sample_hv, targets)
 
     accuracy = torchmetrics.Accuracy("multiclass", num_classes=num_classes)
 
     with torch.no_grad():
-        for samples, labels in tqdm(test_ld, desc="Testing"):
+        model.normalize()
+
+        for samples, targets in tqdm(test_ld, desc="Testing"):
             samples = samples.to(device)
 
-            outputs = model(samples)
-            predictions = torch.argmax(outputs, dim=-1)
-            accuracy.update(predictions.cpu(), labels)
+            sample_hv = encode(samples)
+            output = model(sample_hv, dot=True)
+            accuracy.update(output.cpu(), targets)
 
     print(f"Testing accuracy of {(accuracy.compute().item() * 100):.3f}%")
 

diff --git a/examples/graphhd.py b/examples/graphhd.py
@@ -11,6 +11,7 @@
 
 import torchhd
 from torchhd import embeddings
+from torchhd.models import Centroid
 
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 print("Using {} device".format(device))
@@ -80,55 +81,50 @@ def min_max_graph_size(graph_dataset):
     return min_num_nodes, max_num_nodes
 
 
-class Model(nn.Module):
-    def __init__(self, num_classes, size):
-        super(Model, self).__init__()
+class Encoder(nn.Module):
+    def __init__(self, out_features, size):
+        super(Encoder, self).__init__()
+        self.out_features = out_features
+        self.node_ids = embeddings.Random(size, out_features)
 
-        self.node_ids = embeddings.Random(size, DIMENSIONS)
-
-        self.classify = nn.Linear(DIMENSIONS, num_classes, bias=False)
-        self.classify.weight.data.fill_(0.0)
-
-    def encode(self, x):
+    def forward(self, x):
         pr = pagerank(x)
         pr_sort, pr_argsort = pr.sort()
 
-        node_id_hvs = torch.zeros((x.num_nodes, DIMENSIONS), device=device)
+        node_id_hvs = torch.zeros((x.num_nodes, self.out_features), device=device)
         node_id_hvs[pr_argsort] = self.node_ids.weight[: x.num_nodes]
 
         row, col = to_undirected(x.edge_index)
 
         hvs = torchhd.bind(node_id_hvs[row], node_id_hvs[col])
         return torchhd.multiset(hvs)
 
-    def forward(self, x):
-        enc = self.encode(x)
-        logit = self.classify(enc)
-        return logit
-
 
 min_graph_size, max_graph_size = min_max_graph_size(graphs)
-model = Model(graphs.num_classes, max_graph_size)
+encode = Encoder(DIMENSIONS, max_graph_size)
+encode = encode.to(device)
+
+model = Centroid(DIMENSIONS, graphs.num_classes)
 model = model.to(device)
 
 with torch.no_grad():
     for samples in tqdm(train_ld, desc="Training"):
         samples.edge_index = samples.edge_index.to(device)
         samples.y = samples.y.to(device)
 
-        samples_hv = model.encode(samples)
-        model.classify.weight[samples.y] += samples_hv
-
-    model.classify.weight[:] = F.normalize(model.classify.weight)
+        samples_hv = encode(samples).unsqueeze(0)
+        model.add(samples_hv, samples.y)
 
 accuracy = torchmetrics.Accuracy("multiclass", num_classes=graphs.num_classes)
 
 with torch.no_grad():
+    model.normalize()
+
     for samples in tqdm(test_ld, desc="Testing"):
         samples.edge_index = samples.edge_index.to(device)
 
-        outputs = model(samples)
-        predictions = torch.argmax(outputs, dim=-1).unsqueeze(0)
-        accuracy.update(predictions.cpu(), samples.y)
+        samples_hv = encode(samples).unsqueeze(0)
+        outputs = model(samples_hv, dot=True)
+        accuracy.update(outputs.cpu(), samples.y)
 
 print(f"Testing accuracy of {(accuracy.compute().item() * 100):.3f}%")
diff --git a/examples/language_recognition.py b/examples/language_recognition.py
@@ -9,6 +9,7 @@
 
 import torchhd
 from torchhd import embeddings
+from torchhd.models import Centroid
 from torchhd.datasets import EuropeanLanguages as Languages
 
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -54,49 +55,43 @@ def transform(x: str) -> torch.Tensor:
 test_ld = data.DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=False)
 
 
-class Model(nn.Module):
-    def __init__(self, num_classes, size):
-        super(Model, self).__init__()
+class Encoder(nn.Module):
+    def __init__(self, out_features, size):
+        super(Encoder, self).__init__()
+        self.symbol = embeddings.Random(size, out_features, padding_idx=PADDING_IDX)
 
-        self.symbol = embeddings.Random(size, DIMENSIONS, padding_idx=PADDING_IDX)
-
-        self.classify = nn.Linear(DIMENSIONS, num_classes, bias=False)
-        self.classify.weight.data.fill_(0.0)
-
-    def encode(self, x):
+    def forward(self, x):
         symbols = self.symbol(x)
         sample_hv = torchhd.ngrams(symbols, n=3)
         return torchhd.hard_quantize(sample_hv)
 
-    def forward(self, x):
-        enc = self.encode(x)
-        logit = self.classify(enc)
-        return logit
 
+encode = Encoder(DIMENSIONS, NUM_TOKENS)
+encode = encode.to(device)
 
 num_classes = len(train_ds.classes)
-model = Model(num_classes, NUM_TOKENS)
+model = Centroid(DIMENSIONS, num_classes)
 model = model.to(device)
 
 with torch.no_grad():
     for samples, labels in tqdm(train_ld, desc="Training"):
         samples = samples.to(device)
         labels = labels.to(device)
 
-        samples_hv = model.encode(samples)
-        model.classify.weight[labels] += samples_hv
-
-    model.classify.weight[:] = F.normalize(model.classify.weight)
+        samples_hv = encode(samples)
+        model.add(samples_hv, labels)
 
 accuracy = torchmetrics.Accuracy("multiclass", num_classes=num_classes)
 
 with torch.no_grad():
+    model.normalize()
+
     for samples, labels in tqdm(test_ld, desc="Testing"):
         samples = samples.to(device)
         labels = labels.to(device)
 
-        outputs = model(samples)
-        predictions = torch.argmax(outputs, dim=-1)
-        accuracy.update(predictions.cpu(), labels)
+        samples_hv = encode(samples)
+        outputs = model(samples_hv, dot=True)
+        accuracy.update(outputs.cpu(), labels)
 
 print(f"Testing accuracy of {(accuracy.compute().item() * 100):.3f}%")
diff --git a/examples/mnist.py b/examples/mnist.py
@@ -9,6 +9,7 @@
 from tqdm import tqdm
 
 import torchhd
+from torchhd.models import Centroid
 from torchhd import embeddings
 
 
@@ -29,52 +30,45 @@
 test_ld = torch.utils.data.DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=False)
 
 
-class Model(nn.Module):
-    def __init__(self, num_classes, size):
-        super(Model, self).__init__()
-
+class Encoder(nn.Module):
+    def __init__(self, out_features, size, levels):
+        super(Encoder, self).__init__()
         self.flatten = torch.nn.Flatten()
+        self.position = embeddings.Random(size * size, out_features)
+        self.value = embeddings.Level(levels, out_features)
 
-        self.position = embeddings.Random(size * size, DIMENSIONS)
-        self.value = embeddings.Level(NUM_LEVELS, DIMENSIONS)
-
-        self.classify = nn.Linear(DIMENSIONS, num_classes, bias=False)
-        self.classify.weight.data.fill_(0.0)
-
-    def encode(self, x):
+    def forward(self, x):
         x = self.flatten(x)
         sample_hv = torchhd.bind(self.position.weight, self.value(x))
         sample_hv = torchhd.multiset(sample_hv)
         return torchhd.hard_quantize(sample_hv)
 
-    def forward(self, x):
-        enc = self.encode(x)
-        logit = self.classify(enc)
-        return logit
 
+encode = Encoder(DIMENSIONS, IMG_SIZE, NUM_LEVELS)
+encode = encode.to(device)
 
 num_classes = len(train_ds.classes)
-model = Model(num_classes, IMG_SIZE)
+model = Centroid(DIMENSIONS, num_classes)
 model = model.to(device)
 
 with torch.no_grad():
     for samples, labels in tqdm(train_ld, desc="Training"):
         samples = samples.to(device)
         labels = labels.to(device)
 
-        samples_hv = model.encode(samples)
-        model.classify.weight[labels] += samples_hv
-
-    model.classify.weight[:] = F.normalize(model.classify.weight)
+        samples_hv = encode(samples)
+        model.add(samples_hv, labels)
 
 accuracy = torchmetrics.Accuracy("multiclass", num_classes=num_classes)
 
 with torch.no_grad():
+    model.normalize()
+
     for samples, labels in tqdm(test_ld, desc="Testing"):
         samples = samples.to(device)
 
-        outputs = model(samples)
-        predictions = torch.argmax(outputs, dim=-1)
-        accuracy.update(predictions.cpu(), labels)
+        samples_hv = encode(samples)
+        outputs = model(samples_hv, dot=True)
+        accuracy.update(outputs.cpu(), labels)
 
 print(f"Testing accuracy of {(accuracy.compute().item() * 100):.3f}%")
-Original file line number
+Diff line change
@@ Expand Up @@
        torchhd
        embeddings
        structures
+       models
        datasets
        utils
@@ Expand Down @@