66 changes: 66 additions & 0 deletions micromind/core.py
@@ -17,6 +17,9 @@
from accelerate import Accelerator
from tqdm import tqdm
import warnings
import uuid
from functools import partial


from .utils.helpers import get_logger
from .utils.checkpointer import Checkpointer
@@ -164,6 +167,8 @@ def __init__(self, hparams=None):

self.current_epoch = 0

self._hooks_dict = {} # Store hooks with progressive keys for later removal

@abstractmethod
def forward(self, batch):
"""
@@ -679,3 +684,64 @@ def test(self, datasets: Dict = {}, metrics: List[Metric] = []) -> None:
logger.info(s_out)

return test_metrics

def attach_hook_fn(self, hook_fn: Callable, to_record: List[str] = None):
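        """Registers ``hook_fn`` as a forward hook on the requested layers.

        Entries in ``to_record`` take the form ``"module"`` (hook every leaf
        layer of that module) or ``"module.layer.path"`` (hook one specific
        layer). Returns a key for later removal via ``detach_hook_fn``, or
        None if no hooks were created.
        """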
if to_record is None:
logger.warning("No specific modules provided. Recording activations for all available modules.")
to_record = list(self.modules.keys())

hooks_created = [] # Track hooks created in this call

for entry in to_record:
module_name, *layer_name_parts = entry.split('.')
layer_name = '.'.join(layer_name_parts)

try:
module = self.modules[module_name]
except KeyError:
logger.error(f"Module '{module_name}' not found in self.modules. Skipping...")
continue

if not layer_name:
# Register hooks for all leaf layers in the module
for sub_name, layer in module.named_modules():
if len(list(layer.children())) == 0: # Only leaf layers
hook_name = f"{module_name}.{sub_name}"
hook_ref = layer.register_forward_hook(
partial(hook_fn, name=hook_name)
)
hooks_created.append(hook_ref)
else:
# Register hook for the specific layer
layer = dict(module.named_modules()).get(layer_name)
if layer is None:
logger.error(f"Layer '{entry}' not found in module '{module_name}'. Skipping...")
continue

hook_ref = layer.register_forward_hook(
partial(hook_fn, name=entry)
)
hooks_created.append(hook_ref)

        if hooks_created:
            hook_key = str(uuid.uuid4())  # Generate a unique key
            self._hooks_dict[hook_key] = hooks_created
            logger.info(f"Hooks successfully created with key: {hook_key}")
        else:
            hook_key = None  # Avoid returning an unbound name when nothing was hooked
            logger.warning("No hooks were created. Please check the specified modules and layers.")

        return hook_key

def detach_hook_fn(self, keys: List[str] = None):
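        """Removes the hooks registered under each key in ``keys``."""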
if keys is None:
logger.error("No keys provided for detachment. Available keys: %s", list(self._hooks_dict.keys()))
return

for key in keys:
hook_refs = self._hooks_dict.pop(key, None)
if hook_refs:
for hook_ref in hook_refs: # Ensure all hooks under this key are removed
hook_ref.remove()
logger.info(f"All hooks with key '{key}' detached.")
else:
logger.error(f"Hook with key '{key}' not found. Available keys: %s", list(self._hooks_dict.keys()))
1 change: 1 addition & 0 deletions micromind/utils/__init__.py
@@ -1,3 +1,4 @@
from . import yolo
from . import checkpointer
from .helpers import parse_configuration
from .hook import ActivationHook
60 changes: 60 additions & 0 deletions micromind/utils/hook.py
@@ -0,0 +1,60 @@
import torch
import os
import csv

class ActivationHook:
def __init__(self):
# Data structure to store the activations
self.activations = {}

def __call__(self, module, input, output, name=None):
# Handle different output types
if isinstance(output, torch.Tensor):
activation_data = output.detach().cpu().numpy()
self.activations[name] = activation_data
elif isinstance(output, (list, tuple)):
self.activations[name] = []
for idx, out in enumerate(output):
if isinstance(out, torch.Tensor):
activation_data = out.detach().cpu().numpy()
self.activations[name].append(activation_data)
else:
self.activations[name].append(None) # Handle non-tensor elements gracefully
else:
# If output is neither Tensor nor list/tuple, handle as unsupported
self.activations[name] = None

def clear(self):
"""Clears the stored activations."""
self.activations = {}
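
Because the hook receives `name` as a keyword argument, it also works on a plain `torch.nn.Module` outside MicroMind; a self-contained sketch:

import torch
import torch.nn as nn
from functools import partial

net = nn.Sequential(nn.Linear(8, 4), nn.ReLU())
hook = ActivationHook()
handle = net[1].register_forward_hook(partial(hook, name="relu"))

net(torch.randn(2, 8))                 # forward pass populates hook.activations
print(hook.activations["relu"].shape)  # (2, 4) numpy array
handle.remove()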


class CSV_ActivationHook:
    def __init__(self):
        self._activation_file = "outputs/activations.csv"
        # Ensure the output directory exists
        os.makedirs(os.path.dirname(self._activation_file), exist_ok=True)

# Open the file and write headers (we'll append data later)
self._activation_file_handle = open(self._activation_file, "w", newline="")
self._csv_writer = csv.writer(self._activation_file_handle)

self._csv_writer.writerow(["Layer Name", "Layer Type", "Activation Shape", "Activations"]) # Headers


def __call__(self, module, input, output, name):
if isinstance(output, torch.Tensor):
activation_data = output.detach().cpu().numpy()
self._csv_writer.writerow([name, module.__class__.__name__, activation_data.shape, activation_data.tolist()])
elif isinstance(output, (list, tuple)):
for idx, out in enumerate(output):
if isinstance(out, torch.Tensor):
activation_data = out.detach().cpu().numpy()
self._csv_writer.writerow([f"{name}[{idx}]", module.__class__.__name__, activation_data.shape, activation_data.tolist()])
# Flush to ensure data is written to the file
self._activation_file_handle.flush()

def close_file(self):
self._activation_file_handle.close()
print(f"Activation file {self._activation_file} closed.")
25 changes: 24 additions & 1 deletion recipes/image_classification/train.py
@@ -24,9 +24,12 @@

import micromind as mm
from micromind.networks import PhiNet, XiNet
from micromind.utils import parse_configuration
from micromind.utils import parse_configuration, ActivationHook
import sys

from torch.utils.data import DataLoader, Subset


class ImageClassification(mm.MicroMind):
"""Implements an image classification class. Provides support
@@ -202,4 +205,24 @@ def acc(pred, batch):
debug=hparams.debug,
)

# Create a subset of the validation set containing only the first image
val_dataset = val_loader.dataset # Assuming val_loader.dataset exists
subset_indices = [0] # Only the first index
val_subset = Subset(val_dataset, subset_indices)

# Create a new DataLoader for the subset
one_loader = DataLoader(
val_subset,
batch_size=1, # Load one image per batch
shuffle=False,
num_workers=hparams.num_workers if hasattr(hparams, "num_workers") else 0,
)

h = ActivationHook()
    key = mind.attach_hook_fn(h, ["classifier._layers.3", "classifier._layers.5._layers.6"])

mind.test(datasets={"test": one_loader}, metrics=[top1, top5])

    mind.detach_hook_fn([key])

mind.test(datasets={"test": val_loader}, metrics=[top1, top5])
53 changes: 53 additions & 0 deletions recipes/image_classification/visualize_act.py
@@ -0,0 +1,53 @@
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import ast # To safely evaluate nested lists

# Function to process and plot activation histograms
def plot_activation_histogram(csv_file):
# Read the CSV file
df = pd.read_csv(csv_file)

# Iterate through each layer
    for _, row in df.iterrows():
layer_name = row['Layer Name']
layer_type = row['Layer Type']
activation_shape = row['Activation Shape']
activations = row['Activations']

# Convert activations string to a nested list of floats
try:
activation_values = np.array(ast.literal_eval(activations)) # Safely parse nested lists
flattened_activations = activation_values.flatten() # Flatten the array
except (ValueError, SyntaxError):
print(f"Error processing activations for layer: {layer_name}")
continue

# Compute statistics
min_value = np.min(flattened_activations)
max_value = np.max(flattened_activations)
mean_value = np.mean(flattened_activations)

# Plot histogram
plt.figure(figsize=(8, 6))
plt.hist(flattened_activations, bins=50, color='blue', alpha=0.7, edgecolor='black')
plt.title(f"Histogram of Activations\nLayer: {layer_name} ({layer_type})")
plt.xlabel("Activation Values")
plt.ylabel("Frequency")

# Annotate with statistics and shape
stats_text = (f"Activation Shape: {activation_shape}\n"
f"Min: {min_value:.4f}\nMax: {max_value:.4f}\nMean: {mean_value:.4f}")
plt.annotate(stats_text, xy=(0.7, 0.6), xycoords='axes fraction', fontsize=10,
bbox=dict(boxstyle="round,pad=0.3", edgecolor='gray', facecolor='lightyellow'))

# Show or save the plot
plt.tight_layout()
plt.show()

if __name__ == "__main__":
# Input CSV file path
csv_file_path = 'outputs/activations.csv' # Replace with your actual file path

# Run the function
plot_activation_histogram(csv_file_path)
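
One caveat: `plt.show()` needs a display, so the script stalls on a headless machine. A small variation (an assumption, not part of the PR) saves each figure to disk instead:

import os
import matplotlib
matplotlib.use("Agg")  # non-interactive backend; set before pyplot is imported
import matplotlib.pyplot as plt

def save_current_figure(layer_name, out_dir="outputs/histograms"):
    # Drop-in replacement for the plt.show() call inside the loop above.
    os.makedirs(out_dir, exist_ok=True)
    path = os.path.join(out_dir, layer_name.replace(".", "_") + ".png")
    plt.savefig(path, dpi=150)
    plt.close()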
10 changes: 5 additions & 5 deletions recipes/object_detection/train.py
@@ -215,12 +215,12 @@ def replace_datafolder(hparams, data_cfg):
data_cfg[key] = [data_cfg[key]]
new_list = []
for tmp in data_cfg[key]:
original_tmp = tmp
if hasattr(hparams, "data_dir"):
if hparams.data_dir != data_cfg["path"]:
tmp = str(tmp).replace(data_cfg["path"], "")
tmp = tmp[1:] if tmp[0] == "/" else tmp
tmp = str(tmp).replace(data_cfg["path"], "").lstrip("/")
tmp = os.path.join(hparams.data_dir, tmp)
new_list.append(tmp)
new_list.append(tmp) # Append regardless of replacement
data_cfg[key] = new_list

data_cfg["path"] = hparams.data_dir
@@ -238,11 +238,11 @@ def replace_datafolder(hparams, data_cfg):
print(f"Setting input shape to {hparams.input_shape}.")

m_cfg, data_cfg = load_config(hparams.data_cfg)

# check if specified path for images is different, correct it in case
data_cfg = replace_datafolder(hparams, data_cfg)
m_cfg.imgsz = hparams.input_shape[-1] # temp solution

train_loader, val_loader = create_loaders(m_cfg, data_cfg, hparams.batch_size)

exp_folder = mm.utils.checkpointer.create_experiment_folder(