66 changes: 66 additions & 0 deletions micromind/core.py
@@ -17,6 +17,9 @@
from accelerate import Accelerator
from tqdm import tqdm
import warnings
import uuid
from functools import partial


from .utils.helpers import get_logger
from .utils.checkpointer import Checkpointer
@@ -164,6 +167,8 @@ def __init__(self, hparams=None):

self.current_epoch = 0

self._hooks_dict = {} # Store hooks with progressive keys for later removal

@abstractmethod
def forward(self, batch):
"""
@@ -679,3 +684,64 @@ def test(self, datasets: Dict = {}, metrics: List[Metric] = []) -> None:
logger.info(s_out)

return test_metrics

def attach_hook_fn(self, hook_fn: Callable, to_record: List[str] = None):
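        """Registers ``hook_fn`` as a forward hook on the requested layers.

        Entries in ``to_record`` take the form ``"module"`` (hook every leaf
        layer of that module) or ``"module.layer.path"`` (hook one specific
        layer). Returns a key for later removal via ``detach_hook_fn``, or
        None if no hooks were created.
        """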
if to_record is None:
logger.warning("No specific modules provided. Recording activations for all available modules.")
to_record = list(self.modules.keys())

hooks_created = [] # Track hooks created in this call

for entry in to_record:
module_name, *layer_name_parts = entry.split('.')
layer_name = '.'.join(layer_name_parts)

try:
module = self.modules[module_name]
except KeyError:
logger.error(f"Module '{module_name}' not found in self.modules. Skipping...")
continue

if not layer_name:
# Register hooks for all leaf layers in the module
for sub_name, layer in module.named_modules():
if len(list(layer.children())) == 0: # Only leaf layers
hook_name = f"{module_name}.{sub_name}"
hook_ref = layer.register_forward_hook(
partial(hook_fn, name=hook_name)
)
hooks_created.append(hook_ref)
else:
# Register hook for the specific layer
layer = dict(module.named_modules()).get(layer_name)
if layer is None:
logger.error(f"Layer '{entry}' not found in module '{module_name}'. Skipping...")
continue

hook_ref = layer.register_forward_hook(
partial(hook_fn, name=entry)
)
hooks_created.append(hook_ref)

        if hooks_created:
            hook_key = str(uuid.uuid4())  # Generate a unique key
            self._hooks_dict[hook_key] = hooks_created
            logger.info(f"Hooks successfully created with key: {hook_key}")
        else:
            hook_key = None  # Avoid returning an unbound name when nothing was hooked
            logger.warning("No hooks were created. Please check the specified modules and layers.")

        return hook_key

def detach_hook_fn(self, keys: List[str] = None):
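        """Removes the hooks registered under each key in ``keys``."""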
if keys is None:
logger.error("No keys provided for detachment. Available keys: %s", list(self._hooks_dict.keys()))
return

for key in keys:
hook_refs = self._hooks_dict.pop(key, None)
if hook_refs:
for hook_ref in hook_refs: # Ensure all hooks under this key are removed
hook_ref.remove()
logger.info(f"All hooks with key '{key}' detached.")
else:
logger.error(f"Hook with key '{key}' not found. Available keys: %s", list(self._hooks_dict.keys()))
1 change: 1 addition & 0 deletions micromind/utils/__init__.py
@@ -1,3 +1,4 @@
from . import yolo
from . import checkpointer
from .helpers import parse_configuration
from .hook import ActivationHook
60 changes: 60 additions & 0 deletions micromind/utils/hook.py
@@ -0,0 +1,60 @@
import torch
import os
import csv

class ActivationHook:
def __init__(self):
# Data structure to store the activations
self.activations = {}

def __call__(self, module, input, output, name=None):
# Handle different output types
if isinstance(output, torch.Tensor):
activation_data = output.detach().cpu().numpy()
self.activations[name] = activation_data
elif isinstance(output, (list, tuple)):
self.activations[name] = []
for idx, out in enumerate(output):
if isinstance(out, torch.Tensor):
activation_data = out.detach().cpu().numpy()
self.activations[name].append(activation_data)
else:
self.activations[name].append(None) # Handle non-tensor elements gracefully
else:
# If output is neither Tensor nor list/tuple, handle as unsupported
self.activations[name] = None

def clear(self):
"""Clears the stored activations."""
self.activations = {}
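
Because the hook receives `name` as a keyword argument, it also works on a plain `torch.nn.Module` outside MicroMind; a self-contained sketch:

import torch
import torch.nn as nn
from functools import partial

net = nn.Sequential(nn.Linear(8, 4), nn.ReLU())
hook = ActivationHook()
handle = net[1].register_forward_hook(partial(hook, name="relu"))

net(torch.randn(2, 8))                 # forward pass populates hook.activations
print(hook.activations["relu"].shape)  # (2, 4) numpy array
handle.remove()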


class CSV_ActivationHook:
    def __init__(self):
        self._activation_file = "outputs/activations.csv"
        # Ensure the output directory exists
        os.makedirs(os.path.dirname(self._activation_file), exist_ok=True)

# Open the file and write headers (we'll append data later)
self._activation_file_handle = open(self._activation_file, "w", newline="")
self._csv_writer = csv.writer(self._activation_file_handle)

self._csv_writer.writerow(["Layer Name", "Layer Type", "Activation Shape", "Activations"]) # Headers


def __call__(self, module, input, output, name):
if isinstance(output, torch.Tensor):
activation_data = output.detach().cpu().numpy()
self._csv_writer.writerow([name, module.__class__.__name__, activation_data.shape, activation_data.tolist()])
elif isinstance(output, (list, tuple)):
for idx, out in enumerate(output):
if isinstance(out, torch.Tensor):
activation_data = out.detach().cpu().numpy()
self._csv_writer.writerow([f"{name}[{idx}]", module.__class__.__name__, activation_data.shape, activation_data.tolist()])
# Flush to ensure data is written to the file
self._activation_file_handle.flush()

def close_file(self):
self._activation_file_handle.close()
print(f"Activation file {self._activation_file} closed.")
25 changes: 24 additions & 1 deletion recipes/image_classification/train.py
@@ -24,9 +24,12 @@

import micromind as mm
from micromind.networks import PhiNet, XiNet
from micromind.utils import parse_configuration
from micromind.utils import parse_configuration, ActivationHook
import sys

from torch.utils.data import DataLoader, Subset


class ImageClassification(mm.MicroMind):
"""Implements an image classification class. Provides support
@@ -202,4 +205,24 @@ def acc(pred, batch):
debug=hparams.debug,
)

# Create a subset of the validation set containing only the first image
val_dataset = val_loader.dataset # Assuming val_loader.dataset exists
subset_indices = [0] # Only the first index
val_subset = Subset(val_dataset, subset_indices)

# Create a new DataLoader for the subset
one_loader = DataLoader(
val_subset,
batch_size=1, # Load one image per batch
shuffle=False,
num_workers=hparams.num_workers if hasattr(hparams, "num_workers") else 0,
)

h = ActivationHook()
    key = mind.attach_hook_fn(h, ["classifier._layers.3", "classifier._layers.5._layers.6"])

mind.test(datasets={"test": one_loader}, metrics=[top1, top5])

    mind.detach_hook_fn([key])

mind.test(datasets={"test": val_loader}, metrics=[top1, top5])
53 changes: 53 additions & 0 deletions recipes/image_classification/visualize_act.py
@@ -0,0 +1,53 @@
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import ast # To safely evaluate nested lists

# Function to process and plot activation histograms
def plot_activation_histogram(csv_file):
# Read the CSV file
df = pd.read_csv(csv_file)

# Iterate through each layer
    for _, row in df.iterrows():
layer_name = row['Layer Name']
layer_type = row['Layer Type']
activation_shape = row['Activation Shape']
activations = row['Activations']

# Convert activations string to a nested list of floats
try:
activation_values = np.array(ast.literal_eval(activations)) # Safely parse nested lists
flattened_activations = activation_values.flatten() # Flatten the array
except (ValueError, SyntaxError):
print(f"Error processing activations for layer: {layer_name}")
continue

# Compute statistics
min_value = np.min(flattened_activations)
max_value = np.max(flattened_activations)
mean_value = np.mean(flattened_activations)

# Plot histogram
plt.figure(figsize=(8, 6))
plt.hist(flattened_activations, bins=50, color='blue', alpha=0.7, edgecolor='black')
plt.title(f"Histogram of Activations\nLayer: {layer_name} ({layer_type})")
plt.xlabel("Activation Values")
plt.ylabel("Frequency")

# Annotate with statistics and shape
stats_text = (f"Activation Shape: {activation_shape}\n"
f"Min: {min_value:.4f}\nMax: {max_value:.4f}\nMean: {mean_value:.4f}")
plt.annotate(stats_text, xy=(0.7, 0.6), xycoords='axes fraction', fontsize=10,
bbox=dict(boxstyle="round,pad=0.3", edgecolor='gray', facecolor='lightyellow'))

# Show or save the plot
plt.tight_layout()
plt.show()

if __name__ == "__main__":
# Input CSV file path
csv_file_path = 'outputs/activations.csv' # Replace with your actual file path

# Run the function
plot_activation_histogram(csv_file_path)
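
One caveat: `plt.show()` needs a display, so the script stalls on a headless machine. A small variation (an assumption, not part of the PR) saves each figure to disk instead:

import os
import matplotlib
matplotlib.use("Agg")  # non-interactive backend; set before pyplot is imported
import matplotlib.pyplot as plt

def save_current_figure(layer_name, out_dir="outputs/histograms"):
    # Drop-in replacement for the plt.show() call inside the loop above.
    os.makedirs(out_dir, exist_ok=True)
    path = os.path.join(out_dir, layer_name.replace(".", "_") + ".png")
    plt.savefig(path, dpi=150)
    plt.close()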
10 changes: 5 additions & 5 deletions recipes/object_detection/train.py
@@ -215,12 +215,12 @@ def replace_datafolder(hparams, data_cfg):
data_cfg[key] = [data_cfg[key]]
new_list = []
for tmp in data_cfg[key]:
original_tmp = tmp
if hasattr(hparams, "data_dir"):
if hparams.data_dir != data_cfg["path"]:
tmp = str(tmp).replace(data_cfg["path"], "")
tmp = tmp[1:] if tmp[0] == "/" else tmp
tmp = str(tmp).replace(data_cfg["path"], "").lstrip("/")
tmp = os.path.join(hparams.data_dir, tmp)
new_list.append(tmp)
new_list.append(tmp) # Append regardless of replacement
data_cfg[key] = new_list

data_cfg["path"] = hparams.data_dir
@@ -238,11 +238,11 @@ def replace_datafolder(hparams, data_cfg):
print(f"Setting input shape to {hparams.input_shape}.")

m_cfg, data_cfg = load_config(hparams.data_cfg)

# check if specified path for images is different, correct it in case
data_cfg = replace_datafolder(hparams, data_cfg)
m_cfg.imgsz = hparams.input_shape[-1] # temp solution

train_loader, val_loader = create_loaders(m_cfg, data_cfg, hparams.batch_size)

exp_folder = mm.utils.checkpointer.create_experiment_folder(