Fix activation resampler normalization dimension (ai-safety-foundation#155)

alan-cooney · web-flow · commit 3a15732621ce · 2023-12-11T12:38:23.000-03:00
diff --git a/sparse_autoencoder/activation_resampler/activation_resampler.py b/sparse_autoencoder/activation_resampler/activation_resampler.py
@@ -358,7 +358,7 @@ def renormalize_and_scale(
         # Calculate the average norm of the encoder weights for alive neurons.
         detached_encoder_weight = encoder_weight.detach()  # Don't track gradients
         alive_encoder_weights: Float[
-            Tensor, Axis.names(Axis.LEARNT_FEATURE, Axis.INPUT_OUTPUT_FEATURE)
+            Tensor, Axis.names(Axis.ALIVE_FEATURE, Axis.INPUT_OUTPUT_FEATURE)
         ] = detached_encoder_weight[alive_neuron_mask, :]
         average_alive_norm: Float[Tensor, Axis.SINGLE_ITEM] = alive_encoder_weights.norm(
             dim=-1
@@ -416,7 +416,7 @@ def resample_dead_neurons(
             # vector for the dead autoencoder neuron.
             renormalized_input: Float[
                 Tensor, Axis.names(Axis.DEAD_FEATURE, Axis.INPUT_OUTPUT_FEATURE)
-            ] = torch.nn.functional.normalize(sampled_input, dim=0)
+            ] = torch.nn.functional.normalize(sampled_input, dim=-1)
             dead_decoder_weight_updates = rearrange(
                 renormalized_input, "dead_neuron input_feature -> input_feature dead_neuron"
             )