Merge pull request #71 from weissercn/main

Addressing notebook issues
jbloomAus · Apr 8, 2024 · 8417505
2 parents 4d7d1e7 + 1db0b5a
commit 8417505
Show file tree

Hide file tree

Showing 3 changed files with 14 additions and 3 deletions.
diff --git a/sae_lens/training/sparse_autoencoder.py b/sae_lens/training/sparse_autoencoder.py
@@ -13,6 +13,7 @@
 from transformer_lens.hook_points import HookedRootModule, HookPoint
 
 from sae_lens.training.config import LanguageModelSAERunnerConfig
+from sae_lens.training.utils import BackwardsCompatiblePickleClass
 
 
 class ForwardOutput(NamedTuple):
@@ -241,7 +242,11 @@ def load_from_pretrained(cls, path: str):
         if path.endswith(".pt"):
             try:
                 if torch.backends.mps.is_available():
-                    state_dict = torch.load(path, map_location="mps")
+                    state_dict = torch.load(
+                        path,
+                        map_location="mps",
+                        pickle_module=BackwardsCompatiblePickleClass,
+                    )
                     state_dict["cfg"].device = "mps"
                 else:
                     state_dict = torch.load(path)

diff --git a/sae_lens/training/utils.py b/sae_lens/training/utils.py
@@ -14,6 +14,10 @@ def find_class(self, module: str, name: str):
         return super().find_class(module, name)
 
 
+class BackwardsCompatiblePickleClass:
+    Unpickler = BackwardsCompatibleUnpickler
+
+
 def shuffle_activations_pairwise(datapath: str, buffer_idx_range: Tuple[int, int]):
     """
     Shuffles two buffers on disk.

diff --git a/tutorials/logits_lens_with_features.ipynb b/tutorials/logits_lens_with_features.ipynb
@@ -226,6 +226,8 @@
       "metadata": {},
       "outputs": [],
       "source": [
+        "import nltk\n",
+        "nltk.download('averaged_perceptron_tagger')\n",
         "# get the vocab we need to filter to formulate token sets.\n",
         "vocab = model.tokenizer.get_vocab()  # type: ignore\n",
         "\n",
@@ -608,7 +610,7 @@
       "metadata": {},
       "outputs": [],
       "source": [
-        "for category in [\"starts_with_space\"]:\n",
+        "for category in [\"boys_names\"]:\n",
         "    plot_top_k_feature_projections_by_token_and_category(\n",
         "        token_set_selected,\n",
         "        df_enrichment_scores,\n",
@@ -655,7 +657,7 @@
         "\n",
         "fig = px.area(\n",
         "    tmp_df,\n",
-        "    title=\"Kurtosis by Layer\",\n",
+        "    title=\"Skewness by Layer\",\n",
         "    width=800,\n",
         "    height=600,\n",
         "    color_discrete_sequence=px.colors.sequential.Turbo,\n",