Skip to content

Commit aa311eb

Browse files
committed
experiment logs
1 parent 4d4cbef commit aa311eb

File tree

8 files changed

+592
-478
lines changed

8 files changed

+592
-478
lines changed

docs/experiments/nlp_autoregression.html

Lines changed: 192 additions & 168 deletions
Large diffs are not rendered by default.

docs/experiments/nlp_classification.html

Lines changed: 185 additions & 161 deletions
Large diffs are not rendered by default.

docs/normalization/deep_norm/experiment.html

Lines changed: 74 additions & 74 deletions
Large diffs are not rendered by default.

docs/normalization/deep_norm/index.html

Lines changed: 49 additions & 49 deletions
Large diffs are not rendered by default.

docs/sitemap.xml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -190,14 +190,14 @@
190190

191191
<url>
192192
<loc>https://nn.labml.ai/normalization/deep_norm/index.html</loc>
193-
<lastmod>2022-04-10T16:30:00+00:00</lastmod>
193+
<lastmod>2022-04-23T16:30:00+00:00</lastmod>
194194
<priority>1.00</priority>
195195
</url>
196196

197197

198198
<url>
199199
<loc>https://nn.labml.ai/normalization/deep_norm/experiment.html</loc>
200-
<lastmod>2022-04-10T16:30:00+00:00</lastmod>
200+
<lastmod>2022-04-23T16:30:00+00:00</lastmod>
201201
<priority>1.00</priority>
202202
</url>
203203

@@ -260,7 +260,7 @@
260260

261261
<url>
262262
<loc>https://nn.labml.ai/resnet/index.html</loc>
263-
<lastmod>2021-10-21T16:30:00+00:00</lastmod>
263+
<lastmod>2022-04-10T16:30:00+00:00</lastmod>
264264
<priority>1.00</priority>
265265
</url>
266266

labml_nn/experiments/nlp_autoregression.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,16 @@ class NLPAutoRegressionConfigs(TrainValidConfigs):
9191
# Data loaders shuffle with replacement
9292
dataloader_shuffle_with_replacement: bool = False
9393

94+
# Whether to log model parameters and gradients (once per epoch).
95+
# These are summarized stats per layer, but it could still lead
96+
# to many indicators for very deep networks.
97+
is_log_model_params_grads: bool = False
98+
99+
# Whether to log model activations (once per epoch).
100+
# These are summarized stats per layer, but it could still lead
101+
# to many indicators for very deep networks.
102+
is_log_model_activations: bool = False
103+
94104
def init(self):
95105
"""
96106
### Initialization
@@ -126,7 +136,7 @@ def step(self, batch: any, batch_idx: BatchIndex):
126136
tracker.add_global_step(data.shape[0] * data.shape[1])
127137

128138
# Whether to capture model outputs
129-
with self.mode.update(is_log_activations=batch_idx.is_last):
139+
with self.mode.update(is_log_activations=batch_idx.is_last and self.is_log_model_activations):
130140
# Get model outputs.
131141
# It's returning a tuple for states when using RNNs.
132142
# This is not implemented yet. 😜
@@ -151,7 +161,7 @@ def step(self, batch: any, batch_idx: BatchIndex):
151161
# Take optimizer step
152162
self.optimizer.step()
153163
# Log the model parameters and gradients on last batch of every epoch
154-
if batch_idx.is_last:
164+
if batch_idx.is_last and self.is_log_model_params_grads:
155165
tracker.add('model', self.model)
156166
# Clear the gradients
157167
self.optimizer.zero_grad()

labml_nn/experiments/nlp_classification.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,16 @@ class NLPClassificationConfigs(TrainValidConfigs):
7474
# Validation data loader
7575
valid_loader: DataLoader = 'ag_news'
7676

77+
# Whether to log model parameters and gradients (once per epoch).
78+
# These are summarized stats per layer, but it could still lead
79+
# to many indicators for very deep networks.
80+
is_log_model_params_grads: bool = False
81+
82+
# Whether to log model activations (once per epoch).
83+
# These are summarized stats per layer, but it could still lead
84+
# to many indicators for very deep networks.
85+
is_log_model_activations: bool = False
86+
7787
def init(self):
7888
"""
7989
### Initialization
@@ -102,7 +112,7 @@ def step(self, batch: any, batch_idx: BatchIndex):
102112
tracker.add_global_step(data.shape[1])
103113

104114
# Whether to capture model outputs
105-
with self.mode.update(is_log_activations=batch_idx.is_last):
115+
with self.mode.update(is_log_activations=batch_idx.is_last and self.is_log_model_activations):
106116
# Get model outputs.
107117
# It's returning a tuple for states when using RNNs.
108118
# This is not implemented yet. 😜
@@ -125,7 +135,7 @@ def step(self, batch: any, batch_idx: BatchIndex):
125135
# Take optimizer step
126136
self.optimizer.step()
127137
# Log the model parameters and gradients on last batch of every epoch
128-
if batch_idx.is_last:
138+
if batch_idx.is_last and self.is_log_model_params_grads:
129139
tracker.add('model', self.model)
130140
# Clear the gradients
131141
self.optimizer.zero_grad()

labml_nn/transformers/basic/autoregressive_experiment.ipynb

Lines changed: 65 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,10 @@
1717
{
1818
"cell_type": "markdown",
1919
"metadata": {
20-
"id": "AYV_dMVDxyc2"
20+
"id": "AYV_dMVDxyc2",
21+
"pycharm": {
22+
"name": "#%% md\n"
23+
}
2124
},
2225
"source": [
2326
"[![Github](https://img.shields.io/github/stars/labmlai/annotated_deep_learning_paper_implementations?style=social)](https://github.com/labmlai/annotated_deep_learning_paper_implementations)\n",
@@ -34,7 +37,10 @@
3437
{
3538
"cell_type": "markdown",
3639
"metadata": {
37-
"id": "AahG_i2y5tY9"
40+
"id": "AahG_i2y5tY9",
41+
"pycharm": {
42+
"name": "#%% md\n"
43+
}
3844
},
3945
"source": [
4046
"Install the packages"
@@ -47,7 +53,10 @@
4753
"colab": {
4854
"base_uri": "https://localhost:8080/"
4955
},
50-
"outputId": "cf107fb2-4d50-4c67-af34-367624553421"
56+
"outputId": "cf107fb2-4d50-4c67-af34-367624553421",
57+
"pycharm": {
58+
"name": "#%%\n"
59+
}
5160
},
5261
"source": [
5362
"!pip install labml-nn comet_ml"
@@ -58,7 +67,10 @@
5867
{
5968
"cell_type": "markdown",
6069
"metadata": {
61-
"id": "SE2VUQ6L5zxI"
70+
"id": "SE2VUQ6L5zxI",
71+
"pycharm": {
72+
"name": "#%% md\n"
73+
}
6274
},
6375
"source": [
6476
"Imports"
@@ -67,7 +79,10 @@
6779
{
6880
"cell_type": "code",
6981
"metadata": {
70-
"id": "0hJXx_g0wS2C"
82+
"id": "0hJXx_g0wS2C",
83+
"pycharm": {
84+
"name": "#%%\n"
85+
}
7186
},
7287
"source": [
7388
"import torch\n",
@@ -112,7 +127,10 @@
112127
{
113128
"cell_type": "markdown",
114129
"metadata": {
115-
"id": "Lpggo0wM6qb-"
130+
"id": "Lpggo0wM6qb-",
131+
"pycharm": {
132+
"name": "#%% md\n"
133+
}
116134
},
117135
"source": [
118136
"Create an experiment"
@@ -121,7 +139,10 @@
121139
{
122140
"cell_type": "code",
123141
"metadata": {
124-
"id": "bFcr9k-l4cAg"
142+
"id": "bFcr9k-l4cAg",
143+
"pycharm": {
144+
"name": "#%%\n"
145+
}
125146
},
126147
"source": [
127148
"experiment.create(name=\"transformer\", writers={'screen', 'web_api', 'comet'})"
@@ -132,7 +153,10 @@
132153
{
133154
"cell_type": "markdown",
134155
"metadata": {
135-
"id": "-OnHLi626tJt"
156+
"id": "-OnHLi626tJt",
157+
"pycharm": {
158+
"name": "#%% md\n"
159+
}
136160
},
137161
"source": [
138162
"Initialize configurations"
@@ -141,7 +165,10 @@
141165
{
142166
"cell_type": "code",
143167
"metadata": {
144-
"id": "Piz0c5f44hRo"
168+
"id": "Piz0c5f44hRo",
169+
"pycharm": {
170+
"name": "#%%\n"
171+
}
145172
},
146173
"source": [
147174
"conf = Configs()"
@@ -152,7 +179,10 @@
152179
{
153180
"cell_type": "markdown",
154181
"metadata": {
155-
"id": "wwMzCqpD6vkL"
182+
"id": "wwMzCqpD6vkL",
183+
"pycharm": {
184+
"name": "#%% md\n"
185+
}
156186
},
157187
"source": [
158188
"Set experiment configurations and assign a configurations dictionary to override configurations"
@@ -166,7 +196,10 @@
166196
"height": 17
167197
},
168198
"id": "e6hmQhTw4nks",
169-
"outputId": "29634715-42f4-4405-fb11-fc9522608627"
199+
"outputId": "29634715-42f4-4405-fb11-fc9522608627",
200+
"pycharm": {
201+
"name": "#%%\n"
202+
}
170203
},
171204
"source": [
172205
"experiment.configs(conf, {\n",
@@ -205,7 +238,10 @@
205238
{
206239
"cell_type": "markdown",
207240
"metadata": {
208-
"id": "EvI7MtgJ61w5"
241+
"id": "EvI7MtgJ61w5",
242+
"pycharm": {
243+
"name": "#%% md\n"
244+
}
209245
},
210246
"source": [
211247
"Set PyTorch models for loading and saving"
@@ -219,7 +255,10 @@
219255
"height": 255
220256
},
221257
"id": "GDlt7dp-5ALt",
222-
"outputId": "e7548e8f-c541-4618-dc5a-1597cae42003"
258+
"outputId": "e7548e8f-c541-4618-dc5a-1597cae42003",
259+
"pycharm": {
260+
"name": "#%%\n"
261+
}
223262
},
224263
"source": [
225264
"experiment.add_pytorch_models({'model': conf.model})"
@@ -230,7 +269,10 @@
230269
{
231270
"cell_type": "markdown",
232271
"metadata": {
233-
"id": "KJZRf8527GxL"
272+
"id": "KJZRf8527GxL",
273+
"pycharm": {
274+
"name": "#%% md\n"
275+
}
234276
},
235277
"source": [
236278
"Start the experiment and run the training loop."
@@ -244,7 +286,10 @@
244286
"height": 1000
245287
},
246288
"id": "aIAWo7Fw5DR8",
247-
"outputId": "db979785-bfe3-4eda-d3eb-8ccbe61053e5"
289+
"outputId": "db979785-bfe3-4eda-d3eb-8ccbe61053e5",
290+
"pycharm": {
291+
"name": "#%%\n"
292+
}
248293
},
249294
"source": [
250295
"# Start the experiment\n",
@@ -257,11 +302,12 @@
257302
{
258303
"cell_type": "code",
259304
"metadata": {
260-
"id": "oBXXlP2b7XZO"
305+
"id": "oBXXlP2b7XZO",
306+
"pycharm": {
307+
"name": "#%%\n"
308+
}
261309
},
262-
"source": [
263-
""
264-
],
310+
"source": [],
265311
"execution_count": null,
266312
"outputs": []
267313
}

0 commit comments

Comments
 (0)