Skip to content

Commit f952705

Browse files
committed
add note about numerical instability
1 parent bb2f046 commit f952705

File tree

3 files changed

+45
-0
lines changed

3 files changed

+45
-0
lines changed

code/ch12/ch12.ipynb

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -706,6 +706,21 @@
706706
" term1 = -y_enc * (np.log(output))\n",
707707
" term2 = (1. - y_enc) * np.log(1. - output)\n",
708708
" cost = np.sum(term1 - term2) + L2_term\n",
709+
" \n",
710+
" # If you are applying this cost function to other\n",
711+
" # datasets where activation\n",
712+
"    # values may become more extreme (closer to 0 or 1)\n",
713+
" # you may encounter \"ZeroDivisionError\"s due to numerical\n",
714+
" # instabilities in Python & NumPy for the current implementation.\n",
715+
" # I.e., the code tries to evaluate log(0), which is undefined.\n",
716+
" # To address this issue, you could add a small constant to the\n",
717+
" # activation values that are passed to the log function.\n",
718+
" #\n",
719+
" # For example:\n",
720+
" #\n",
721+
" # term1 = -y_enc * (np.log(output + 1e-5))\n",
722+
" # term2 = (1. - y_enc) * np.log(1. - output + 1e-5)\n",
723+
" \n",
709724
" return cost\n",
710725
"\n",
711726
" def predict(self, X):\n",

code/ch12/ch12.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,21 @@ def _compute_cost(self, y_enc, output):
331331
term1 = -y_enc * (np.log(output))
332332
term2 = (1. - y_enc) * np.log(1. - output)
333333
cost = np.sum(term1 - term2) + L2_term
334+
335+
# If you are applying this cost function to other
336+
# datasets where activation
337+
# values may become more extreme (closer to 0 or 1)
338+
# you may encounter "ZeroDivisionError"s due to numerical
339+
# instabilities in Python & NumPy for the current implementation.
340+
# I.e., the code tries to evaluate log(0), which is undefined.
341+
# To address this issue, you could add a small constant to the
342+
# activation values that are passed to the log function.
343+
#
344+
# For example:
345+
#
346+
# term1 = -y_enc * (np.log(output + 1e-5))
347+
# term2 = (1. - y_enc) * np.log(1. - output + 1e-5)
348+
334349
return cost
335350

336351
def predict(self, X):

code/ch12/neuralnet.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,21 @@ def _compute_cost(self, y_enc, output):
109109
term1 = -y_enc * (np.log(output))
110110
term2 = (1. - y_enc) * np.log(1. - output)
111111
cost = np.sum(term1 - term2) + L2_term
112+
113+
# If you are applying this cost function to other
114+
# datasets where activation
115+
# values may become more extreme (closer to 0 or 1)
116+
# you may encounter "ZeroDivisionError"s due to numerical
117+
# instabilities in Python & NumPy for the current implementation.
118+
# I.e., the code tries to evaluate log(0), which is undefined.
119+
# To address this issue, you could add a small constant to the
120+
# activation values that are passed to the log function.
121+
#
122+
# For example:
123+
#
124+
# term1 = -y_enc * (np.log(output + 1e-5))
125+
# term2 = (1. - y_enc) * np.log(1. - output + 1e-5)
126+
112127
return cost
113128

114129
def predict(self, X):

0 commit comments

Comments
 (0)