From f9edbfee032820ce684eda57880d3943a12dbfd5 Mon Sep 17 00:00:00 2001
From: Leonard Lausen
Date: Tue, 24 Apr 2018 17:18:13 -0700
Subject: [PATCH] Fix language model ipynb (#83)

* Fix language model ipynb

* Doc fix in word_embedding.ipynb

* Fix wemb eval paths

* Fix warning

* Rerun language_model.ipynb
---
 .../language_model/language_model.ipynb      | 121 +++++++++---------
 .../word_embedding/word_embedding.ipynb      |   2 +-
 .../word_embedding_evaluation.ipynb          |   2 +-
 3 files changed, 62 insertions(+), 63 deletions(-)

diff --git a/docs/examples/language_model/language_model.ipynb b/docs/examples/language_model/language_model.ipynb
index 318e89c15e..1500d91971 100644
--- a/docs/examples/language_model/language_model.ipynb
+++ b/docs/examples/language_model/language_model.ipynb
@@ -38,6 +38,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "import warnings\n",
+    "warnings.filterwarnings('ignore')\n",
+    "\n",
     "import time\n",
     "import math\n",
     "\n",
@@ -128,15 +131,14 @@
      "output_type": "stream",
      "text": [
       "StandardRNN(\n",
-      "  (decoder): HybridSequential(\n",
-      "    (0): Dropout(p = 0.2, axes=())\n",
-      "    (1): Dense(200 -> 33278, linear)\n",
-      "  )\n",
       "  (embedding): HybridSequential(\n",
       "    (0): Embedding(33278 -> 200, float32)\n",
       "    (1): Dropout(p = 0.2, axes=())\n",
       "  )\n",
-      "  (encoder): LSTM(200 -> 800, TNC, num_layers=2, dropout=0.2)\n",
+      "  (encoder): LSTM(200 -> 200.0, TNC, num_layers=2, dropout=0.2)\n",
+      "  (decoder): HybridSequential(\n",
+      "    (0): Dense(200 -> 33278, linear)\n",
+      "  )\n",
       ")\n",
       "Vocab(size=33278, unk=\"<unk>\", reserved=\"['<eos>']\")\n"
      ]
@@ -239,7 +241,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "def train(model, train_data, val_data, test_data, epochs):\n",
+    "def train(model, train_data, val_data, test_data, epochs, lr):\n",
     "    best_val = float(\"Inf\")\n",
     "    start_train_time = time.time()\n",
     "    parameters = model.collect_params().values()\n",
@@ -321,42 +323,42 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "[Epoch 0 Batch 100/746] loss 8.05, ppl 3129.00, throughput 1395.06 samples/s\n",
-      "[Epoch 0 Batch 200/746] loss 7.24, ppl 1399.34, throughput 1895.63 samples/s\n",
-      "[Epoch 0 Batch 300/746] loss 6.96, ppl 1053.46, throughput 1996.78 samples/s\n",
-      "[Epoch 0 Batch 400/746] loss 6.70, ppl 813.17, throughput 1890.44 samples/s\n",
-      "[Epoch 0 Batch 500/746] loss 6.50, ppl 667.19, throughput 1883.28 samples/s\n",
-      "[Epoch 0 Batch 600/746] loss 6.33, ppl 563.24, throughput 1975.94 samples/s\n",
-      "[Epoch 0 Batch 700/746] loss 6.22, ppl 501.24, throughput 1930.41 samples/s\n",
-      "[Epoch 0] throughput 1820.23 samples/s\n",
-      "[Epoch 0] time cost 36.12s, valid loss 5.90, valid ppl 366.51\n",
-      "test loss 5.82, test ppl 337.05\n",
-      "[Epoch 1 Batch 100/746] loss 6.10, ppl 446.92, throughput 1932.23 samples/s\n",
-      "[Epoch 1 Batch 200/746] loss 6.00, ppl 404.51, throughput 1877.29 samples/s\n",
-      "[Epoch 1 Batch 300/746] loss 5.94, ppl 380.40, throughput 1902.84 samples/s\n",
-      "[Epoch 1 Batch 400/746] loss 5.89, ppl 362.86, throughput 1944.96 samples/s\n",
-      "[Epoch 1 Batch 500/746] loss 5.79, ppl 325.73, throughput 1867.69 samples/s\n",
-      "[Epoch 1 Batch 600/746] loss 5.70, ppl 298.65, throughput 1983.97 samples/s\n",
-      "[Epoch 1 Batch 700/746] loss 5.64, ppl 282.85, throughput 1892.81 samples/s\n",
-      "[Epoch 1] throughput 1915.09 samples/s\n",
-      "[Epoch 1] time cost 34.65s, valid loss 5.53, valid ppl 251.99\n",
-      "test loss 5.44, test ppl 229.78\n",
-      "[Epoch 2 Batch 100/746] loss 5.63, ppl 278.83, throughput 1880.85 samples/s\n",
-      "[Epoch 2 Batch 200/746] loss 5.56, ppl 259.11, throughput 1933.27 samples/s\n",
-      "[Epoch 2 Batch 300/746] loss 5.52, ppl 249.74, throughput 1858.71 samples/s\n",
-      "[Epoch 2 Batch 400/746] loss 5.52, ppl 249.21, throughput 1896.20 samples/s\n",
-      "[Epoch 2 Batch 500/746] loss 5.42, ppl 226.61, throughput 1965.41 samples/s\n",
-      "[Epoch 2 Batch 600/746] loss 5.36, ppl 212.60, throughput 1874.85 samples/s\n",
-      "[Epoch 2 Batch 700/746] loss 5.33, ppl 206.46, throughput 1890.60 samples/s\n",
-      "[Epoch 2] throughput 1907.40 samples/s\n",
-      "[Epoch 2] time cost 34.70s, valid loss 5.26, valid ppl 192.69\n",
-      "test loss 5.18, test ppl 177.32\n",
-      "Total training throughput 1534.74 samples/s\n"
+      "[Epoch 0 Batch 100/746] loss 8.05, ppl 3147.06, throughput 1125.76 samples/s\n",
+      "[Epoch 0 Batch 200/746] loss 7.25, ppl 1402.82, throughput 1201.12 samples/s\n",
+      "[Epoch 0 Batch 300/746] loss 6.95, ppl 1047.72, throughput 1207.49 samples/s\n",
+      "[Epoch 0 Batch 400/746] loss 6.70, ppl 810.72, throughput 1213.90 samples/s\n",
+      "[Epoch 0 Batch 500/746] loss 6.51, ppl 670.70, throughput 1214.49 samples/s\n",
+      "[Epoch 0 Batch 600/746] loss 6.33, ppl 562.60, throughput 1211.53 samples/s\n",
+      "[Epoch 0 Batch 700/746] loss 6.21, ppl 498.81, throughput 1203.75 samples/s\n",
+      "[Epoch 0] throughput 1195.34 samples/s\n",
+      "[Epoch 0] time cost 54.93s, valid loss 5.90, valid ppl 364.40\n",
+      "test loss 5.82, test ppl 336.80\n",
+      "[Epoch 1 Batch 100/746] loss 6.10, ppl 446.09, throughput 1190.65 samples/s\n",
+      "[Epoch 1 Batch 200/746] loss 6.00, ppl 404.99, throughput 1207.26 samples/s\n",
+      "[Epoch 1 Batch 300/746] loss 5.94, ppl 378.32, throughput 1200.87 samples/s\n",
+      "[Epoch 1 Batch 400/746] loss 5.88, ppl 356.80, throughput 1197.75 samples/s\n",
+      "[Epoch 1 Batch 500/746] loss 5.77, ppl 320.43, throughput 1208.13 samples/s\n",
+      "[Epoch 1 Batch 600/746] loss 5.68, ppl 292.73, throughput 1203.65 samples/s\n",
+      "[Epoch 1 Batch 700/746] loss 5.63, ppl 277.95, throughput 1217.44 samples/s\n",
+      "[Epoch 1] throughput 1206.55 samples/s\n",
+      "[Epoch 1] time cost 54.37s, valid loss 5.49, valid ppl 241.20\n",
+      "test loss 5.40, test ppl 221.14\n",
+      "[Epoch 2 Batch 100/746] loss 5.61, ppl 273.02, throughput 1188.24 samples/s\n",
+      "[Epoch 2 Batch 200/746] loss 5.53, ppl 252.61, throughput 1202.37 samples/s\n",
+      "[Epoch 2 Batch 300/746] loss 5.50, ppl 245.29, throughput 1200.85 samples/s\n",
+      "[Epoch 2 Batch 400/746] loss 5.50, ppl 244.17, throughput 1202.60 samples/s\n",
+      "[Epoch 2 Batch 500/746] loss 5.40, ppl 222.01, throughput 1213.11 samples/s\n",
+      "[Epoch 2 Batch 600/746] loss 5.34, ppl 208.07, throughput 1208.37 samples/s\n",
+      "[Epoch 2 Batch 700/746] loss 5.31, ppl 203.18, throughput 1208.56 samples/s\n",
+      "[Epoch 2] throughput 1205.63 samples/s\n",
+      "[Epoch 2] time cost 54.39s, valid loss 5.25, valid ppl 190.18\n",
+      "test loss 5.16, test ppl 174.20\n",
+      "Total training throughput 990.11 samples/s\n"
      ]
     }
    ],
    "source": [
-    "train(model, train_data, val_data, test_data, epochs)"
+    "train(model, train_data, val_data, test_data, epochs, lr)"
    ]
   },
   {
@@ -377,13 +379,13 @@
      "text": [
       "  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current\n",
       "                                 Dload  Upload   Total   Spent    Left  Speed\n",
-      "100 4982k  100 4982k    0     0  6107k      0 --:--:-- --:--:-- --:--:-- 6105k\n",
+      "100 4982k  100 4982k    0     0  29.1M      0 --:--:-- --:--:-- --:--:-- 28.9M\n",
       "  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current\n",
       "                                 Dload  Upload   Total   Spent    Left  Speed\n",
-      "100  390k  100  390k    0     0  1052k      0 --:--:-- --:--:-- --:--:-- 1055k\n",
+      "100  390k  100  390k    0     0  5205k      0 --:--:-- --:--:-- --:--:-- 5205k\n",
       "  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current\n",
       "                                 Dload  Upload   Total   Spent    Left  Speed\n",
-      "100  439k  100  439k    0     0  1195k      0 --:--:-- --:--:-- --:--:-- 1194k\n",
+      "100  439k  100  439k    0     0  4724k      0 --:--:-- --:--:-- --:--:-- 4724k\n",
       "['ptb.test.txt', 'ptb.train.txt', 'ptb.valid.txt']\n"
      ]
     }
@@ -419,7 +421,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Best validation loss 6.36, test ppl 575.78\n"
+      "Best validation loss 6.49, test ppl 660.12\n"
      ]
     }
    ],
@@ -437,24 +439,21 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "[Epoch 0] throughput 1714.99 samples/s\n",
-      "[Epoch 0] time cost 2.41s, valid loss 5.32, valid ppl 205.25\n",
-      "test loss 5.32, test ppl 205.25\n",
-      "[Epoch 1] throughput 1682.03 samples/s\n",
-      "[Epoch 1] time cost 2.41s, valid loss 5.04, valid ppl 154.75\n",
-      "test loss 5.04, test ppl 154.75\n",
-      "[Epoch 2] throughput 1979.00 samples/s\n",
-      "[Epoch 2] time cost 2.42s, valid loss 4.96, valid ppl 143.20\n",
-      "test loss 4.96, test ppl 143.20\n",
-      "Total training throughput 620.79 samples/s\n"
+      "[Epoch 0] throughput 1183.07 samples/s\n",
+      "[Epoch 0] time cost 3.79s, valid loss 5.19, valid ppl 179.67\n",
+      "test loss 5.19, test ppl 179.67\n",
+      "[Epoch 1] throughput 1225.89 samples/s\n",
+      "[Epoch 1] time cost 3.71s, valid loss 5.30, valid ppl 200.73\n",
+      "Learning rate now 5.000000\n",
+      "[Epoch 2] throughput 1207.51 samples/s\n",
+      "[Epoch 2] time cost 3.75s, valid loss 4.70, valid ppl 110.38\n",
+      "test loss 4.70, test ppl 110.38\n",
+      "Total training throughput 456.48 samples/s\n"
      ]
     }
    ],
    "source": [
-    "lr = 20\n",
-    "epochs = 5\n",
-    "\n",
-    "train(model, ptb_val_data, ptb_val_data, ptb_val_data, 3)"
+    "train(model, ptb_val_data, ptb_val_data, ptb_val_data, epochs=3, lr=20)"
    ]
   },
   {
@@ -477,21 +476,21 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 2",
+   "display_name": "Python 3",
    "language": "python",
-   "name": "python2"
+   "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {
     "name": "ipython",
-     "version": 2
+     "version": 3
    },
    "file_extension": ".py",
    "mimetype": "text/x-python",
    "name": "python",
    "nbconvert_exporter": "python",
-    "pygments_lexer": "ipython2",
-    "version": "2.7.12"
+    "pygments_lexer": "ipython3",
+    "version": "3.6.4"
   }
  },
  "nbformat": 4,
diff --git a/docs/examples/word_embedding/word_embedding.ipynb b/docs/examples/word_embedding/word_embedding.ipynb
index 86c3156608..65aa25184c 100644
--- a/docs/examples/word_embedding/word_embedding.ipynb
+++ b/docs/examples/word_embedding/word_embedding.ipynb
@@ -111,7 +111,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "To attach word embedding to indexed words in `vocab`, let us go on to create a fastText word embedding instance by specifying the embedding name `fasttext` and the source name `wiki.simple.vec`."
+    "To attach word embedding to indexed words in `vocab`, let us go on to create a fastText word embedding instance by specifying the embedding name `fasttext` and the source name `wiki.simple`."
    ]
   },
   {
diff --git a/docs/examples/word_embedding_evaluation/word_embedding_evaluation.ipynb b/docs/examples/word_embedding_evaluation/word_embedding_evaluation.ipynb
index 95918b88a6..5c5ad069fa 100644
--- a/docs/examples/word_embedding_evaluation/word_embedding_evaluation.ipynb
+++ b/docs/examples/word_embedding_evaluation/word_embedding_evaluation.ipynb
@@ -479,7 +479,7 @@
     "pd.options.display.max_rows = 999\n",
     "pd.options.display.precision = 3\n",
     "\n",
-    "df = pd.read_table(\"../../scripts/word_embedding_evaluation/results-vocablimit.csv\",\n",
+    "df = pd.read_table(\"../../../scripts/word_embedding_evaluation/results-vocablimit.csv\",\n",
     "                   header=None, names=[\n",
     "                       \"evaluation_type\", \"dataset\", \"kwargs\", \"embedding_name\",\n",
     "                       \"embedding_source\", \"evaluation\", \"value\", \"num_samples\"\n",