Commit

Update generated files
fchollet committed Aug 29, 2020
1 parent c471a01 commit e5f9c43
Showing 6 changed files with 97 additions and 179 deletions.
66 changes: 21 additions & 45 deletions examples/nlp/ipynb/pretrained_word_embeddings.ipynb
@@ -33,8 +33,7 @@
"source": [
"import numpy as np\n",
"import tensorflow as tf\n",
-"from tensorflow import keras\n",
-""
+"from tensorflow import keras"
]
},
{
@@ -76,8 +75,7 @@
"    \"news20.tar.gz\",\n",
"    \"http://www.cs.cmu.edu/afs/cs.cmu.edu/project/theo-20/www/data/news20.tar.gz\",\n",
"    untar=True,\n",
-")\n",
-""
+")"
]
},
{
@@ -100,17 +98,14 @@
"import os\n",
"import pathlib\n",
"\n",
-"os.listdir(pathlib.Path(data_path).parent)\n",
-"\n",
"data_dir = pathlib.Path(data_path).parent / \"20_newsgroup\"\n",
"dirnames = os.listdir(data_dir)\n",
"print(\"Number of directories:\", len(dirnames))\n",
"print(\"Directory names:\", dirnames)\n",
"\n",
"fnames = os.listdir(data_dir / \"comp.graphics\")\n",
"print(\"Number of files in comp.graphics:\", len(fnames))\n",
-"print(\"Some example filenames:\", fnames[:5])\n",
-""
+"print(\"Some example filenames:\", fnames[:5])"
]
},
{
@@ -130,8 +125,7 @@
},
"outputs": [],
"source": [
-"print(open(data_dir / \"comp.graphics\" / \"38987\").read())\n",
-""
+"print(open(data_dir / \"comp.graphics\" / \"38987\").read())"
]
},
{
@@ -174,8 +168,7 @@
"        class_index += 1\n",
"\n",
"print(\"Classes:\", class_names)\n",
-"print(\"Number of samples:\", len(samples))\n",
-""
+"print(\"Number of samples:\", len(samples))"
]
},
{
@@ -218,8 +211,7 @@
"train_samples = samples[:-num_validation_samples]\n",
"val_samples = samples[-num_validation_samples:]\n",
"train_labels = labels[:-num_validation_samples]\n",
-"val_labels = labels[-num_validation_samples:]\n",
-""
+"val_labels = labels[-num_validation_samples:]"
]
},
{
@@ -249,8 +241,7 @@
"\n",
"vectorizer = TextVectorization(max_tokens=20000, output_sequence_length=200)\n",
"text_ds = tf.data.Dataset.from_tensor_slices(train_samples).batch(128)\n",
-"vectorizer.adapt(text_ds)\n",
-""
+"vectorizer.adapt(text_ds)"
]
},
{
@@ -271,8 +262,7 @@
},
"outputs": [],
"source": [
-"vectorizer.get_vocabulary()[:5]\n",
-""
+"vectorizer.get_vocabulary()[:5]"
]
},
{
@@ -292,9 +282,8 @@
},
"outputs": [],
"source": [
-"output = vectorizer(np.array([[\"the cat sat on the mat\"]]))\n",
-"output.numpy()[0, :6]\n",
-""
+"output = vectorizer([[\"the cat sat on the mat\"]])\n",
+"output.numpy()[0, :6]"
]
},
{
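Aside, not part of the diff: the change above drops the `np.array(...)` wrapper because `TextVectorization` can be called directly on a nested list of strings. A minimal, self-contained sketch of the updated call, assuming a TF 2.3-era runtime and the experimental preprocessing import path of that time:

import numpy as np
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization

# Build and adapt a tiny vectorizer so the call below runs on its own.
vectorizer = TextVectorization(max_tokens=20000, output_sequence_length=200)
vectorizer.adapt(np.array(["the cat sat on the mat"]))

# A plain nested list of strings is accepted; no np.array() wrapper is needed.
output = vectorizer([["the cat sat on the mat"]])
print(output.numpy()[0, :6])  # first 6 token ids of the (1, 200) output
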
@@ -319,8 +308,7 @@
"outputs": [],
"source": [
"voc = vectorizer.get_vocabulary()\n",
-"word_index = dict(zip(voc, range(2, len(voc))))\n",
-""
+"word_index = dict(zip(voc, range(2, len(voc))))"
]
},
{
@@ -340,9 +328,8 @@
},
"outputs": [],
"source": [
-"test = [b\"the\", b\"cat\", b\"sat\", b\"on\", b\"the\", b\"mat\"]\n",
-"[word_index[w] for w in test]\n",
-""
+"test = [\"the\", \"cat\", \"sat\", \"on\", \"the\", \"mat\"]\n",
+"[word_index[w] for w in test]"
]
},
{
@@ -401,8 +388,7 @@
"            coefs = np.fromstring(coefs, \"f\", sep=\" \")\n",
"            embeddings_index[word] = coefs\n",
"\n",
-"print(\"Found %s word vectors.\" % len(embeddings_index))\n",
-""
+"print(\"Found %s word vectors.\" % len(embeddings_index))"
]
},
{
@@ -413,11 +399,7 @@
"source": [
"Now, let's prepare a corresponding embedding matrix that we can use in a Keras\n",
"`Embedding` layer. It's a simple NumPy matrix where entry at index `i` is the pre-trained\n",
-"vector for the word of index `i` in our `vectorizer`'s vocabulary.\n",
-"\n",
-"**Note:** the `TextVectorization` layer stores tokens as bytes, not `str` types.\n",
-"This means that you need to decode them to `utf-8` before doing string comparisons, like\n",
-"the below: `embeddings_index.get(word.decode('utf-8'))`"
+"vector for the word of index `i` in our `vectorizer`'s vocabulary."
]
},
{
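Aside, not part of the diff: the note removed above became obsolete once `TextVectorization.get_vocabulary()` started returning Python `str` tokens rather than bytes, which is also why the `.decode(\"utf-8\")` call disappears in the next hunk. A hedged sanity check, reusing the `vectorizer` and `embeddings_index` objects from the cells above:

# Assumes `vectorizer` and `embeddings_index` from earlier cells in this notebook.
voc = vectorizer.get_vocabulary()
print(type(voc[2]))                # <class 'str'> on TF 2.3+; bytes on older versions
print(voc[2] in embeddings_index)  # tokens now key the GloVe index directly
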
@@ -436,7 +418,7 @@
"# Prepare embedding matrix\n",
"embedding_matrix = np.zeros((num_tokens, embedding_dim))\n",
"for word, i in word_index.items():\n",
-"    embedding_vector = embeddings_index.get(word.decode(\"utf-8\"))\n",
+"    embedding_vector = embeddings_index.get(word)\n",
" if embedding_vector is not None:\n",
" # Words not found in embedding index will be all-zeros.\n",
" # This includes the representation for \"padding\" and \"OOV\"\n",
@@ -445,7 +427,6 @@
"    else:\n",
"        misses += 1\n",
"print(\"Converted %d words (%d misses)\" % (hits, misses))\n",
-"\n",
""
]
},
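Aside, not part of the diff: pieced together across the two hunks above, the updated cell reads as below. The `hits = misses = 0` initialization is assumed from unchanged lines outside the hunks; `num_tokens`, `embedding_dim`, `word_index`, and `embeddings_index` come from earlier cells.

# Prepare embedding matrix: row i holds the GloVe vector for vocabulary token i.
embedding_matrix = np.zeros((num_tokens, embedding_dim))
hits = 0
misses = 0
for word, i in word_index.items():
    embedding_vector = embeddings_index.get(word)  # str key; no .decode() needed
    if embedding_vector is not None:
        # Words missing from the GloVe index stay all-zeros,
        # including the "padding" and "OOV" entries.
        embedding_matrix[i] = embedding_vector
        hits += 1
    else:
        misses += 1
print("Converted %d words (%d misses)" % (hits, misses))
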
@@ -476,8 +457,7 @@
"    embedding_dim,\n",
"    embeddings_initializer=keras.initializers.Constant(embedding_matrix),\n",
"    trainable=False,\n",
-")\n",
-""
+")"
]
},
{
@@ -513,8 +493,7 @@
"x = layers.Dropout(0.5)(x)\n",
"preds = layers.Dense(len(class_names), activation=\"softmax\")(x)\n",
"model = keras.Model(int_sequences_input, preds)\n",
-"model.summary()\n",
-""
+"model.summary()"
]
},
{
@@ -541,8 +520,7 @@
"x_val = vectorizer(np.array([[s] for s in val_samples])).numpy()\n",
"\n",
"y_train = np.array(train_labels)\n",
-"y_val = np.array(val_labels)\n",
-""
+"y_val = np.array(val_labels)"
]
},
{
@@ -566,8 +544,7 @@
"model.compile(\n",
"    loss=\"sparse_categorical_crossentropy\", optimizer=\"rmsprop\", metrics=[\"acc\"]\n",
")\n",
-"model.fit(x_train, y_train, batch_size=128, epochs=20, validation_data=(x_val, y_val))\n",
-""
+"model.fit(x_train, y_train, batch_size=128, epochs=20, validation_data=(x_val, y_val))"
]
},
{
@@ -602,8 +579,7 @@
"    [[\"this message is about computer graphics and 3D modeling\"]]\n",
")\n",
"\n",
-"class_names[np.argmax(probabilities[0])]\n",
-""
+"class_names[np.argmax(probabilities[0])]"
]
}
],