diff --git a/colors.py b/colors.py new file mode 100644 index 0000000..bfbaddf --- /dev/null +++ b/colors.py @@ -0,0 +1,241 @@ +from collections import defaultdict +import colorsys +import csv +import matplotlib.pyplot as plt +import matplotlib.patches as mpatch + +__author__ = "Christopher Potts" +__version__ = "CS224u, Stanford, Spring 2020" + + +TURN_BOUNDARY = " ### " + + +class ColorsCorpusReader: + """Basic interface for the Stanford Colors in Context corpus: + + https://cocolab.stanford.edu/datasets/colors.html + + Parameters + ---------- + src_filename : str + Full path to the corpus file. + word_count : int or None + If int, then only examples with `word_count` words in their + 'contents' field are included (as estimated by the number of + whitespqce tokens). If None, then all examples are returned. + normalize_colors : bool + The colors in the corpus are in HLS format with values + [0, 360], [0, 100], [0, 100]. If `normalize_colors=True`, + these are scaled into [0, 1], [0, 1], [0, 1]. + + Usage + ----- + corpus = ColorsCorpusReader('filteredCorpus.csv') + + for ex in corpus.read(): + # ... + + """ + def __init__(self, src_filename, word_count=None, normalize_colors=True): + self.src_filename = src_filename + self.word_count = word_count + self.normalize_colors = normalize_colors + + def read(self): + """The main interface to the corpus. + + As in the paper, turns taken in the same game and round are + grouped together into a single `ColorsCorpusExample` instance + with the turn texts separated by `TURN_BOUNDARY`, formatted + as a string. + + Yields + ------ + `ColorsCorpusExample` with the `normalize_colors` attribute set + as in `self.normalize_colors` in this class. + + """ + grouped = defaultdict(list) + with open(self.src_filename) as f: + reader = csv.DictReader(f) + for row in reader: + if row['role'] == 'speaker' and self._word_count_filter(row): + grouped[(row['gameid'], row['roundNum'])].append(row) + for rows in grouped.values(): + yield ColorsCorpusExample( + rows, normalize_colors=self.normalize_colors) + + def _word_count_filter(self, row): + return self.word_count is None or \ + row['contents'].count(" ") == (self.word_count-1) + + +class ColorsCorpusExample: + """Interface to individual examples in the Stanford Colors in + Context corpus. + + Parameters + ---------- + rows : list of dict + This contains all of the turns associated with a given game + and round. The assumption is that all of the key-value pairs + in these dicts are the same except for the 'contents' key. + normalize_colors : bool + The colors in the corpus are in HLS format with values + [0, 360], [0, 100], [0, 100]. If `normalize_colors=True`, + these are scaled into [0, 1], [0, 1], [0, 1]. + + Usage + ----- + We assume that these instances are created by `ColorsCorpusReader`. + For an example of one being created directly, see + `test/test_colors.py::test_color_corpus_example`. + + Note + ---- + There are values in the corpus that are present in `rows` but + not captured in attributes right now, to keep this code from + growing very complex. It should be straightforward to bring + in these additional attributes by subclassing this class. 
+ + """ + def __init__(self, rows, normalize_colors=True): + self.normalize_colors = normalize_colors + self.contents = TURN_BOUNDARY.join([r['contents'] for r in rows]) + # Make sure our assumptions about these rows are correct: + self._check_row_alignment(rows) + row = rows[0] + self.gameid = row['gameid'] + self.roundNum = int(row['roundNum']) + self.condition = row['condition'] + self.outcome = row['outcome'] == 'true' + self.clickStatus = row['clickStatus'] + self.color_data = [] + for typ in ['click', 'alt1', 'alt2']: + self.color_data.append({ + 'type': typ, + 'Status': row['{}Status'.format(typ)], + 'rep': self._get_color_rep(row, typ), + 'speaker': int(row['{}LocS'.format(typ)]), + 'listener': int(row['{}LocL'.format(typ)])}) + self.colors = self._get_reps_in_order('Status') + self.listener_context = self._get_reps_in_order('listener') + self.speaker_context = self._get_reps_in_order('speaker') + + def parse_turns(self): + """"Turns the `contents` string into a list by splitting on + `TURN_BOUNDARY`. + + Returns + ------- + list of str + + """ + return self.contents.split(TURN_BOUNDARY) + + def display(self, typ='model'): + """Prints examples to the screen in an intuitive format: the + utterance text appears first, following by the three color + patches, with the target identified by a black border in the + 'speaker' and 'model' variants. + + Parameters + ---------- + typ : str + Should be 'model', 'speaker', or 'listener'. This + determines the order the color patches are given. For + 'speaker' and 'listener', this is the order in the corpus. + For 'model', it is a version with the two distractors + printed in their canonical order and the target given last. + + Raises + ------ + ValueError + If `typ` isn't one of 'model', 'speaker', 'listener'. + + Prints + ------ + text to standard output and three color patches as a + `matplotlib.pyplot` image. For notebook usage, this should + all embed nicely. + + """ + print(self.contents) + if typ == 'model': + colors = self.colors + target_index = 2 + elif typ == 'listener': + colors = self.listener_context + target_index = None + elif typ == 'speaker': + colors = self.speaker_context + target_index = self._get_target_index('speaker') + else: + raise ValueError('`typ` options: "model", "listener", "speaker"') + + rgbs = [self._convert_hls_to_rgb(*c) for c in colors] + + fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(3, 1)) + + for i, c in enumerate(rgbs): + ec = c if (i != target_index or typ == 'listener') else "black" + patch = mpatch.Rectangle((0, 0), 1, 1, color=c, ec=ec, lw=8) + axes[i].add_patch(patch) + axes[i].axis('off') + + def _get_color_rep(self, row, typ): + rep = [] + for dim in ['H', 'L', 'S']: + colname = "{}Col{}".format(typ, dim) + rep.append(float(row[colname])) + if self.normalize_colors: + rep = self._scale_color(*rep) + return rep + + def _convert_hls_to_rgb(self, h, l, s): + if not self.normalize_colors: + h, l, s = self._scale_color(h, l, s) + return colorsys.hls_to_rgb(h, l, s) + + @staticmethod + def _scale_color(h, l, s): + return [h/360, l/100, s/100] + + def _get_reps_in_order(self, field): + colors = [(d[field], d['rep']) for d in self.color_data] + return [rep for s, rep in sorted(colors)] + + def _get_target_index(self, field): + for d in self.color_data: + if d['Status'] == 'target': + return d[field] - 1 + + @staticmethod + def _check_row_alignment(rows): + """We expect all the dicts in `rows` to have the same + keys and values except for the keys associated with the + messages. 
This function tests this assumption holds. + + """ + keys = set(rows[0].keys()) + for row in rows[1:]: + if set(row.keys()) != keys: + raise RuntimeError( + "The dicts in the `rows` argument to `ColorsCorpusExample` " + "must have all the same keys.") + exempted = {'contents', 'msgTime', + 'numRawWords', 'numRawChars', + 'numCleanWords', 'numCleanChars'} + keys = keys - exempted + for row in rows[1: ]: + for key in keys: + if rows[0][key] != row[key]: + raise RuntimeError( + "The dicts in the `rows` argument to `ColorsCorpusExample` " + "must have all the same key values except for the keys " + "associated with the message. The key {} has values {} " + "and {}".format(key, rows[0][key], row[key])) + + def __str__(self): + return self.contents diff --git a/colors_overview.ipynb b/colors_overview.ipynb new file mode 100644 index 0000000..2c885c7 --- /dev/null +++ b/colors_overview.ipynb @@ -0,0 +1,1763 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Pragmatic color describers" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "__author__ = \"Christopher Potts\"\n", + "__version__ = \"CS224u, Stanford, Spring 2019\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Contents\n", + "\n", + "1. [Overview](#Overview)\n", + "1. [Set-up](#Set-up)\n", + "1. [The corpus](#The-corpus)\n", + " 1. [Corpus reader](#Corpus-reader)\n", + " 1. [ColorsCorpusExample instances](#ColorsCorpusExample-instances)\n", + " 1. [Displaying examples](#Displaying-examples)\n", + " 1. [Color representations](#Color-representations)\n", + " 1. [Utterance texts](#Utterance-texts)\n", + " 1. [Far, Split, and Close conditions](#Far,-Split,-and-Close-conditions)\n", + "1. [Toy problems for development work](#Toy-problems-for-development-work)\n", + "1. [Core model](#Core-model)\n", + " 1. [Toy dataset illustration](#Toy-dataset-illustration)\n", + " 1. [Predicting sequences](#Predicting-sequences)\n", + " 1. [Listener-based evaluation](#Listener-based-evaluation)\n", + " 1. [Other prediction and evaluation methods](#Other-prediction-and-evaluation-methods)\n", + " 1. [Cross-validation](#Cross-validation)\n", + "1. [Baseline SCC model](#Baseline-SCC-model)\n", + "1. [Modifying the core model](#Modifying-the-core-model)\n", + " 1. [Illustration: LSTM Cells](#Illustration:-LSTM-Cells)\n", + " 1. [Illustration: Deeper models](#Illustration:-Deeper-models)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Overview\n", + "\n", + "This notebook is part of our unit on grounding. It illustrates core concepts from the unit, and it provides useful background material for the associated homework and bake-off." 
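A side note on `colors.py` above before the set-up: its docstring points to `test/test_colors.py` for an example of creating a `ColorsCorpusExample` directly from row dicts rather than via the reader. Here is a minimal sketch of that pattern. The row values are invented (real corpus rows carry additional columns), and the `Status` strings are chosen so that sorting places the target last, which is what the 'model' ordering assumes.

```python
from colors import ColorsCorpusExample

# One invented speaker row; HLS values are in the corpus's unnormalized ranges.
row = {
    'gameid': 'demo-game', 'roundNum': '1', 'condition': 'far',
    'outcome': 'true', 'contents': 'blue',
    'clickStatus': 'target', 'clickColH': '240', 'clickColL': '50', 'clickColS': '100',
    'clickLocS': '1', 'clickLocL': '3',
    'alt1Status': 'distr1', 'alt1ColH': '0', 'alt1ColL': '50', 'alt1ColS': '100',
    'alt1LocS': '2', 'alt1LocL': '1',
    'alt2Status': 'distr2', 'alt2ColH': '120', 'alt2ColL': '50', 'alt2ColS': '100',
    'alt2LocS': '3', 'alt2LocL': '2'}

ex = ColorsCorpusExample([row], normalize_colors=True)
ex.colors           # normalized HLS triples with the target last
ex.speaker_context  # the same triples in the speaker's display order
ex.parse_turns()    # ['blue']
```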
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Set-up" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from colors import ColorsCorpusReader\n", + "import os\n", + "import pandas as pd\n", + "from sklearn.model_selection import train_test_split\n", + "import torch\n", + "from torch_color_describer import (\n", + " ContextualColorDescriber, create_example_dataset)\n", + "import utils\n", + "from utils import START_SYMBOL, END_SYMBOL, UNK_SYMBOL" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "utils.fix_random_seeds()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The [Stanford English Colors in Context corpus](https://cocolab.stanford.edu/datasets/colors.html) (SCC) is included in the data distribution for this course. If you store the data in a non-standard place, you'll need to update the following:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "COLORS_SRC_FILENAME = os.path.join(\n", + " \"data\", \"colors\", \"filteredCorpus.csv\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## The corpus" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The SCC corpus is based in a two-player interactive game. The two players share a context consisting of three color patches, with the display order randomized between them so that they can't use positional information when communicating.\n", + "\n", + "The __speaker__ is privately assigned a target color and asked to produce a description of it that will enable the __listener__ to identify the speaker's target. The listener makes a choice based on the speaker's message, and the two succeed if and only if the listener identifies the target correctly.\n", + "\n", + "In the game, the two players played repeated reference games and could communicate with each other in a free-form way. This opens up the possibility of modeling these repeated interactions as task-oriented dialogues. However, for this unit, we'll ignore most of this structure. We'll treat the corpus as a bunch of independent reference games played by anonymous players, and we will ignore the listener and their choices entirely.\n", + "\n", + "For the bake-off, we will be distributing a separate test set. Thus, all of the data in the SCC can be used for exploration and development." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Corpus reader" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The corpus reader class is `ColorsCorpusReader` in `colors.py`. The reader's primary function is to let you iterate over corpus examples:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "corpus = ColorsCorpusReader(\n", + " COLORS_SRC_FILENAME,\n", + " word_count=None, \n", + " normalize_colors=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The two keyword arguments have their default values here. \n", + "\n", + "* If you supply `word_count` with an interger value, it will restrict to just examples where the utterance has that number of words (using a whitespace heuristic). This creates smaller corpora that are useful for development.\n", + "\n", + "* The colors in the corpus are in [HLS format](https://en.wikipedia.org/wiki/HSL_and_HSV). 
With `normalize_colors=False`, the first (hue) value is an integer between 1 and 360 inclusive, and the L (lightness) and S (saturation) values are between 1 and 100 inclusive. With `normalize_colors=True`, these values are all scaled to between 0 and 1 inclusive. The default is `normalize_colors=True` because this is a better choice for all the machine learning models we'll consider." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "examples = list(corpus.read())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can verify that we read in the same number of examples as reported in [Monroe et al. 2017](https://transacl.org/ojs/index.php/tacl/article/view/1142):" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "46994" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Should be 46994:\n", + "\n", + "len(examples)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### ColorsCorpusExample instances" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The examples are `ColorsCorpusExample` instances:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "ex1 = next(corpus.read())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "These objects have a lot of attributes and methods designed to help you study the corpus and use it for our machine learning tasks. Let's review some highlights." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Displaying examples" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can see what the speaker saw, with the utterance they chose wote above the patches:" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The darker blue one\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAALUAAABECAYAAADHnXQVAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAABLUlEQVR4nO3YMUrEUBRA0XyZSiutnC24EjvXajcrcQtOpZW2315kVMgQ5nJOmxTvweURMuacC5RcbT0ArE3U5IiaHFGTI2pydqcejjEu/tfInHP85b3nu/eL33VZluXp7fbXfR9uPhO7vnxc/7irS02OqMkRNTknv6m/u398Pdccqzke9luPwMZcanJETY6oyRE1OaImR9TkiJocUZMjanJETY6oyRE1OaImR9TkiJocUZMjanJETY6oyRE1OaImR9TkiJocUZMjanJETY6oyRE1OaImR9TkiJocUZMjanJETY6oyRE1OaImR9TkiJocUZMjanJETY6oyRE1OaImR9Tk7P7z8vGwP9ccsBqXmhxRkyNqcsacc+sZYFUuNTmiJkfU5IiaHFGTI2pyvgBwhhdAIEFGnQAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "ex1.display(typ='speaker')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is the original order of patches for the speaker. The target happens to the be the leftmost patch, as indicated by the black box around it.\n", + "\n", + "Here's what the listener saw, with the speaker's message printed above the patches:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The darker blue one\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAALUAAABECAYAAADHnXQVAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAABFUlEQVR4nO3YsW1CMRRAUX+UCipShRXYhCqzpsomrJBUSRVaswAiFEiIq3Nau3hPunLhZc45oGT16AHg3kRNjqjJETU5oibn5drhfnN6+q+R4996ueXe7vD99LuOMcbX59u/+368/iZ2ff/ZXtzVS02OqMkRNTmiJkfU5IiaHFGTI2pyRE2OqMkRNTmiJkfU5IiaHFGTI2pyRE2OqMkRNTmiJkfU5IiaHFGTI2pyRE2OqMkRNTmiJkfU5IiaHFGTI2pyRE2OqMkRNTmiJkfU5IiaHFGTI2pyRE2OqMkRNTmiJkfU5IiaHFGTI2pyRE2OqMkRNTmiJkfU5IianGXO+egZ4K681OSImhxRkyNqckRNjqjJOQNHYRKDRd/3AwAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "ex1.display(typ='listener')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The listener isn't shown the target, of course, so no patches are highlighted." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If `display` is called with no arguments, then the target is placed in the final position and the other two are given in an order determined by the corpus metadata:" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The darker blue one\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAALUAAABECAYAAADHnXQVAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAABLElEQVR4nO3YsU3DUBRAUX+UCiqoyApMQsesdJmEFUgFFbSfBVBwYcnK5ZzWLt6Trp4sjznnAiU3ew8AWxM1OaImR9TkiJqcw6WHrw+fV/9r5OXjfqx57+nu++p3XZZlefu6/XPfMUZi1znnr7u61OSImhxRk3Pxm5r/4fH5fe8RVjmfjqvec6nJETU5oiZH1OSImhxRkyNqckRNjqjJETU5oiZH1OSImhxRkyNqckRNjqjJETU5oiZH1OSImhxRkyNqckRNjqjJETU5oiZH1OSImhxRkyNqckRNjqjJETU5oiZH1OSImhxRkyNqckRNjqjJETU5oiZH1OSImhxRk3PYewD2dz4d9x5hUy41OaImR9TkjDnn3jPAplxqckRNjqjJETU5oiZH1OT8AK1HF0DPcEkgAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "ex1.display()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is the representation order we use for our machine learning models." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Color representations" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For machine learning, we'll often need to access the color representations directly. The primary attribute for this is `colors`:" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[[0.7861111111111111, 0.5, 0.87],\n", + " [0.6888888888888889, 0.5, 0.92],\n", + " [0.6277777777777778, 0.5, 0.81]]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ex1.colors" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this display order, the third element is the target color and the first two are the distractors. The attributes `speaker_context` and `listener_context` return the same colors but in the order that those players saw them. For example:" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[[0.6277777777777778, 0.5, 0.81],\n", + " [0.7861111111111111, 0.5, 0.87],\n", + " [0.6888888888888889, 0.5, 0.92]]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ex1.speaker_context" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Utterance texts" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Utterances are just strings: " + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'The darker blue one'" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ex1.contents" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "There are cases where the speaker made a sequences of utterances for the same trial. We follow [Monroe et al. 2017](https://transacl.org/ojs/index.php/tacl/article/view/1142) in concatenating these into a single utterances. To preserve the original information, the individual turns are separated by `\" ### \"`. 
Example 3 is the first with this property – let's check it out:" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "ex3 = examples[2]" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Medium pink ### the medium dark one'" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ex3.contents" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The method `parse_turns` will parse this into individual turns:" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Medium pink', 'the medium dark one']" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ex3.parse_turns()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For examples consisting of a single turn, `parse_turns` returns a list of length 1:" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['The darker blue one']" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ex1.parse_turns()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Far, Split, and Close conditions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The SCC contains three conditions:\n", + " \n", + "__Far condition__: All three colors are far apart in color space. Example:" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Condition type: far\n", + "purple\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAALUAAABECAYAAADHnXQVAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAABLUlEQVR4nO3YwUnEUBRA0XyZbrQE3Qp24SytaJZOF4JbLUHr+TYgYxaBMNdztsniPbg8Qsacc4GSm70HgK2JmhxRkyNqckRNzuHSw7vvx6v/NfJ1+z7WvPf59nD1uy7Lstw/ffy57xgjseuc89ddXWpyRE2OqMm5+E3N//Dy/Lr3CKuczsdV77nU5IiaHFGTI2pyRE2OqMkRNTmiJkfU5IiaHFGTI2pyRE2OqMkRNTmiJkfU5IiaHFGTI2pyRE2OqMkRNTmiJkfU5IiaHFGTI2pyRE2OqMkRNTmiJkfU5IiaHFGTI2pyRE2OqMkRNTmiJkfU5IiaHFGTI2pyRE2OqMk57D0A+zudj3uPsCmXmhxRkyNqcsacc+8ZYFMuNTmiJkfU5IiaHFGTI2pyfgAdJBcf7IJsUgAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "print(\"Condition type:\", examples[1].condition)\n", + "\n", + "examples[1].display()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "__Split condition__: The target is close to one of the distractors, and the other is far away from both of them. Example:" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Condition type: split\n", + "lime\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAALUAAABECAYAAADHnXQVAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAABKklEQVR4nO3YQUrDUBRA0XzpbuyO7FTXUHAsuAad1h3Z9Xw3IDWDQOj1nGkyeA8uj5Ax51yg5GHvAWBroiZH1OSImhxRk3O49fDt8+Xuf428Pn+MNe9dTl93v+uyLMvp8vTnvmOMxK5zzl93danJETU5oibn5jc1/8P79+PeI6xyPl5XvedSkyNqckRNjqjJETU5oiZH1OSImhxRkyNqckRNjqjJETU5oiZH1OSImhxRkyNqckRNjqjJETU5oiZH1OSImhxRkyNqckRNjqjJETU5oiZH1OSImhxRkyNqckRNjqjJETU5oiZH1OSImhxRkyNqckRNjqjJETU5oibnsPcA7O98vO49wqZcanJETY6oyRlzzr1ngE251OSImhxRkyNqckRNjqjJ+QHLEhcAkintbgAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "print(\"Condition type:\", examples[3].condition)\n", + "\n", + "examples[3].display()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "__Close condition__: The target is similar to both distractors. Example:" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Condition type: close\n", + "Medium pink ### the medium dark one\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAALUAAABECAYAAADHnXQVAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAABK0lEQVR4nO3YwUnEUBRA0XyZCnRlH4rDtGC9tiAO2ocrbeHbgIxZBMJcz9kmi/fg8ggZc84FSm72HgC2JmpyRE2OqMkRNTmHSw+/7z6v/tfI7df9WPPey+PH1e+6LMvy/P7w575jjMSuc85fd3WpyRE1OaIm5+I3Nf/D29Pr3iOscjyfVr3nUpMjanJETY6oyRE1OaImR9TkiJocUZMjanJETY6oyRE1OaImR9TkiJocUZMjanJETY6oyRE1OaImR9TkiJocUZMjanJETY6oyRE1OaImR9TkiJocUZMjanJETY6oyRE1OaImR9TkiJocUZMjanJETY6oyRE1OaIm57D3AOzveD7tPcKmXGpyRE2OqMkZc869Z4BNudTkiJocUZMjanJETY6oyfkBPhUWwkgMDc4AAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "print(\"Condition type:\", examples[2].condition)\n", + "\n", + "examples[2].display()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "These conditions go from easiest to hardest when it comes to reliable communication. In the __Far__ condition, the context is hardly relevant, whereas the nature of the distractors reliably shapes the speaker's choices in the other two conditions. \n", + "\n", + "You can begin to see how this affects speaker choices in the above examples: \"purple\" suffices for the __Far__ condition, a more marked single word (\"lime\") suffices in the __Split__ condition, and the __Close__ condition triggers a pretty long, complex description." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `condition` attribute provides access to this value: " + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'close'" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ex1.condition" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The following verifies that we have the same number of examples per condition as reported in [Monroe et al. 2017](https://transacl.org/ojs/index.php/tacl/article/view/1142):" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "far 15782\n", + "split 15693\n", + "close 15519\n", + "dtype: int64" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.Series([ex.condition for ex in examples]).value_counts()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Toy problems for development work" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The SCC corpus is fairly large and quite challenging as an NLU task. This means it isn't ideal when it comes to testing hypotheses and debugging code. 
Poor performance could trace to a mistake, but it could just as easily trace to the fact that the problem is very challenging from the point of view of optimization.\n", + "\n", + "To address this, the module `torch_color_describer.py` includes a function `create_example_dataset` for creating small, easy datasets with the same basic properties as the SCC corpus.\n", + "\n", + "Here's a toy problem containing just six examples:" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "tiny_contexts, tiny_words, tiny_vocab = create_example_dataset(\n", + " group_size=2, vec_dim=2)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['', '', 'A', 'B', '$UNK']" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tiny_vocab" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[['', 'A', ''],\n", + " ['', 'A', ''],\n", + " ['', 'A', 'B', ''],\n", + " ['', 'A', 'B', ''],\n", + " ['', 'B', 'A', 'B', 'A', ''],\n", + " ['', 'B', 'A', 'B', 'A', '']]" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tiny_words" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[[array([0.84464215, 0.94729424]),\n", + " array([0.5353399 , 0.57843591]),\n", + " array([0.00500215, 0.05500586])],\n", + " [array([0.80595944, 0.84372759]),\n", + " array([0.50107106, 0.40530719]),\n", + " array([0.01738777, 0.08438436])],\n", + " [array([0.88390396, 0.88984181]),\n", + " array([0.05563814, 0.17386006]),\n", + " array([0.54320392, 0.54026499])],\n", + " [array([0.88452288, 0.85557427]),\n", + " array([0.04306275, 0.15269883]),\n", + " array([0.55176147, 0.43193186])],\n", + " [array([0.56949887, 0.52074521]),\n", + " array([0.16142565, 0.14594636]),\n", + " array([0.81854917, 0.81934328])],\n", + " [array([0.47570688, 0.51040813]),\n", + " array([0.16588093, 0.12370395]),\n", + " array([0.90724562, 0.99462315])]]" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tiny_contexts" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Each member of `tiny_contexts` contains three vectors. The final (target) vector always has values in a range that determines the corresponding word sequence, which is drawn from a set of three fixed sequences. Thus, the model basically just needs to learn to ignore the distractors and find the association between the target vector and the corresponding sequence. \n", + "\n", + "All the models we study have a capacity to solve this task with very little data, so you should see perfect or near perfect performance on reasonably-sized versions of this task." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Core model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Our core model for this problem is implemented in `torch_color_describer.py` as `ContextualColorDescriber`. At its heart, this is a pretty standard encoder–decoder model:\n", + "\n", + "* `Encoder`: Processes the color contexts as a sequence. 
We always place the target in final position so that it is closest to the supervision signals that we get when decoding.\n", + "\n", + "* `Decoder`: A neural language model whose initial hidden representation is the final hidden representation of the `Encoder`.\n", + "\n", + "* `EncoderDecoder`: Coordinates the operations of the `Encoder` and `Decoder`.\n", + "\n", + "Finally, `ContextualColorDescriber` is a wrapper around these model components. It handle the details of training and implements the prediction and evaluation functions that we will use.\n", + "\n", + "Many additional details about this model are included in the slides for this unit." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Toy dataset illustration" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To highlight the core functionality of `ContextualColorDescriber`, let's create a small toy dataset and use it to train and evaluate a model:" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "toy_color_seqs, toy_word_seqs, toy_vocab = create_example_dataset(\n", + " group_size=50, vec_dim=2)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "toy_color_seqs_train, toy_color_seqs_test, toy_word_seqs_train, toy_word_seqs_test = \\\n", + " train_test_split(toy_color_seqs, toy_word_seqs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here we expose all of the available parameters with their default values:" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "toy_mod = ContextualColorDescriber(\n", + " toy_vocab, \n", + " embedding=None, # Option to supply a pretrained matrix as an `np.array`.\n", + " embed_dim=10, \n", + " hidden_dim=10, \n", + " max_iter=100, \n", + " eta=0.01,\n", + " optimizer=torch.optim.Adam,\n", + " batch_size=128,\n", + " l2_strength=0.0,\n", + " warm_start=False,\n", + " device=None)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Epoch 100; err = 0.13451486825942993" + ] + } + ], + "source": [ + "_ = toy_mod.fit(toy_color_seqs_train, toy_word_seqs_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Predicting sequences" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `predict` method takes a list of color contexts as input and returns model descriptions:" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "toy_preds = toy_mod.predict(toy_color_seqs_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['', 'A', 'B', '']" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "toy_preds[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can then check that we predicted all correct sequences:" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1.0" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "toy_correct = sum(1 for x, p in zip(toy_word_seqs_test, toy_preds))\n", + "\n", + "toy_correct / 
len(toy_word_seqs_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For real problems, this is too stringent a requirement, since there are generally many equally good descriptions. This insight gives rise to metrics like [BLEU](https://en.wikipedia.org/wiki/BLEU), [METEOR](https://en.wikipedia.org/wiki/METEOR), [ROUGE](https://en.wikipedia.org/wiki/ROUGE_(metric)), [CIDEr](https://arxiv.org/pdf/1411.5726.pdf), and others, which seek to relax the requirement of an exact match with the test sequence. These are reasonable options to explore, but we will instead adopt a communication-based evaluation, as discussed in the next section." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Listener-based evaluation" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`ContextualColorDescriber` implements a method `listener_accuracy` that we will use for our primary evaluations in the assignment and bake-off. The essence of the method is that we can calculate\n", + "\n", + "$$\n", + "c^{*} = \\text{argmax}_{c \\in C} P_S(\\text{utterance} \\mid c)\n", + "$$\n", + "\n", + "\n", + "where $P_S$ is our describer model and $C$ is the set of all permutations of all three colors in the color context. We take $c^{*}$ to be a correct prediction if it is one where the target is in the privileged final position. (There are two such contexts; we try both in case the order of the distractors influences the predictions, and the model is correct if one of them has the highest probability.)\n", + "\n", + "Here's the listener accuracy of our toy model:" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1.0" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "toy_mod.listener_accuracy(toy_color_seqs_test, toy_word_seqs_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Other prediction and evaluation methods" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can get the perplexities for test examples with `perplexities`:" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [], + "source": [ + "toy_perp = toy_mod.perplexities(toy_color_seqs_test, toy_word_seqs_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1.018597919229854" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "toy_perp[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can use `predict_proba` to see the full probability distributions assigned to test examples:" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [], + "source": [ + "toy_proba = toy_mod.predict_proba(toy_color_seqs_test, toy_word_seqs_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(4, 5)" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "toy_proba[0].shape" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'': 1.0, '': 0.0, 'A': 0.0, 'B': 0.0, '$UNK': 0.0}\n", + "{'': 0.0036859103, '': 
0.0002668097, 'A': 0.9854643, 'B': 0.00914348, '$UNK': 0.0014396048}\n", + "{'': 0.004782134, '': 0.024507374, 'A': 0.0019362223, 'B': 0.96381474, '$UNK': 0.0049594548}\n", + "{'': 0.0050890064, '': 0.9780351, 'A': 0.014443797, 'B': 0.0008280464, '$UNK': 0.0016041624}\n" + ] + } + ], + "source": [ + "for timestep in toy_proba[0]:\n", + " print(dict(zip(toy_vocab, timestep)))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Cross-validation" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can use `utils.fit_classifier_with_crossvalidation` to cross-validate these models. Just be sure to set `scoring=None` so that the sklearn model selection methods use the `score` method of `ContextualColorDescriber`, which is an alias for `listener_accuracy`:" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Epoch 100; err = 0.12754583358764648" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Best params: {'hidden_dim': 20}\n", + "Best score: 0.982\n" + ] + } + ], + "source": [ + "best_mod = utils.fit_classifier_with_crossvalidation(\n", + " toy_color_seqs_train, \n", + " toy_word_seqs_train, \n", + " toy_mod, \n", + " cv=2,\n", + " scoring=None,\n", + " param_grid={'hidden_dim': [10, 20]})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Baseline SCC model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Just to show how all the pieces come together, here's a very basic SCC experiment using the core code and very simplistic assumptions (which you will revisit in the assignment) about how to represent the examples:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To facilitate quick development, we'll restrict attention to the two-word examples:" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [], + "source": [ + "dev_corpus = ColorsCorpusReader(COLORS_SRC_FILENAME, word_count=2)" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [], + "source": [ + "dev_examples = list(dev_corpus.read())" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "13890" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(dev_examples)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here we extract the raw colors and texts (as strings):" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [], + "source": [ + "dev_cols, dev_texts = zip(*[[ex.colors, ex.contents] for ex in dev_examples])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To tokenize the examples, we'll just split on whitespace, taking care to add the required boundary symbols:" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [], + "source": [ + "dev_word_seqs = [[START_SYMBOL] + text.split() + [END_SYMBOL] for text in dev_texts]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We'll use a random train–test split:" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [], + "source": [ + "dev_cols_train, dev_cols_test, dev_word_seqs_train, dev_word_seqs_test = 
\\\n", + " train_test_split(dev_cols, dev_word_seqs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Our vocab is determined by the train set, and we take care to include the `$UNK` token:" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [], + "source": [ + "dev_vocab = sorted({w for toks in dev_word_seqs_train for w in toks}) + [UNK_SYMBOL]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And now we're ready to train a model:" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [], + "source": [ + "dev_mod = ContextualColorDescriber(\n", + " dev_vocab, \n", + " embed_dim=10, \n", + " hidden_dim=10, \n", + " max_iter=10, \n", + " batch_size=128)" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Epoch 10; err = 101.7589635848999" + ] + } + ], + "source": [ + "_ = dev_mod.fit(dev_cols_train, dev_word_seqs_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And finally an evaluation in terms of listener accuracy:" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.5384393895767348" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dev_mod.listener_accuracy(dev_cols_test, dev_word_seqs_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Modifying the core model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The first few assignment problems concern how you preprocess the data for your model. After that, the goal is to subclass model components in `torch_color_describer.py`. For the bake-off submission, you can do whatever you like in terms of modeling, but my hope is that you'll be able to continue subclassing based on `torch_color_describer.py`.\n", + "\n", + "This section provides some illustrative examples designed to give you a feel for how the code is structured and what your options are in terms of creating subclasses." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Illustration: LSTM Cells" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Both the `Encoder` and the `Decoder` of `torch_color_describer` are currently GRU cells. 
Switching to another cell type is easy:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "__Step 1__: Subclass the `Encoder`; all we have to do here is change `GRU` from the original to `LSTM`:" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [], + "source": [ + "import torch.nn as nn\n", + "from torch_color_describer import Encoder\n", + "\n", + "class LSTMEncoder(Encoder):\n", + " def __init__(self, color_dim, hidden_dim):\n", + " super().__init__(color_dim, hidden_dim) \n", + " self.rnn = nn.LSTM(\n", + " input_size=self.color_dim,\n", + " hidden_size=self.hidden_dim,\n", + " batch_first=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "__Step 2__: Subclass the `Decoder`, making the same simple change as above:" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [], + "source": [ + "import torch.nn as nn\n", + "from torch_color_describer import Encoder, Decoder\n", + "\n", + "class LSTMDecoder(Decoder):\n", + " def __init__(self, *args, **kwargs):\n", + " super().__init__(*args, **kwargs) \n", + " self.rnn = nn.LSTM(\n", + " input_size=self.embed_dim,\n", + " hidden_size=self.hidden_dim,\n", + " batch_first=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "__Step 3__:`ContextualColorDescriber` has a method called `build_graph` that sets up the `Encoder` and `Decoder`. The needed revision just uses `LSTMEncoder`:" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [], + "source": [ + "from torch_color_describer import EncoderDecoder\n", + "\n", + "class LSTMContextualColorDescriber(ContextualColorDescriber): \n", + " \n", + " def build_graph(self):\n", + " \n", + " # Use the new Encoder:\n", + " encoder = LSTMEncoder(\n", + " color_dim=self.color_dim,\n", + " hidden_dim=self.hidden_dim)\n", + "\n", + " # Use the new Decoder:\n", + " decoder = LSTMDecoder(\n", + " vocab_size=self.vocab_size,\n", + " embed_dim=self.embed_dim,\n", + " embedding=self.embedding,\n", + " hidden_dim=self.hidden_dim)\n", + "\n", + " return EncoderDecoder(encoder, decoder)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here's an example run:" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [], + "source": [ + "lstm_mod = LSTMContextualColorDescriber(\n", + " toy_vocab, \n", + " embed_dim=10, \n", + " hidden_dim=10, \n", + " max_iter=100, \n", + " batch_size=128)" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Epoch 100; err = 0.14593948423862457" + ] + } + ], + "source": [ + "_ = lstm_mod.fit(toy_color_seqs_train, toy_word_seqs_train) " + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1.0" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lstm_mod.listener_accuracy(toy_color_seqs_test, toy_word_seqs_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Illustration: Deeper models" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `Encoder` and `Decoder` are both currently hard-coded to have just one hidden layer. 
It is straightforward to make them deeper as long as we ensure that both the `Encoder` and `Decoder` have the same depth; since the `Encoder` final states are the initial hidden states for the `Decoder`, we need this alignment. \n", + "\n", + "(Strictly speaking, we could have different numbers of `Encoder` and `Decoder` layers, as long as we did some kind of averaging or copying to achieve the hand-off from `Encoder` to `Decoder`. I'll set this possibility aside.)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "__Step 1__: We need to subclass the `Encoder` and `Decoder` so that they have a `num_layers` argument that is fed into the RNN cell:" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [], + "source": [ + "import torch.nn as nn\n", + "from torch_color_describer import Encoder, Decoder\n", + "\n", + "class DeepEncoder(Encoder):\n", + " def __init__(self, *args, num_layers=2, **kwargs):\n", + " super().__init__(*args, **kwargs)\n", + " self.num_layers = num_layers\n", + " self.rnn = nn.GRU(\n", + " input_size=self.color_dim,\n", + " hidden_size=self.hidden_dim,\n", + " num_layers=self.num_layers,\n", + " batch_first=True) \n", + "\n", + "\n", + "class DeepDecoder(Decoder):\n", + " def __init__(self, *args, num_layers=2, **kwargs):\n", + " super().__init__(*args, **kwargs) \n", + " self.num_layers = num_layers\n", + " self.rnn = nn.GRU(\n", + " input_size=self.embed_dim,\n", + " hidden_size=self.hidden_dim,\n", + " num_layers=self.num_layers,\n", + " batch_first=True) " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "__Step 2__: As before, we need to update the `build_graph` method of `ContextualColorDescriber`. The needed revision just uses `DeepEncoder` and `DeepDecoder`. 
To expose this new argument to the user, we also add a new keyword argument to `ContextualColorDescriber`:" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [], + "source": [ + "from torch_color_describer import EncoderDecoder\n", + "\n", + "class DeepContextualColorDescriber(ContextualColorDescriber): \n", + " def __init__(self, *args, num_layers=2, **kwargs):\n", + " self.num_layers = num_layers\n", + " super().__init__(*args, **kwargs)\n", + " \n", + " def build_graph(self):\n", + " encoder = DeepEncoder(\n", + " color_dim=self.color_dim,\n", + " hidden_dim=self.hidden_dim,\n", + " num_layers=self.num_layers) # The new piece is this argument.\n", + "\n", + " decoder = DeepDecoder(\n", + " vocab_size=self.vocab_size,\n", + " embed_dim=self.embed_dim,\n", + " embedding=self.embedding,\n", + " hidden_dim=self.hidden_dim,\n", + " num_layers=self.num_layers) # The new piece is this argument.\n", + "\n", + " return EncoderDecoder(encoder, decoder)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "An example/test run:" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [], + "source": [ + "mod_deep = DeepContextualColorDescriber(\n", + " toy_vocab, \n", + " embed_dim=10, \n", + " hidden_dim=10, \n", + " max_iter=100,\n", + " batch_size=128)" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Epoch 100; err = 0.10894003510475159" + ] + } + ], + "source": [ + "_ = mod_deep.fit(toy_color_seqs_train, toy_word_seqs_train) " + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1.0" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mod_deep.listener_accuracy(toy_color_seqs_test, toy_word_seqs_test)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/hw_colors.ipynb b/hw_colors.ipynb new file mode 100644 index 0000000..e3fedc0 --- /dev/null +++ b/hw_colors.ipynb @@ -0,0 +1,952 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Homework and bake-off: pragmatic color descriptions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "__author__ = \"Christopher Potts\"\n", + "__version__ = \"CS224u, Stanford, Spring 2020\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Contents\n", + "\n", + "1. [Overview](#Overview)\n", + "1. [Set-up](#Set-up)\n", + "1. [All two-word examples as a dev corpus](#All-two-word-examples-as-a-dev-corpus)\n", + "1. [Dev dataset](#Dev-dataset)\n", + "1. [Random train–test split for development](#Random-train–test-split-for-development)\n", + "1. [Question 1: Improve the tokenizer [1 point]](#Question-1:-Improve-the-tokenizer-[1-point])\n", + "1. [Use the tokenizer](#Use-the-tokenizer)\n", + "1. [Question 2: Improve the color representations [1 point]](#Question-2:-Improve-the-color-representations-[1-point])\n", + "1. 
[Use the color representer](#Use-the-color-representer)\n", + "1. [Initial model](#Initial-model)\n", + "1. [Question 3: GloVe embeddings [1 points]](#Question-3:-GloVe-embeddings-[1-points])\n", + "1. [Try the GloVe representations](#Try-the-GloVe-representations)\n", + "1. [Question 4: Color context [3 points]](#Question-4:-Color-context-[3-points])\n", + "1. [Your original system [3 points]](#Your-original-system-[3-points])\n", + "1. [Bakeoff [1 point]](#Bakeoff-[1-point])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Overview\n", + "\n", + "This homework and associated bake-off are oriented toward building an effective system for generating color descriptions that are pragmatic in the sense that they would help a reader/listener figure out which color was being referred to in a shared context consisting of a target color (whose identity is known only to the describer/speaker) and a set of distractors.\n", + "\n", + "The notebook [colors_overview.ipynb](colors_overview.ipynb) should be studied before work on this homework begins. That notebook provides background on the task, the dataset, and the modeling code that you will be using and adapting.\n", + "\n", + "The homework questions are more open-ended than previous ones have been. Rather than asking you to implement pre-defined functionality, they ask you to try to improve baseline components of the full system in ways that you find to be effective. As usual, this culminates in a prompt asking you to develop a novel system for entry into the bake-off. In this case, though, the work you do for the homework will likely be directly incorporated into that system." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Set-up" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "See [colors_overview.ipynb](colors_overview.ipynb) for set-up instructions and other background details." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from colors import ColorsCorpusReader\n", + "import os\n", + "from sklearn.model_selection import train_test_split\n", + "from torch_color_describer import (\n", + " ContextualColorDescriber, create_example_dataset)\n", + "import utils\n", + "from utils import START_SYMBOL, END_SYMBOL, UNK_SYMBOL" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "utils.fix_random_seeds()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "COLORS_SRC_FILENAME = os.path.join(\n", + " \"data\", \"colors\", \"filteredCorpus.csv\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## All two-word examples as a dev corpus\n", + "\n", + "So that you don't have to sit through excessively long training runs during development, I suggest working with the two-word-only subset of the corpus until you enter into the late stages of system testing." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dev_corpus = ColorsCorpusReader(\n", + " COLORS_SRC_FILENAME, \n", + " word_count=2, \n", + " normalize_colors=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dev_examples = list(dev_corpus.read())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This subset has about one-third the examples of the full corpus:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "len(dev_examples)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We __should__ worry that it's not a fully representative sample. Most of the descriptions in the full corpus are shorter, and a large proportion are longer. So this dataset is mainly for debugging, development, and general hill-climbing. All findings should be validated on the full dataset at some point." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Dev dataset\n", + "\n", + "The first step is to extract the raw color and raw texts from the corpus:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dev_rawcols, dev_texts = zip(*[[ex.colors, ex.contents] for ex in dev_examples])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The raw color representations are suitable inputs to a model, but the texts are just strings, so they can't really be processed as-is. Question 1 asks you to do some tokenizing!" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Random train–test split for development\n", + "\n", + "For the sake of development runs, we create a random train–test split:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dev_rawcols_train, dev_rawcols_test, dev_texts_train, dev_texts_test = \\\n", + " train_test_split(dev_rawcols, dev_texts)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Question 1: Improve the tokenizer [1 point]\n", + "\n", + "This is the first required question – the first required modification to the default pipeline.\n", + "\n", + "The function `tokenize_example` simply splits its string on whitespace and adds the required start and end symbols:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def tokenize_example(s):\n", + " \n", + " # Improve me!\n", + " \n", + " return [START_SYMBOL] + s.split() + [END_SYMBOL]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "tokenize_example(dev_texts_train[376])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "__Your task__: Modify `tokenize_example` so that it does something more sophisticated with the input text. \n", + "\n", + "__Notes__:\n", + "\n", + "* There are useful ideas for this in [Monroe et al. 2017](https://transacl.org/ojs/index.php/tacl/article/view/1142)\n", + "* There is no requirement that you do word-level tokenization. 
Sub-word and multi-word are options.\n", +    "* This question can interact with the size of your vocabulary (see just below), and in turn with decisions about how to use `UNK_SYMBOL`.\n", +    "\n", +    "__Important__: don't forget to add the start and end symbols, else the resulting models will definitely be terrible!" +   ] +  }, +  { +   "cell_type": "markdown", +   "metadata": {}, +   "source": [ +    "## Use the tokenizer" +   ] +  }, +  { +   "cell_type": "markdown", +   "metadata": {}, +   "source": [ +    "Once the tokenizer is working, run the following cell to tokenize your inputs:" +   ] +  }, +  { +   "cell_type": "code", +   "execution_count": null, +   "metadata": {}, +   "outputs": [], +   "source": [ +    "dev_seqs_train = [tokenize_example(s) for s in dev_texts_train]\n", +    "\n", +    "dev_seqs_test = [tokenize_example(s) for s in dev_texts_test]" +   ] +  }, +  { +   "cell_type": "markdown", +   "metadata": {}, +   "source": [ +    "We use only the train set to derive a vocabulary for the model:" +   ] +  }, +  { +   "cell_type": "code", +   "execution_count": null, +   "metadata": {}, +   "outputs": [], +   "source": [ +    "dev_vocab = sorted({w for toks in dev_seqs_train for w in toks}) + [UNK_SYMBOL]" +   ] +  }, +  { +   "cell_type": "markdown", +   "metadata": {}, +   "source": [ +    "It's important that the `UNK_SYMBOL` is included somewhere in this list. Test examples with words not seen in training will be mapped to `UNK_SYMBOL`. If your model's vocab is the same as your train vocab, then `UNK_SYMBOL` will never be encountered during training, so it will be a random vector at test time." +   ] +  }, +  { +   "cell_type": "code", +   "execution_count": null, +   "metadata": {}, +   "outputs": [], +   "source": [ +    "len(dev_vocab)" +   ] +  }, +  { +   "cell_type": "markdown", +   "metadata": {}, +   "source": [ +    "## Question 2: Improve the color representations [1 point]\n", +    "\n", +    "This is the second required pipeline improvement for the assignment. \n", +    "\n", +    "The following functions do nothing at all to the raw input colors we get from the corpus. " +   ] +  }, +  { +   "cell_type": "code", +   "execution_count": null, +   "metadata": {}, +   "outputs": [], +   "source": [ +    "def represent_color_context(colors):\n", +    "    \n", +    "    # Improve me!\n", +    "    \n", +    "    return [represent_color(color) for color in colors]\n", +    "\n", +    "\n", +    "def represent_color(color):\n", +    "\n", +    "    # Improve me!\n", +    "    \n", +    "    return color" +   ] +  }, +  { +   "cell_type": "code", +   "execution_count": null, +   "metadata": {}, +   "outputs": [], +   "source": [ +    "represent_color_context(dev_rawcols_train[0])" +   ] +  }, +  { +   "cell_type": "markdown", +   "metadata": {}, +   "source": [ +    "__Your task__: Modify `represent_color_context` and/or `represent_color` to represent colors in a new way.\n", +    " \n", +    "__Notes__:\n", +    "\n", +    "* The Fourier-transform method of [Monroe et al. 2017](https://transacl.org/ojs/index.php/tacl/article/view/1142) is a proven choice (a rough sketch is given after these notes).\n", +    "* You are not required to keep `represent_color`. This might be unnatural if you want to perform an operation on each color trio all at once.\n", +    "* For that matter, if you want to process all of the color contexts in the entire data set all at once, that is fine too, as long as you can also perform the operation at test time with an unknown number of examples being tested."
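As a concrete starting point for Question 2, here is a rough sketch of the Fourier-transform idea mentioned in the notes above. It is an illustration only: the helper name `fourier_color` is not part of the assignment code, the input is assumed to be a normalized `[h, l, s]` triple in [0, 1] (i.e., read with `normalize_colors=True`), and Monroe et al. 2017 apply the transform in HSV rather than HLS space, so treat the details as adjustable.

import itertools
import numpy as np

def fourier_color(color, order=3):
    # `color` is assumed to be a normalized [h, l, s] triple in [0, 1].
    # For each index triple (j, k, m) with entries below `order`, take the
    # real and imaginary parts of exp(-2*pi*i*(j*h + k*l + m*s)); with
    # order=3 this yields a 2 * 3**3 = 54-dimensional representation.
    h, l, s = color
    feats = []
    for j, k, m in itertools.product(range(order), repeat=3):
        z = np.exp(-2j * np.pi * (j * h + k * l + m * s))
        feats.extend([z.real, z.imag])
    return feats

Under these assumptions, `represent_color` could simply return `fourier_color(color)`; whether that beats the raw HLS baseline should be checked empirically.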
+   ] +  }, +  { +   "cell_type": "markdown", +   "metadata": {}, +   "source": [ +    "## Use the color representer" +   ] +  }, +  { +   "cell_type": "markdown", +   "metadata": {}, +   "source": [ +    "The following cell just runs your `represent_color_context` on the train and test sets:" +   ] +  }, +  { +   "cell_type": "code", +   "execution_count": null, +   "metadata": {}, +   "outputs": [], +   "source": [ +    "dev_cols_train = [represent_color_context(colors) for colors in dev_rawcols_train]\n", +    "\n", +    "dev_cols_test = [represent_color_context(colors) for colors in dev_rawcols_test]" +   ] +  }, +  { +   "cell_type": "markdown", +   "metadata": {}, +   "source": [ +    "At this point, our preprocessing steps are complete, and we can fit a first model." +   ] +  }, +  { +   "cell_type": "markdown", +   "metadata": {}, +   "source": [ +    "## Initial model\n", +    "\n", +    "The first model is configured right now to be a small model run for just a few iterations. It should be enough to get traction, but it's unlikely to be a great model. You are free to modify this configuration if you wish; it is here just for demonstration and testing:" +   ] +  }, +  { +   "cell_type": "code", +   "execution_count": null, +   "metadata": {}, +   "outputs": [], +   "source": [ +    "dev_mod = ContextualColorDescriber(\n", +    "    dev_vocab, \n", +    "    embed_dim=10, \n", +    "    hidden_dim=10, \n", +    "    max_iter=5, \n", +    "    batch_size=128)" +   ] +  }, +  { +   "cell_type": "code", +   "execution_count": null, +   "metadata": {}, +   "outputs": [], +   "source": [ +    "_ = dev_mod.fit(dev_cols_train, dev_seqs_train)" +   ] +  }, +  { +   "cell_type": "markdown", +   "metadata": {}, +   "source": [ +    "As discussed in [colors_overview.ipynb](colors_overview.ipynb), our primary metric is `listener_accuracy`:" +   ] +  }, +  { +   "cell_type": "code", +   "execution_count": null, +   "metadata": {}, +   "outputs": [], +   "source": [ +    "dev_mod.listener_accuracy(dev_cols_test, dev_seqs_test)" +   ] +  }, +  { +   "cell_type": "markdown", +   "metadata": {}, +   "source": [ +    "We can also see the model's predicted sequences given color context inputs:" +   ] +  }, +  { +   "cell_type": "code", +   "execution_count": null, +   "metadata": {}, +   "outputs": [], +   "source": [ +    "dev_mod.predict(dev_cols_test[:1])" +   ] +  }, +  { +   "cell_type": "code", +   "execution_count": null, +   "metadata": {}, +   "outputs": [], +   "source": [ +    "dev_seqs_test[:1]" +   ] +  }, +  { +   "cell_type": "markdown", +   "metadata": {}, +   "source": [ +    "## Question 3: GloVe embeddings [1 points]\n", +    "\n", +    "The above model uses a random initial embedding, as configured by the decoder used by `ContextualColorDescriber`. This homework question asks you to consider using GloVe inputs. \n", +    "\n", +    "__Your task__: Complete `create_glove_embedding` so that it creates a GloVe embedding based on your model vocabulary. This isn't meant to be analytically challenging, but rather just to create a basis for you to try out other kinds of rich initialization."
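Here is one possible sketch of `create_glove_embedding`, assuming that `utils.glove2dict` and `utils.create_pretrained_embedding` behave as the skeleton comments below describe (read a GloVe file into a word-to-vector dict; return an embedding matrix together with a possibly modified vocabulary) and that `os`, `utils`, and `GLOVE_HOME` come from the surrounding cells. Check the helpers' actual signatures in `utils.py` before relying on this sketch.

def create_glove_embedding(vocab, glove_base_filename='glove.6B.50d.txt'):
    # Read the raw GloVe vectors into a dict mapping words to vectors:
    glove_lookup = utils.glove2dict(
        os.path.join(GLOVE_HOME, glove_base_filename))
    # Build the embedding matrix for `vocab`; per the skeleton comments,
    # this helper also ensures that the special symbols are present and
    # may therefore modify the vocabulary it returns:
    embedding, glove_vocab = utils.create_pretrained_embedding(glove_lookup, vocab)
    return embedding, glove_vocab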
+   ] +  }, +  { +   "cell_type": "code", +   "execution_count": null, +   "metadata": {}, +   "outputs": [], +   "source": [ +    "GLOVE_HOME = os.path.join('data', 'glove.6B')" +   ] +  }, +  { +   "cell_type": "code", +   "execution_count": null, +   "metadata": {}, +   "outputs": [], +   "source": [ +    "def create_glove_embedding(vocab, glove_base_filename='glove.6B.50d.txt'):\n", +    "    \n", +    "    # Use `utils.glove2dict` to read in the GloVe file: \n", +    "    ##### YOUR CODE HERE\n", +    "\n", +    "\n", +    "    \n", +    "    # Use `utils.create_pretrained_embedding` to create the embedding.\n", +    "    # This function will, by default, ensure that START_SYMBOL, \n", +    "    # END_SYMBOL, and UNK_SYMBOL are included in the embedding.\n", +    "    ##### YOUR CODE HERE\n", +    "\n", +    "\n", +    "    \n", +    "    # Be sure to return the embedding you create as well as the\n", +    "    # vocabulary returned by `utils.create_pretrained_embedding`,\n", +    "    # which is likely to have been modified from the input `vocab`.\n", +    "    \n", +    "    ##### YOUR CODE HERE\n", +    "\n", +    "\n" +   ] +  }, +  { +   "cell_type": "markdown", +   "metadata": {}, +   "source": [ +    "## Try the GloVe representations" +   ] +  }, +  { +   "cell_type": "markdown", +   "metadata": {}, +   "source": [ +    "Let's see if GloVe helped for our development data:" +   ] +  }, +  { +   "cell_type": "code", +   "execution_count": null, +   "metadata": {}, +   "outputs": [], +   "source": [ +    "dev_glove_embedding, dev_glove_vocab = create_glove_embedding(dev_vocab)" +   ] +  }, +  { +   "cell_type": "markdown", +   "metadata": {}, +   "source": [ +    "The above might dramatically change your vocabulary, depending on how many items from your vocab are in the GloVe space:" +   ] +  }, +  { +   "cell_type": "code", +   "execution_count": null, +   "metadata": {}, +   "outputs": [], +   "source": [ +    "len(dev_vocab)" +   ] +  }, +  { +   "cell_type": "code", +   "execution_count": null, +   "metadata": {}, +   "outputs": [], +   "source": [ +    "len(dev_glove_vocab)" +   ] +  }, +  { +   "cell_type": "code", +   "execution_count": null, +   "metadata": {}, +   "outputs": [], +   "source": [ +    "dev_mod_glove = ContextualColorDescriber(\n", +    "    dev_glove_vocab, \n", +    "    embedding=dev_glove_embedding,\n", +    "    hidden_dim=10, \n", +    "    max_iter=5, \n", +    "    batch_size=128)" +   ] +  }, +  { +   "cell_type": "code", +   "execution_count": null, +   "metadata": {}, +   "outputs": [], +   "source": [ +    "_ = dev_mod_glove.fit(dev_cols_train, dev_seqs_train)" +   ] +  }, +  { +   "cell_type": "code", +   "execution_count": null, +   "metadata": {}, +   "outputs": [], +   "source": [ +    "dev_mod_glove.listener_accuracy(dev_cols_test, dev_seqs_test)" +   ] +  }, +  { +   "cell_type": "markdown", +   "metadata": {}, +   "source": [ +    "You probably saw a small boost, assuming your tokenization scheme leads to good overlap with the GloVe vocabulary. The input representations are larger than in our previous model (at least as I configured things), so we would need to do more runs with higher `max_iter` values to see whether this is worthwhile overall." +   ] +  }, +  { +   "cell_type": "markdown", +   "metadata": {}, +   "source": [ +    "## Question 4: Color context [3 points]\n", +    "\n", +    "The final required homework question is the most challenging, but it should set you up to think in much more flexible ways about the underlying model we're using.\n", +    "\n", +    "The question asks you to modify various model components in `torch_color_describer.py`. 
The section called [Modifying the core model](colors_overview.ipynb#Modifying-the-core-model) from the core unit notebook provides a number of examples illustrating the basic techniques, so you might review that material if you get stuck here.\n", +    "\n", +    "__Your task__: [Monroe et al. 2017](https://transacl.org/ojs/index.php/tacl/article/view/1142) append the target color (the final one in the context) to each input token that gets processed by the decoder. The question asks you to subclass the `Decoder` and `EncoderDecoder` from `torch_color_describer.py` so that you can build models that do this." +   ] +  }, +  { +   "cell_type": "markdown", +   "metadata": {}, +   "source": [ +    "__Step 1__: Modify the `Decoder` so that the input vector to the model at each timestep is not just a token representation `x` but the concatenation of `x` with the representation of the target color.\n", +    "\n", +    "__Notes__:\n", +    "\n", +    "* You might notice at this point that the original `Decoder.forward` method has an optional keyword argument `target_colors` that is passed to `Decoder.get_embeddings`. Because this is already in place, all you have to do is modify the `get_embeddings` method to use this argument.\n", +    "\n", +    "* The change affects the configuration of `self.rnn`, so you need to override the `__init__` method as well, so that its `input_size` argument accommodates the embedding as well as the color representations.\n", +    "\n", +    "* You can do the relevant operations efficiently in pure PyTorch using `repeat_interleave` and `cat` (see the sketch following Step 2 below), but the important thing is to get a working implementation – you can always optimize the code later if the ideas prove useful to you. \n", +    "\n", +    "Here's skeleton code for you to flesh out:" +   ] +  }, +  { +   "cell_type": "code", +   "execution_count": null, +   "metadata": {}, +   "outputs": [], +   "source": [ +    "import torch\n", +    "import torch.nn as nn\n", +    "from torch_color_describer import Decoder\n", +    "\n", +    "class ColorContextDecoder(Decoder): \n", +    "    def __init__(self, color_dim, *args, **kwargs):\n", +    "        self.color_dim = color_dim\n", +    "        super().__init__(*args, **kwargs)\n", +    "        \n", +    "        # Fix the `self.rnn` attribute:\n", +    "        ##### YOUR CODE HERE\n", +    "\n", +    "\n", +    "        \n", +    "\n", +    "    def get_embeddings(self, word_seqs, target_colors=None): \n", +    "        \"\"\"You can assume that `target_colors` is a tensor of shape \n", +    "        (m, n), where m is the length of the batch (same as \n", +    "        `word_seqs.shape[0]`) and n is the dimensionality of the \n", +    "        color representations the model is using. The goal is\n", +    "        to attach each color vector i to each of the tokens in\n", +    "        the ith sequence of (the embedded version of) `word_seqs`.\n", +    "        \n", +    "        \"\"\" \n", +    "        ##### YOUR CODE HERE\n", +    "\n", +    "\n" +   ] +  }, +  { +   "cell_type": "markdown", +   "metadata": {}, +   "source": [ +    "__Step 2__: Modify the `EncoderDecoder`. For this, you just need to make a small change to the `forward` method: extract the target colors from `color_seqs` and feed them to the decoder."
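Before filling in the skeletons, it may help to see the two key tensor manipulations in isolation. The sketch below is illustrative only; the helper name `append_target_colors` is not part of the distributed code, and it assumes the target occupies the final position in each color context (as in the "model" ordering used throughout).

import torch

def append_target_colors(embedded_word_seqs, target_colors):
    # embedded_word_seqs: (m, k, embed_dim); target_colors: (m, color_dim).
    m, k, _ = embedded_word_seqs.shape
    # Tile each example's target color across its k timesteps: (m, k, color_dim).
    tiled = target_colors.unsqueeze(1).repeat_interleave(k, dim=1)
    # Concatenate along the feature dimension: (m, k, embed_dim + color_dim).
    return torch.cat([embedded_word_seqs, tiled], dim=2)

# For Step 2, the target color can be pulled out of an (m, n, p) batch of
# contexts with:
#
#     target_colors = color_seqs[:, -1, :]

In `ColorContextDecoder.get_embeddings`, something like `append_target_colors(self.embedding(word_seqs), target_colors)` would then supply the concatenated inputs, with `self.rnn` rebuilt so that its `input_size` is `self.embed_dim + self.color_dim`.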
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from torch_color_describer import EncoderDecoder\n", + "\n", + "class ColorizedEncoderDecoder(EncoderDecoder):\n", + " \n", + " def forward(self, \n", + " color_seqs, \n", + " word_seqs, \n", + " seq_lengths=None, \n", + " hidden=None, \n", + " targets=None):\n", + " if hidden is None:\n", + " hidden = self.encoder(color_seqs)\n", + " \n", + " # Extract the target colors from `color_seqs` and \n", + " # feed them to the decoder, which already has a\n", + " # `target_colors` keyword. \n", + " ##### YOUR CODE HERE\n", + "\n", + "\n", + " \n", + " return output, hidden, targets" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "__Step 3__: Finally, as in the examples in [Modifying the core model](colors_overview.ipynb#Modifying-the-core-model), you need to modify the `build_graph` method of `ContextualColorDescriber` so that it uses your new `ColorContextDecoder` and `ColorizedEncoderDecoder`. Here's starter code:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from torch_color_describer import Encoder\n", + "\n", + "class ColorizedInputDescriber(ContextualColorDescriber):\n", + " \n", + " def build_graph(self):\n", + " \n", + " # We didn't modify the encoder, so this is\n", + " # just copied over from the original:\n", + " encoder = Encoder(\n", + " color_dim=self.color_dim,\n", + " hidden_dim=self.hidden_dim)\n", + "\n", + " # Use your `ColorContextDecoder`, making sure\n", + " # to pass in all the keyword arguments coming\n", + " # from `ColorizedInputDescriber`:\n", + " \n", + " ##### YOUR CODE HERE\n", + "\n", + "\n", + " \n", + " # Return a `ColorizedEncoderDecoder` that uses\n", + " # your encoder and decoder:\n", + " \n", + " ##### YOUR CODE HERE\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "That's it! Since these modifications are pretty intricate, you might want to use [a toy dataset](colors_overview.ipynb#Toy-problems-for-development-work) to debug it:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "toy_color_seqs, toy_word_seqs, toy_vocab = create_example_dataset(\n", + " group_size=50, vec_dim=2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "toy_color_seqs_train, toy_color_seqs_test, toy_word_seqs_train, toy_word_seqs_test = \\\n", + " train_test_split(toy_color_seqs, toy_word_seqs)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "toy_mod = ColorizedInputDescriber(\n", + " toy_vocab, \n", + " embed_dim=10, \n", + " hidden_dim=10, \n", + " max_iter=100, \n", + " batch_size=128)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "_ = toy_mod.fit(toy_color_seqs_train, toy_word_seqs_train)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "toy_mod.listener_accuracy(toy_color_seqs_test, toy_word_seqs_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If that worked, then you can now try this model on SCC problems!" 
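For reference, a first pass at "trying this model on SCC problems" could simply repeat the earlier development run with the new class; the variable name `dev_color_mod` and the hyperparameters below are placeholders rather than tuned settings:

dev_color_mod = ColorizedInputDescriber(
    dev_vocab,
    embed_dim=10,
    hidden_dim=10,
    max_iter=5,
    batch_size=128)

_ = dev_color_mod.fit(dev_cols_train, dev_seqs_train)

dev_color_mod.listener_accuracy(dev_cols_test, dev_seqs_test)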
+   ] +  }, +  { +   "cell_type": "markdown", +   "metadata": {}, +   "source": [ +    "## Your original system [3 points]" +   ] +  }, +  { +   "cell_type": "markdown", +   "metadata": {}, +   "source": [ +    "There are many options for your original system, which consists of the full pipeline – all preprocessing and modeling steps. You are free to use any model you like, as long as you subclass `ContextualColorDescriber` in a way that allows its `listener_accuracy` method to behave in the expected way.\n", +    "\n", +    "So that we can evaluate models in a uniform way for the bake-off, we ask that you modify the function `my_original_system` below so that it accepts a trained instance of your model and does any preprocessing steps required by your model.\n", +    "\n", +    "If we seek to reproduce your results, we will rerun this entire notebook. Thus, it is fine if your `my_original_system` makes use of functions you wrote or modified above this cell." +   ] +  }, +  { +   "cell_type": "code", +   "execution_count": null, +   "metadata": {}, +   "outputs": [], +   "source": [ +    "def my_original_system(trained_model, color_seqs_test, texts_test): \n", +    "    \"\"\"Feel free to modify this code to accommodate the needs of\n", +    "    your system. Just keep in mind that it will get raw corpus\n", +    "    examples as inputs for the bake-off.\n", +    "    \n", +    "    \"\"\" \n", +    "    # `texts_test` is a list of strings, so tokenize each of\n", +    "    # its elements: \n", +    "    tok_seqs = [tokenize_example(s) for s in texts_test]\n", +    "    \n", +    "    col_seqs = [represent_color_context(colors) \n", +    "                for colors in color_seqs_test]\n", +    "\n", +    "    # Return the `listener_accuracy` for your model:\n", +    "    return trained_model.listener_accuracy(col_seqs, tok_seqs)" +   ] +  }, +  { +   "cell_type": "markdown", +   "metadata": {}, +   "source": [ +    "If `my_original_system` works on test sets you create from the corpus distribution, then it will work for the bake-off, so consider checking that. For example, this would check that `dev_mod` above passes muster:" +   ] +  }, +  { +   "cell_type": "code", +   "execution_count": null, +   "metadata": {}, +   "outputs": [], +   "source": [ +    "my_original_system(dev_mod, dev_rawcols_test, dev_texts_test)" +   ] +  }, +  { +   "cell_type": "markdown", +   "metadata": {}, +   "source": [ +    "In the cell below, please provide a brief technical description of your original system, so that the teaching team can gain an understanding of what it does. This will help us to understand your code and analyze all the submissions to identify patterns and strategies." +   ] +  }, +  { +   "cell_type": "code", +   "execution_count": null, +   "metadata": {}, +   "outputs": [], +   "source": [ +    "# Enter your system description in this cell.\n", +    "# Please do not remove this comment.\n", +    "\n" +   ] +  }, +  { +   "cell_type": "markdown", +   "metadata": {}, +   "source": [ +    "## Bakeoff [1 point]" +   ] +  }, +  { +   "cell_type": "markdown", +   "metadata": {}, +   "source": [ +    "For the bake-off, we will release a test set. The announcement will go out on the discussion forum. You will evaluate your custom model from the previous question on this new dataset using your `my_original_system` function. Rules:\n", +    "\n", +    "1. Only one evaluation is permitted.\n", +    "1. No additional system tuning is permitted once the bake-off has started.\n", +    "\n", +    "The cells below this one constitute your bake-off entry.\n", +    "\n", +    "People who enter will receive the additional homework point, and people whose systems achieve the top score will receive an additional 0.5 points. 
We will test the top-performing systems ourselves, and only systems for which we can reproduce the reported results will win the extra 0.5 points.\n", + "\n", + "Late entries will be accepted, but they cannot earn the extra 0.5 points. Similarly, you cannot win the bake-off unless your homework is submitted on time.\n", + "\n", + "The announcement will include the details on where to submit your entry." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Enter your bake-off assessment code in this cell. \n", + "# Please do not remove this comment.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# On an otherwise blank line in this cell, please enter\n", + "# your listener_accuracy score as reported by the code\n", + "# above. Please enter only a number between 0 and 1 inclusive. \n", + "# Please do not remove this comment.\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/test/colors-test-data.json b/test/colors-test-data.json new file mode 100644 index 0000000..9ca9f7f --- /dev/null +++ b/test/colors-test-data.json @@ -0,0 +1,78 @@ +[ + { + "D1D2Diff": "17", + "alt1ColH": "226", + "alt1ColL": "50", + "alt1ColS": "81", + "alt1LocL": "2", + "alt1LocS": "1", + "alt1Status": "target", + "alt2ColH": "283", + "alt2ColL": "50", + "alt2ColS": "87", + "alt2LocL": "3", + "alt2LocS": "2", + "alt2Status": "distr1", + "clickColH": "248", + "clickColL": "50", + "clickColS": "92", + "clickLocL": "1", + "clickLocS": "3", + "clickStatus": "distr2", + "clkTime": "1459877206546.0", + "condition": "close", + "contents": "Blue", + "gameid": "1124-1", + "msgTime": "1459877203862.0", + "numCleanChars": "13", + "numCleanWords": "3", + "numOutcome": "0.0", + "numRawChars": "16", + "numRawWords": "4", + "outcome": "false", + "role": "speaker", + "roundNum": "1", + "source": "human", + "targetD1Diff": "19", + "targetD2Diff": "10", + "workerid_uniq": "201" + }, + { + "D1D2Diff": "17", + "alt1ColH": "226", + "alt1ColL": "50", + "alt1ColS": "81", + "alt1LocL": "2", + "alt1LocS": "1", + "alt1Status": "target", + "alt2ColH": "283", + "alt2ColL": "50", + "alt2ColS": "87", + "alt2LocL": "3", + "alt2LocS": "2", + "alt2Status": "distr1", + "clickColH": "248", + "clickColL": "50", + "clickColS": "92", + "clickLocL": "1", + "clickLocS": "3", + "clickStatus": "distr2", + "clkTime": "1459877206546.0", + "condition": "close", + "contents": "The darker blue one", + "gameid": "1124-1", + "msgTime": "1459877203862.0", + "numCleanChars": "13", + "numCleanWords": "3", + "numOutcome": "0.0", + "numRawChars": "16", + "numRawWords": "4", + "outcome": "false", + "role": "speaker", + "roundNum": "1", + "source": "human", + "targetD1Diff": "19", + "targetD2Diff": "10", + "workerid_uniq": "201" + } +] \ No newline at end of file diff --git a/test/test_colors.py b/test/test_colors.py new file mode 100644 index 0000000..f8ac7d2 --- /dev/null +++ b/test/test_colors.py @@ -0,0 +1,61 @@ +from colors import ColorsCorpusReader, ColorsCorpusExample, TURN_BOUNDARY +import json +import os +import pytest + +__author__ = "Christopher 
Potts" +__version__ = "CS224u, Stanford, Spring 2020" + + +@pytest.fixture +def test_rows(): + src_filename = os.path.join( + os.path.dirname(os.path.realpath(__file__)), + 'colors-test-data.json') + with open(src_filename) as f: + data = json.load(f) + return data + + +# These are the colors in the test data file loaded by `test_rows`: +alt1 = [226.0, 50.0, 81.0] +alt2 = [283.0, 50.0, 87.0] +click = [248.0, 50.0, 92.0] + + +@pytest.mark.parametrize("attr, expected", [ + ['contents', 'Blue{}The darker blue one'.format(TURN_BOUNDARY)], + ['gameid', '1124-1'], + ['roundNum', 1], + ['outcome', False], + ['clickStatus', 'distr2'], + ['colors', [alt2, click, alt1]], + ['listener_context', [click, alt1, alt2]], + ['speaker_context', [alt1, alt2, click]] +]) +def test_color_corpus_example(attr, expected, test_rows): + ex = ColorsCorpusExample(test_rows, normalize_colors=False) + result = getattr(ex, attr) + assert result == expected + + +def test_normalize_colors(test_rows): + ex = ColorsCorpusExample(test_rows, normalize_colors=True) + result = ex.colors[0] + h, l, s = alt2 + expected = [h/360, l/100, s/100] + assert result == expected + + +def test_parse_turns(test_rows): + ex = ColorsCorpusExample(test_rows) + result = ex.parse_turns() + expected = ['Blue', 'The darker blue one'] + assert result == expected + + +def test_check_row_alignment(test_rows): + rows = test_rows.copy() + rows[0]['clickStatus'] = 'deliberate change' + with pytest.raises(RuntimeError): + ex = ColorsCorpusExample(rows) diff --git a/test/test_models.py b/test/test_models.py index b727860..bc100b4 100644 --- a/test/test_models.py +++ b/test/test_models.py @@ -9,6 +9,7 @@ import tempfile import torch.nn as nn import utils +from utils import START_SYMBOL, END_SYMBOL, UNK_SYMBOL import np_sgd_classifier import np_shallow_neural_classifier @@ -20,6 +21,7 @@ import torch_rnn_classifier import torch_autoencoder import torch_tree_nn +import torch_color_describer __author__ = "Christopher Potts" __version__ = "CS224u, Stanford, Spring 2020" @@ -98,6 +100,13 @@ def cheese_disease_dataset(): 'vocab': vocab} +@pytest.fixture +def color_describer_dataset(): + color_seqs, word_seqs, vocab = torch_color_describer.create_example_dataset( + group_size=50, vec_dim=2) + return color_seqs, word_seqs, vocab + + def test_np_shallow_neural_classifier(XOR): """Just makes sure that this code will run; it doesn't check that it is creating good models. 
@@ -309,6 +318,13 @@ def test_sgd_classifier(): assert acc >= 0.89 +@pytest.mark.parametrize("initial_embedding", [True, False]) +def test_torch_color_describer_simple_example(initial_embedding): + acc = torch_color_describer.simple_example( + initial_embedding=initial_embedding) + assert acc > 0.95 + + @pytest.mark.parametrize("model, params", [ [ np_sgd_classifier.BasicSGDClassifier(max_iter=10, eta=0.1), @@ -383,6 +399,17 @@ def test_sgd_classifier(): 'max_iter': 10, 'eta': 0.1 } + ], + [ + torch_color_describer.ContextualColorDescriber( + vocab=[START_SYMBOL, END_SYMBOL, UNK_SYMBOL], + hidden_dim=5, embed_dim=5, max_iter=1, eta=1.0), + { + 'hidden_dim': 10, + 'embed_dim': 10, + 'max_iter': 10, + 'eta': 0.1 + } ] ]) def test_parameter_setting(model, params): @@ -403,6 +430,20 @@ def test_rnn_classifier_cross_validation(model_class, X_sequence): X, y, mod, cv=2, param_grid={'hidden_dim': [10, 20]}) +def test_color_describer_cross_validation(color_describer_dataset): + color_seqs, word_seqs, vocab = color_describer_dataset + mod = torch_color_describer.ContextualColorDescriber( + vocab, + embed_dim=10, + hidden_dim=10, + max_iter=100, + embedding=None) + best_mod = utils.fit_classifier_with_crossvalidation( + color_seqs, word_seqs, mod, cv=2, + scoring=None, + param_grid={'hidden_dim': [10, 20]}) + + def test_torch_shallow_neural_classifier_save_load(XOR): X, y = XOR mod = torch_shallow_neural_classifier.TorchShallowNeuralClassifier( @@ -465,3 +506,21 @@ def test_torch_tree_nn_save_load(X_tree): mod2 = torch_tree_nn.TorchTreeNN.from_pickle(name) mod2.predict(X) mod2.fit(X) + + +def test_torch_color_describer_save_load(color_describer_dataset): + color_seqs, word_seqs, vocab = color_describer_dataset + mod = torch_color_describer.ContextualColorDescriber( + vocab, + embed_dim=10, + hidden_dim=10, + max_iter=100, + embedding=None) + mod.fit(color_seqs, word_seqs) + mod.predict(color_seqs) + with tempfile.NamedTemporaryFile(mode='wb') as f: + name = f.name + mod.to_pickle(name) + mod2 = torch_color_describer.ContextualColorDescriber.from_pickle(name) + mod2.predict(color_seqs) + mod2.fit(color_seqs, word_seqs) diff --git a/torch_color_describer.py b/torch_color_describer.py new file mode 100644 index 0000000..e4a2633 --- /dev/null +++ b/torch_color_describer.py @@ -0,0 +1,738 @@ +import itertools +import numpy as np +import torch +import torch.nn as nn +import torch.utils.data +from torch_model_base import TorchModelBase +import utils +from utils import START_SYMBOL, END_SYMBOL, UNK_SYMBOL + +__author__ = "Christopher Potts" +__version__ = "CS224u, Stanford, Spring 2020" + + +class ColorDataset(torch.utils.data.Dataset): + """PyTorch dataset for contextual color describers. The primary + function of this dataset is to organize the raw data into + batches of Tensors of the appropriate shape and type. When + using this dataset with `torch.utils.data.DataLoader`, it is + crucial to supply the `collate_fn` method as the argument for + the `DataLoader.collate_fn` parameter. + + Parameters + ---------- + color_seqs : list of lists of lists of floats, or np.array + Dimension (m, n, p) where m is the number of examples, n is + the number of colors in each context, and p is the length + of the color representations. + word_seqs : list of list of int + Dimension m, the number of examples. The length of each + sequence can vary. + ex_lengths : list of int + Dimension m. Each value gives the length of the corresponding + word sequence in `word_seqs`. 
+ + """ + def __init__(self, color_seqs, word_seqs, ex_lengths): + assert len(color_seqs) == len(ex_lengths) + assert len(color_seqs) == len(word_seqs) + self.color_seqs = color_seqs + self.word_seqs = word_seqs + self.ex_lengths = ex_lengths + + @staticmethod + def collate_fn(batch): + """Function for creating batches. + + Parameter + --------- + batch : tuple of length 3 + Contains the `color_seqs`, `word_seqs`, and `ex_lengths`, + all as lists or similar Python iterables. The function + turns them into Tensors. + + Returns + ------- + color_seqs : torch.FloatTensor + Dimension (m, n, p). + word_seqs : torch.LongTensor + This is a padded sequence, dimension (m, k), where k is + the length of the longest sequence in the batch. + ex_lengths : torch.LongTensor + targets : torch.LongTensor + This is a padded sequence, dimension (m, k-1), where k is + the length of the longest sequence in the batch. The + targets match `word_seqs` except we drop the first symbol, + as it is always START_SYMBOL. When the loss is calculated, + we compare this sequence to `word_seqs` excluding the + final character, which is always the END_SYMBOL. The result + is that each timestep t is trained to predict the symbol + at t+1. + + """ + color_seqs, word_seqs, ex_lengths = zip(*batch) + # Conversion to Tensors: + color_seqs = torch.FloatTensor(color_seqs) + word_seqs = [torch.LongTensor(seq) for seq in word_seqs] + ex_lengths = torch.LongTensor(ex_lengths) + # Targets as next-word predictions: + targets = [x[1: , ] for x in word_seqs] + # Padding + word_seqs = torch.nn.utils.rnn.pad_sequence( + word_seqs, batch_first=True) + targets = torch.nn.utils.rnn.pad_sequence( + targets, batch_first=True) + return color_seqs, word_seqs, ex_lengths, targets + + def __len__(self): + return len(self.color_seqs) + + def __getitem__(self, idx): + return (self.color_seqs[idx], self.word_seqs[idx], self.ex_lengths[idx]) + + +class Encoder(nn.Module): + """Simple Encoder model based on a GRU cell. + + Parameters + ---------- + color_dim : int + hidden_dim : int + + """ + def __init__(self, color_dim, hidden_dim): + super(Encoder, self).__init__() + self.color_dim = color_dim + self.hidden_dim = hidden_dim + self.rnn = nn.GRU( + input_size=self.color_dim, + hidden_size=self.hidden_dim, + batch_first=True) + + def forward(self, color_seqs): + output, hidden = self.rnn(color_seqs) + return hidden + + +class Decoder(nn.Module): + """Simple Decoder model based on a GRU cell. The hidden + representations of the GRU are passed through a dense linear + layer, and those logits are used to train the language model + according to a softmax objective in `ContextualColorDescriber`. + + Parameters + ---------- + vocab_size : int + embed_dim : int + hidden_dim : int + embedding : np.array or None + If `None`, a random embedding is created. If `np.array`, this + value becomes the embedding. 
+ + """ + def __init__(self, vocab_size, embed_dim, hidden_dim, embedding=None): + super(Decoder, self).__init__() + self.vocab_size = vocab_size + self.embedding = self._define_embedding(embedding, vocab_size, embed_dim) + self.embed_dim = self.embedding.embedding_dim + self.hidden_dim = hidden_dim + self.rnn = nn.GRU( + input_size=self.embed_dim, + hidden_size=self.hidden_dim, + batch_first=True) + self.output_layer = nn.Linear(self.hidden_dim, self.vocab_size) + + def forward(self, word_seqs, seq_lengths=None, hidden=None, target_colors=None): + + embs = self.get_embeddings(word_seqs, target_colors=target_colors) + + if self.training: + # Packed sequence for performance: + embs = torch.nn.utils.rnn.pack_padded_sequence( + embs, batch_first=True, lengths=seq_lengths, enforce_sorted=False) + # RNN forward: + output, hidden = self.rnn(embs, hidden) + # Unpack: + output, seq_lengths = torch.nn.utils.rnn.pad_packed_sequence( + output, batch_first=True) + # Output dense layer to get logits: + output = self.output_layer(output) + # Drop the final element: + output = output[: , : -1, :] + # Reshape for the sake of the loss function: + output = output.transpose(1, 2) + return output, hidden + else: + output, hidden = self.rnn(embs, hidden) + output = self.output_layer(output) + return output, hidden + + def get_embeddings(self, word_seqs, target_colors=None): + """Gets the input token representations. At present, these are + just taken directly from `self.embedding`, but `target_colors` + can be made available in case the user wants to subclass this + function to append these representations to each input token. + + Parameters + ---------- + word_seqs : torch.LongTensor + This is a padded sequence, dimension (m, k), where k is + the length of the longest sequence in the batch. + target_colors : torch.FloatTensor + Dimension (m, c), where m is the number of exampkes and + c is the dimensionality of the color representations. + + """ + return self.embedding(word_seqs) + + @staticmethod + def _define_embedding(embedding, vocab_size, embed_dim): + if embedding is None: + return nn.Embedding(vocab_size, embed_dim) + else: + embedding = torch.FloatTensor(embedding) + return nn.Embedding.from_pretrained(embedding) + + +class EncoderDecoder(nn.Module): + """This class knits the `Encoder` and `Decoder` into a single class + that serves as the model for `ContextualColorDescriber`. This is + largely a convenience: it means that `ContextualColorDescriber` + can use a single `model` argument, and it allows us to localize + the core computations in the `forward` method of this class. + + Parameters + ---------- + encoder : `Encoder` + decoder : `Decoder` + + """ + def __init__(self, encoder, decoder): + super(EncoderDecoder, self).__init__() + self.encoder = encoder + self.decoder = decoder + + def forward(self, + color_seqs, + word_seqs, + seq_lengths=None, + hidden=None, + targets=None): + """This is the core method for this module. It has a lot of + arguments mainly to make it easy to create subclasses of this + class that do interesting things without requring modifications + to the `fit` method of `ContextualColorDescriber`. + + Parameters + ---------- + color_seqs : torch.FloatTensor + Dimension (m, n, p), where m is the number of examples, + n is the number of colors in each context, and p is the + dimensionality of each color. + word_seqs : torch.LongTensor + Dimension (m, k), where m is the number of examples and k + is the length of all the (padded) sequences in the batch. 
+ seq_lengths : torch.LongTensor or None + The true lengths of the sequences in `word_seqs`. If this + is None, then we are predicting new sequences, so we will + continue predicting until we hit a maximum length or we + generate STOP_SYMBOL. + hidden : torch.FloatTensor or None + The hidden representation for each of the m examples in this + batch. If this is None, we are predicting new sequences + and so the hidden representation is computed for each timestep + during decoding. + targets : torch.LongTensor + Dimension (m, k-1). These are ignored entirely by the current + implementation, but they are passed in so that they could be + used, for example, to allow some non-teacher-forcing training. + + Returns + ------- + output : torch.FloatTensor + Dimension (m, k, c), where m is the number of examples, k + is the length of the sequences in this batch, and c is the + number of classes (the size of the vocabulary). + hidden : torch.FloatTensor + Dimension (m, h) where m is the number of examples and h is + the dimensionality of the hidden representations of the model. + targets : torch.LongTensor + Should be identical to `targets` as passed in. + + """ + if hidden is None: + hidden = self.encoder(color_seqs) + output, hidden = self.decoder( + word_seqs, seq_lengths=seq_lengths, hidden=hidden) + return output, hidden, targets + + +class ContextualColorDescriber(TorchModelBase): + """The primary interface to modeling contextual colors datasets. + + Parameters + ---------- + vocab : list of str + This should be the vocabulary. It needs to be aligned with + `embedding` in the sense that the ith element of vocab + should be represented by the ith row of `embedding`. + embedding : np.array or None + Each row represents a word in `vocab`, as described above. + embed_dim : int + Dimensionality for the initial embeddings. This is ignored + if `embedding` is not None, as a specified value there + determines this value. + hidden_dim : int + Dimensionality of the hidden layer. + max_iter : int + Maximum number of training epochs. + eta : float + Learning rate. + optimizer : PyTorch optimizer + Default is `torch.optim.Adam`. + l2_strength : float + L2 regularization strength. Default 0 is no regularization. + warm_start : bool + If True, calling `fit` will resume training with previously + defined trainable parameters. If False, calling `fit` will + reinitialize all trainable parameters. Default: False. + device : 'cpu' or 'cuda' + The default is to use 'cuda' iff available + + """ + def __init__(self, + vocab, + embedding=None, + embed_dim=50, + hidden_dim=50, + **kwargs): + super(ContextualColorDescriber, self).__init__( + hidden_dim=hidden_dim, **kwargs) + self.vocab = vocab + self.embedding = embedding + self.vocab_size = len(vocab) + self.word2index = dict(zip(self.vocab, range(self.vocab_size))) + self.index2word = dict(zip(range(self.vocab_size), self.vocab)) + self.embed_dim = embed_dim + self.output_dim = self.vocab_size + self.start_index = self.vocab.index(START_SYMBOL) + self.end_index = self.vocab.index(END_SYMBOL) + self.unk_index = self.vocab.index(UNK_SYMBOL) + self.params += ['embed_dim', 'embedding'] + # The base class has this attribute, but this model doesn't, + # so we remove it to avoid misleading people: + delattr(self, 'hidden_activation') + self.params.remove('hidden_activation') + + def fit(self, color_seqs, word_seqs): + """Standard `fit` method where `color_seqs` are the inputs and + `word_seqs` are the sequences to predict. 
+ + Parameters + ---------- + color_seqs : list of lists of lists of floats, or np.array + Dimension (m, n, p) where m is the number of examples, n is + the number of colors in each context, and p is the length + of the color representations. + word_seqs : list of list of int + Dimension m, the number of examples. The length of each + sequence can vary. + + Returns + ------- + self + + """ + self.color_dim = len(color_seqs[0][0]) + + if not self.warm_start or not hasattr(self, "model"): + self.model = self.build_graph() + + # Make sure that these attributes are aligned -- important + # where a supplied pretrained embedding has determined + # a `embed_dim` that might be different from the user's + # argument. + self.embed_dim = self.model.decoder.embed_dim + + self.model.to(self.device) + + self.model.train() + + dataset = self.build_dataset(color_seqs, word_seqs) + + dataloader = torch.utils.data.DataLoader( + dataset, + batch_size=self.batch_size, + shuffle=True, + drop_last=False, + pin_memory=True, + collate_fn=dataset.collate_fn) + + loss = nn.CrossEntropyLoss() + + optimizer = self.optimizer( + self.model.parameters(), + lr=self.eta, + weight_decay=self.l2_strength) + + for iteration in range(1, self.max_iter+1): + epoch_error = 0.0 + for batch_colors, batch_words, batch_lens, targets in dataloader: + + batch_colors = batch_colors.to(self.device) + batch_words = batch_words.to(self.device) + batch_lens = batch_lens.to(self.device) + targets = targets.to(self.device) + + output, _, targets = self.model( + color_seqs=batch_colors, + word_seqs=batch_words, + seq_lengths=batch_lens, + targets=targets) + + err = loss(output, targets) + epoch_error += err.item() + optimizer.zero_grad() + err.backward() + optimizer.step() + + utils.progress_bar("Epoch {}; err = {}".format(iteration, epoch_error)) + + return self + + def build_dataset(self, color_seqs, word_seqs): + word_seqs = [[self.word2index.get(w, self.unk_index) for w in seq] + for seq in word_seqs] + ex_lengths = [len(seq) for seq in word_seqs] + return ColorDataset(color_seqs, word_seqs, ex_lengths) + + def build_graph(self): + encoder = Encoder( + color_dim=self.color_dim, + hidden_dim=self.hidden_dim) + + decoder = Decoder( + vocab_size=self.vocab_size, + embed_dim=self.embed_dim, + embedding=self.embedding, + hidden_dim=self.hidden_dim) + + return EncoderDecoder(encoder, decoder) + + def predict(self, color_seqs, max_length=20): + """Predict new sequences based on the color contexts in + `color_seqs`. + + Parameters + ---------- + color_seqs : list of lists of lists of floats, or np.array + Dimension (m, n, p) where m is the number of examples, n is + the number of colors in each context, and p is the length + of the color representations. + max_length : int + Length of the longest sequences to create. 
+ + Returns + ------- + list of str + + """ + color_seqs = torch.FloatTensor(color_seqs) + self.model.to("cpu") + self.model.eval() + preds = [] + with torch.no_grad(): + # Get the hidden representations from the color contexts: + hidden = self.model.encoder(color_seqs) + + # Start with START_SYMBOL for all examples: + decoder_input = [[self.start_index]] * len(color_seqs) + decoder_input = torch.LongTensor(decoder_input) + preds.append(decoder_input) + + # Now move through the remaiming timesteps using the + # previous timestep to predict the next one: + for i in range(1, max_length): + + output, hidden, _ = self.model( + color_seqs=color_seqs, + word_seqs=decoder_input, + seq_lengths=None, + hidden=hidden) + + # Always take the highest probability token to + # be the prediction: + p = output.argmax(2) + preds.append(p) + decoder_input = p + + # Convert all the predictions from indices to elements of + # `self.vocab`: + preds = torch.cat(preds, axis=1) + preds = [self._convert_predictions(p) for p in preds] + return preds + + def _convert_predictions(self, pred): + rep = [] + for i in pred: + i = i.item() + rep.append(self.index2word[i]) + if i == self.end_index: + return rep + return rep + + def predict_proba(self, color_seqs, word_seqs): + """Calculate the predicted probabilties of the sequences in + `word_seqs` given the color contexts in `color_seqs`. + + Parameters + ---------- + color_seqs : list of lists of lists of floats, or np.array + Dimension (m, n, p) where m is the number of examples, n is + the number of colors in each context, and p is the length + of the color representations. + word_seqs : list of list of int + Dimension m, the number of examples. The length of each + sequence can vary. + + Returns + ------- + list of lists of predicted probabilities. In other words, + for each example, at each timestep, there is a probability + distribution over the entire vocabulary. + + """ + + dataset = self.build_dataset(color_seqs, word_seqs) + + dataloader = torch.utils.data.DataLoader( + dataset, + batch_size=self.batch_size, + shuffle=False, + drop_last=False, + pin_memory=True, + collate_fn=dataset.collate_fn) + + self.model.eval() + + softmax = nn.Softmax(dim=2) + + start_probs = np.zeros(self.vocab_size) + start_probs[self.start_index] = 1.0 + + all_probs = [] + + with torch.no_grad(): + + for batch_colors, batch_words, batch_lens, targets in dataloader: + + batch_colors = batch_colors.to(self.device) + batch_words = batch_words.to(self.device) + batch_lens = batch_lens.to(self.device) + + output, _, _ = self.model( + color_seqs=batch_colors, + word_seqs=batch_words, + seq_lengths=batch_lens) + + probs = softmax(output) + probs = probs.cpu().numpy() + probs = np.insert(probs, 0, start_probs, axis=1) + all_probs += [p[: n] for p, n in zip(probs, batch_lens)] + + return all_probs + + def perplexities(self, color_seqs, word_seqs): + """Compute the perplexity of each sequence in `word_seqs` + given `color_seqs`. For a sequence of conditional probabilities + p1, p2, ..., pN, the perplexity is calculated as + + (p1 * p2 * ... * pN)**(-1/N) + + Parameters + ---------- + color_seqs : list of lists of floats, or np.array + Dimension (m, n, p) where m is the number of examples, n is + the number of colors in each context, and p is the length + of the color representations. + word_seqs : list of list of int + Dimension m, the number of examples, and the length of + each sequence can vary. 
+ + Returns + ------- + list of float + + """ + probs = self.predict_proba(color_seqs, word_seqs) + scores = [] + for pred, seq in zip(probs, word_seqs): + # Get the probabilities corresponding to the path `seq`: + s = np.array([t[self.word2index.get(w, self.unk_index)] + for t, w in zip(pred, seq)]) + scores.append(s) + perp = [np.prod(s)**(-1/len(s)) for s in scores] + return perp + + def listener_predict_one(self, context, seq): + context = np.array(context) + n_colors = len(context) + + # Get all possible context orders: + indices = list(range(n_colors)) + orders = [list(x) for x in itertools.product(indices, repeat=n_colors)] + + # All contexts as color sequences: + contexts = [context[x] for x in orders] + + # Repeat the single utterance the needed number of times: + seqs = [seq] * len(contexts) + + # All perplexities: + perps = self.perplexities(contexts, seqs) + + # Ranking, using `order_indices` rather than colors and + # index sequences to avoid sorting errors from some versions + # of Python: + order_indices = range(len(orders)) + ranking = sorted(zip(perps, order_indices)) + + # Return the minimum perplexity, the chosen color, and the + # index of the chosen color in the original context: + min_perp, order_index = ranking[0] + pred_color = contexts[order_index][-1] + pred_index = orders[order_index][-1] + return min_perp, pred_color, pred_index + + def listener_accuracy(self, color_seqs, word_seqs): + """Compute the "listener accuracy" of the model for each example. + For the ith example, this is defined as + + prediction = max_{c in C_i} P(word_seq[i] | c) + + where C_i is every possible permutation of the three colors in + color_seqs[i]. We take the model's prediction to be correct + if it chooses a c in which the target is in the privileged final + position in the color sequence. (There are two such c's, since + the distractors can be in two orders; we give full credit if one + of these two c's is chosen.) + + Parameters + ---------- + color_seqs : list of lists of list of floats, or np.array + Dimension (m, n, p) where m is the number of examples, n is + the number of colors in each context, and p is the length + of the color representations. + word_seqs : list of list of int + Dimension m, the number of examples, and the length of + each sequence can vary. + + Returns + ------- + list of float + + """ + correct = 0 + for color_seq, word_seq in zip(color_seqs, word_seqs): + target_index = len(color_seq) - 1 + min_perp, pred, pred_index = self.listener_predict_one( + color_seq, word_seq) + correct += int(target_index == pred_index) + return correct / len(color_seqs) + + def score(self, color_seqs, word_seqs): + """Alias for `listener_accuracy`. This method is included to + make it easier to use sklearn cross-validators, which expect + a method called `score`. + + """ + return self.listener_accuracy(color_seqs, word_seqs) + + +def create_example_dataset(group_size=100, vec_dim=2): + """Creates simple datasets in which the inputs are three-vector + sequences and the outputs are simple character sequences, with + the range of values in the final vector in the input determining + the output sequence. For example, a single input/output pair + will look like this: + + [[0.44, 0.51], [0.87, 0.89], [0.1, 0.2]], ['', 'A', ''] + + The sequences are meaningless, as are their lengths (which were + chosen only to be different from each other). 
+ +    """ +    import random + +    groups = ((0.0, 0.2), (0.4, 0.6), (0.8, 1.0)) +    # NB: these literals must match utils.START_SYMBOL, END_SYMBOL, and UNK_SYMBOL. +    vocab = ['<s>', '</s>', 'A', 'B', '$UNK'] +    seqs = [ +        ['<s>', 'A', '</s>'], +        ['<s>', 'A', 'B', '</s>'], +        ['<s>', 'B', 'A', 'B', 'A', '</s>']] + +    color_seqs = [] +    word_seqs = [] +    for i, ((l, u), seq) in enumerate(zip(groups, seqs)): + +        dis_indices = list(range(len(groups))) +        dis_indices.remove(i) +        random.shuffle(dis_indices) +        disl1, disu1 = groups[dis_indices[0]] +        dis2 = disl2, disu2 = groups[dis_indices[1]] + +        for _ in range(group_size): +            target = utils.randvec(vec_dim, l, u) +            dis1 = utils.randvec(vec_dim, disl1, disu1) +            dis2 = utils.randvec(vec_dim, disl2, disu2) +            context = [dis1, dis2, target] +            color_seqs.append(context) + +        word_seqs += [seq for _ in range(group_size)] + +    return color_seqs, word_seqs, vocab + + +def simple_example(group_size=100, vec_dim=2, initial_embedding=False): +    from sklearn.model_selection import train_test_split + +    color_seqs, word_seqs, vocab = create_example_dataset( +        group_size=group_size, vec_dim=vec_dim) + +    if initial_embedding: +        import numpy as np +        embedding = np.random.uniform( +            low=-0.5, high=0.5, size=(len(vocab), 11)) +    else: +        embedding = None + +    X_train, X_test, y_train, y_test = train_test_split( +        color_seqs, word_seqs) + +    mod = ContextualColorDescriber( +        vocab, +        embed_dim=10, +        hidden_dim=10, +        max_iter=100, +        embedding=embedding) + +    mod.fit(X_train, y_train) + +    preds = mod.predict(X_test) + +    correct = 0 +    for y, p in zip(y_test, preds): +        if y == p: +            correct += 1 + +    print("\nExact sequence: {} of {} correct".format(correct, len(y_test))) + +    lis_acc = mod.listener_accuracy(X_test, y_test) + +    print("\nListener accuracy {}".format(lis_acc)) + +    return lis_acc + + +if __name__ == '__main__': +    simple_example()
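As a quick numeric sanity check of the perplexity formula used by `ContextualColorDescriber.perplexities` above (illustrative only, not part of the module):

import numpy as np

probs = np.array([0.5, 0.25, 0.125])
# (0.5 * 0.25 * 0.125) ** (-1/3) = (1/64) ** (-1/3) ≈ 4.0
perplexity = np.prod(probs) ** (-1 / len(probs))
print(perplexity)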