test cases for loading

ahirner · ahirner · commit 55d8823bf006 · 2017-06-01T17:01:15.000+02:00
diff --git a/load_pretrained.ipynb b/load_pretrained.ipynb
@@ -0,0 +1,259 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "import torch.nn as nn\n",
+    "import torch.utils.model_zoo as model_zoo\n",
+    "import torchvision\n",
+    "import torchvision.models as models"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Configuration"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Download URL of the pretrained weights for every model under test,\n",
+    "# keyed by the name used to look the constructor up in torchvision.models.\n",
+    "model_urls = {\n",
+    "    'alexnet': 'https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth',\n",
+    "    'densenet121': 'https://download.pytorch.org/models/densenet121-241335ed.pth',\n",
+    "    'densenet169': 'https://download.pytorch.org/models/densenet169-6f0f7f60.pth',\n",
+    "    'densenet201': 'https://download.pytorch.org/models/densenet201-4c113574.pth',\n",
+    "    'densenet161': 'https://download.pytorch.org/models/densenet161-17b70270.pth',\n",
+    "    #truncated _google to match module name\n",
+    "    'inception_v3': 'https://download.pytorch.org/models/inception_v3_google-1a9a5a14.pth',\n",
+    "    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',\n",
+    "    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',\n",
+    "    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',\n",
+    "    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',\n",
+    "    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',\n",
+    "    'squeezenet1_0': 'https://download.pytorch.org/models/squeezenet1_0-a815701f.pth',\n",
+    "    'squeezenet1_1': 'https://download.pytorch.org/models/squeezenet1_1-f364aa15.pth',\n",
+    "    'vgg11': 'https://download.pytorch.org/models/vgg11-bbd30ac9.pth',\n",
+    "    'vgg13': 'https://download.pytorch.org/models/vgg13-c768596a.pth',\n",
+    "    'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth',\n",
+    "    'vgg19': 'https://download.pytorch.org/models/vgg19-dcbb9e9d.pth',\n",
+    "}\n",
+    "\n",
+    "# A real list rather than the dict_keys view that .keys() returns in\n",
+    "# Python 3: a view cannot be indexed and has no remove().\n",
+    "model_names = list(model_urls)\n",
+    "\n",
+    "# Input size per model family.\n",
+    "input_sizes = {\n",
+    "    'alexnet' : (224,224),\n",
+    "    'densenet': (224,224),\n",
+    "    'resnet' : (224,224),\n",
+    "    'inception' : (299,299),\n",
+    "    'squeezenet' : (224,224),#not 255,255 acc. to https://github.com/pytorch/pytorch/issues/1120\n",
+    "    'vgg' : (224,224)\n",
+    "}\n",
+    "\n",
+    "# Smaller selection for a quicker run:\n",
+    "#models_to_test = ['alexnet', 'densenet169', 'inception_v3',\n",
+    "#                  'resnet34', 'squeezenet1_1', 'vgg13']\n",
+    "\n",
+    "# Independent copy, so removing a model from the test list\n",
+    "# leaves model_names untouched.\n",
+    "models_to_test = list(model_names)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Generic pretrained model loading"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#We solve the dimensionality mismatch between\n",
+    "#final layers in the constructed vs pretrained\n",
+    "#modules at the data level.\n",
+    "def diff_states(dict_canonical, dict_subset):\n",
+    "    \"\"\"Yield the entries of dict_canonical whose size differs in dict_subset.\n",
+    "\n",
+    "    Both arguments map a parameter name to an object exposing ``size()``.\n",
+    "    They must contain exactly the same names; the order of the names is\n",
+    "    irrelevant.\n",
+    "\n",
+    "    Yields:\n",
+    "        ``(name, value)`` pairs taken from dict_canonical, in its iteration\n",
+    "        order, for every name whose ``size()`` differs between the two.\n",
+    "\n",
+    "    Raises:\n",
+    "        AssertionError: if the two sets of names differ, or a value of\n",
+    "            dict_subset has no ``size`` attribute. As this is a generator,\n",
+    "            the checks run on first iteration, not when it is called.\n",
+    "    \"\"\"\n",
+    "    canonical_names, subset_names = set(dict_canonical.keys()), set(dict_subset.keys())\n",
+    "\n",
+    "    #Sanity check that param names overlap\n",
+    "    #Note that params are not necessarily in the same order\n",
+    "    #for every pretrained model\n",
+    "    missing_in_subset = sorted(canonical_names - subset_names, key=str)\n",
+    "    missing_in_canonical = sorted(subset_names - canonical_names, key=str)\n",
+    "    # Messages matter: callers print str(e), which is empty for a bare assert.\n",
+    "    assert not missing_in_subset, 'names missing from dict_subset: %s' % missing_in_subset\n",
+    "    assert not missing_in_canonical, 'names missing from dict_canonical: %s' % missing_in_canonical\n",
+    "\n",
+    "    for name, v1 in dict_canonical.items():\n",
+    "        v2 = dict_subset[name]\n",
+    "        assert hasattr(v2, 'size'), 'dict_subset[%r] has no size attribute' % (name,)\n",
+    "        if v1.size() != v2.size():\n",
+    "            yield (name, v1)\n",
+    "\n",
+    "# Constructor arguments for the DenseNet variants, which are built by hand\n",
+    "# because the densenet factory functions don't (yet) pass on num_classes.\n",
+    "_DENSENET_CONFIGS = {\n",
+    "    'densenet121': dict(num_init_features=64, growth_rate=32,\n",
+    "                        block_config=(6, 12, 24, 16)),\n",
+    "    'densenet169': dict(num_init_features=64, growth_rate=32,\n",
+    "                        block_config=(6, 12, 32, 32)),\n",
+    "    'densenet201': dict(num_init_features=64, growth_rate=32,\n",
+    "                        block_config=(6, 12, 48, 32)),\n",
+    "    'densenet161': dict(num_init_features=96, growth_rate=48,\n",
+    "                        block_config=(6, 12, 36, 24)),\n",
+    "}\n",
+    "\n",
+    "\n",
+    "def load_model_named(name, num_classes=None):\n",
+    "    \"\"\"Construct the torchvision model `name` with `num_classes` outputs.\n",
+    "\n",
+    "    Only the constructor is called; pretrained weights are not requested.\n",
+    "\n",
+    "    Args:\n",
+    "        name: key of a model constructor in ``models.__dict__``.\n",
+    "        num_classes: value passed on as the constructor's ``num_classes``.\n",
+    "            When omitted, the notebook-level global ``num_classes`` is\n",
+    "            used, which is how this function originally got the value.\n",
+    "\n",
+    "    Raises:\n",
+    "        ValueError: if `name` contains 'densenet' but has no entry in\n",
+    "            ``_DENSENET_CONFIGS``.\n",
+    "        KeyError: if `num_classes` is omitted and no such global exists.\n",
+    "    \"\"\"\n",
+    "    if num_classes is None:\n",
+    "        # Backward compatible fallback for callers passing the name only.\n",
+    "        num_classes = globals()['num_classes']\n",
+    "\n",
+    "    #Densenets don't (yet) pass on num_classes, hack it in\n",
+    "    if 'densenet' in name:\n",
+    "        if name not in _DENSENET_CONFIGS:\n",
+    "            raise ValueError('Cirumventing missing num_classes kwargs not implemented for %s' % name)\n",
+    "        return models.DenseNet(num_classes=num_classes, **_DENSENET_CONFIGS[name])\n",
+    "\n",
+    "    return models.__dict__[name](num_classes=num_classes)\n",
+    "\n",
+    "\n",
+    "def load_model(name, num_classes):\n",
+    "    \"\"\"Build model `name` with `num_classes` outputs and load pretrained weights.\n",
+    "\n",
+    "    The state downloaded from ``model_urls[name]`` is merged with the freshly\n",
+    "    constructed model: every entry whose size does not match the model is\n",
+    "    replaced by the model's own value before the state is loaded.\n",
+    "\n",
+    "    Returns:\n",
+    "        ``(model, diff)`` where diff is the list of ``(param_name, value)``\n",
+    "        pairs that were taken from the constructed model instead of the\n",
+    "        pretrained state.\n",
+    "    \"\"\"\n",
+    "    # Pass num_classes explicitly instead of relying on a notebook global.\n",
+    "    model = load_model_named(name, num_classes)\n",
+    "    pretrained_state = model_zoo.load_url(model_urls[name])\n",
+    "\n",
+    "    #Diff\n",
+    "    diff = list(diff_states(model.state_dict(), pretrained_state))\n",
+    "\n",
+    "    # param_name, not name: don't shadow the model name argument\n",
+    "    for param_name, value in diff:\n",
+    "        pretrained_state[param_name] = value\n",
+    "\n",
+    "    remaining = [n for n, _ in diff_states(model.state_dict(), pretrained_state)]\n",
+    "    assert len(remaining) == 0, 'sizes still differ after merging: %s' % remaining\n",
+    "\n",
+    "    #Merge\n",
+    "    model.load_state_dict(pretrained_state)\n",
+    "    return model, diff"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Method to mutate module programmatically (PR #175)\n",
+    "# https://github.com/pytorch/vision/pull/175\n",
+    "\n",
+    "def resize_network_output(net, output_size):\n",
+    "    \"\"\"Replace the last layer of `net` in place so it has `output_size` outputs.\n",
+    "\n",
+    "    A ``torch.nn.DataParallel`` wrapper is unwrapped first. Then the last\n",
+    "    registered child module decides what happens:\n",
+    "\n",
+    "    * ``nn.Sequential``: recurse into it.\n",
+    "    * ``nn.AvgPool2d``: replace the closest preceding ``nn.Conv2d`` by a new\n",
+    "      one with `output_size` output channels and otherwise equal settings.\n",
+    "    * ``nn.Linear``: replace it by a new ``nn.Linear`` with the same input\n",
+    "      size.\n",
+    "\n",
+    "    The replacement layer is newly constructed; the weights of the old\n",
+    "    layer are not copied.\n",
+    "\n",
+    "    Raises:\n",
+    "        AssertionError: if `net` has no child modules, if the last layer is\n",
+    "            of any other type, or if no Conv2d precedes the pooling layer.\n",
+    "    \"\"\"\n",
+    "    if isinstance(net, torch.nn.DataParallel):\n",
+    "        return resize_network_output(net.module, output_size)\n",
+    "\n",
+    "    # keys() can't be indexed in python3, so materialise the names first\n",
+    "    layer_names = list(net._modules.keys())\n",
+    "    if not layer_names:\n",
+    "        raise AssertionError('Cannot resize {}: it has no child modules'.format(net.__class__.__name__))\n",
+    "    output_layer = layer_names[-1]\n",
+    "    old_output_layer = net._modules[output_layer]\n",
+    "\n",
+    "    if isinstance(old_output_layer, nn.Sequential):\n",
+    "        return resize_network_output(old_output_layer, output_size)\n",
+    "    elif isinstance(old_output_layer, nn.modules.pooling.AvgPool2d):\n",
+    "        # Go back in the layer sequence and find the last conv layer and resize that\n",
+    "        # Only happens for squeezenet1_0\n",
+    "        for name in reversed(layer_names[:-1]):\n",
+    "            layer = net._modules[name]\n",
+    "            if isinstance(layer, nn.modules.conv.Conv2d):\n",
+    "                # bias is passed explicitly so a bias-free conv stays bias-free\n",
+    "                net._modules[name] = nn.modules.conv.Conv2d(layer.in_channels, output_size, layer.kernel_size,\n",
+    "                                                            layer.stride, layer.padding, layer.dilation, layer.groups,\n",
+    "                                                            bias=layer.bias is not None)\n",
+    "                return\n",
+    "        raise AssertionError('No Conv2d layer found before the final AvgPool2d')\n",
+    "\n",
+    "    assert isinstance(old_output_layer, nn.Linear), 'Class of old_output_layer {}'.format(old_output_layer.__class__.__name__)\n",
+    "    input_size = old_output_layer.weight.size()[1]\n",
+    "\n",
+    "    net._modules[output_layer] = nn.Linear(input_size, output_size)\n",
+    "\n",
+    "\n",
+    "def load_model_resize_post(name, num_classes):\n",
+    "    \"\"\"Load the pretrained model `name`, then resize its output layer.\n",
+    "\n",
+    "    The model is built through its torchvision constructor with\n",
+    "    ``pretrained=True`` and afterwards handed to ``resize_network_output``,\n",
+    "    which replaces the output layer in place. Building the model directly\n",
+    "    with `num_classes` outputs, as was done before, left nothing to resize\n",
+    "    and never loaded any pretrained weights.\n",
+    "\n",
+    "    Returns:\n",
+    "        The model after ``resize_network_output(model, num_classes)``.\n",
+    "    \"\"\"\n",
+    "    model = models.__dict__[name](pretrained=True)\n",
+    "    resize_network_output(model, num_classes)\n",
+    "    return model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Compare generic loading methods"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# If no CUDA device is present, unpickling the pretrained densenet169\n",
+    "# weights fails. Re-saving that checkpoint from the CPU might resolve this.\n",
+    "# models_to_test.remove('densenet169')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "num_classes = 10\n",
+    "\n",
+    "\n",
+    "def _format_result(label, detail, passed):\n",
+    "    \"\"\"Return one report line: label and detail padded to 99 chars, then OK or X.\"\"\"\n",
+    "    # The verdict is chosen separately from the message. Written inline as\n",
+    "    # `msg + 'OK' if cond else 'X'` the conditional swallows the message.\n",
+    "    return (label + str(detail)).ljust(99) + ('OK' if passed else 'X')\n",
+    "\n",
+    "\n",
+    "def _describe(error):\n",
+    "    \"\"\"Return 'ExceptionType: message', readable even for an empty message.\"\"\"\n",
+    "    return '%s: %s' % (type(error).__name__, error)\n",
+    "\n",
+    "\n",
+    "for name in models_to_test:\n",
+    "    print('')\n",
+    "    print(name, 'with %d classes' % num_classes)\n",
+    "\n",
+    "    # Reset for every model, so that a failed merge can't leak the model of\n",
+    "    # the previous iteration into the comparison further down.\n",
+    "    model_merged = None\n",
+    "    try:\n",
+    "        model_merged, diff = load_model(name, num_classes)\n",
+    "        diff_vanilla = [d[0] for d in diff]\n",
+    "        # Passes if at least one entry had to be taken from the new model\n",
+    "        result = _format_result('... merge loading: ', diff_vanilla, len(diff_vanilla) > 0)\n",
+    "    except Exception as e:\n",
+    "        model_merged = None\n",
+    "        result = _format_result('... merge loading: ', _describe(e), False)\n",
+    "    print(result)\n",
+    "\n",
+    "    try:\n",
+    "        if model_merged is None:\n",
+    "            raise RuntimeError('merge loading failed, nothing to compare against')\n",
+    "        model_resized = load_model_resize_post(name, num_classes)\n",
+    "        diff_merged_resized = [p[0] for p in\n",
+    "                               diff_states(model_merged.state_dict(), model_resized.state_dict())]\n",
+    "        # Passes if both loading methods end up with equally sized entries\n",
+    "        result = _format_result('... resizing after load: ', diff_merged_resized,\n",
+    "                                len(diff_merged_resized) == 0)\n",
+    "    except Exception as e:\n",
+    "        result = _format_result('... resizing after load: ', _describe(e), False)\n",
+    "    print(result)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}