22 | 22 | "import torch.nn.functional as F\n",
23 | 23 | "import torchvision\n",
24 | 24 | "import torchvision.transforms as transforms\n",
25 |    | - "from qtorch.quant import Quantizer\n",
   | 25 | + "from qtorch.quant import Quantizer, quantizer\n",
26 | 26 | "from qtorch.optim import OptimLP\n",
27 | 27 | "from torch.optim import SGD\n",
28 | 28 | "from qtorch import FloatingPoint\n",
29 |    | - "from tqdm import tqdm"
   | 29 | + "from tqdm import tqdm\n",
   | 30 | + "import math"
30 | 31 | ]
31 | 32 | },
32 | 33 | {

103 | 104 | "bit_16 = FloatingPoint(exp=6, man=9)\n",
104 | 105 | "\n",
105 | 106 | "# define quantization functions\n",
106 |     | - "weight_quant = Quantizer(forward_number=bit_8, backward_number=None,\n",
107 |     | - "                         forward_rounding=\"nearest\", backward_rounding=\"nearest\")\n",
108 |     | - "grad_quant = Quantizer(forward_number=bit_8, backward_number=None,\n",
109 |     | - "                       forward_rounding=\"nearest\", backward_rounding=\"stochastic\")\n",
110 |     | - "momentum_quant = Quantizer(forward_number=bit_16, backward_number=None,\n",
111 |     | - "                           forward_rounding=\"nearest\", backward_rounding=\"stochastic\")\n",
112 |     | - "acc_quant = Quantizer(forward_number=bit_16, backward_number=None,\n",
113 |     | - "                      forward_rounding=\"nearest\", backward_rounding=\"nearest\")\n",
    | 107 | + "weight_quant = quantizer(forward_number=bit_8,\n",
    | 108 | + "                         forward_rounding=\"nearest\")\n",
    | 109 | + "grad_quant = quantizer(forward_number=bit_8,\n",
    | 110 | + "                       forward_rounding=\"nearest\")\n",
    | 111 | + "momentum_quant = quantizer(forward_number=bit_16,\n",
    | 112 | + "                           forward_rounding=\"stochastic\")\n",
    | 113 | + "acc_quant = quantizer(forward_number=bit_16,\n",
    | 114 | + "                      forward_rounding=\"stochastic\")\n",
114 | 115 | "\n",
115 | 116 | "# define a lambda function so that the Quantizer module can be duplicated easily\n",
116 | 117 | "act_error_quant = lambda : Quantizer(forward_number=bit_8, backward_number=bit_8,\n",
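
For context on the hunk above: the switch from `Quantizer` to the lowercase `quantizer` matters because the two qtorch APIs return different things. `quantizer(...)` returns a plain function that maps a tensor onto the low-precision grid, which is what `OptimLP` expects for weights, gradients, momentum, and accumulators; `Quantizer(...)` is an `nn.Module` that can additionally quantize the backward error signal, so it is what gets inserted into the network. A minimal sketch of the difference; the `FloatingPoint(exp=5, man=2)` format for `bit_8` is an assumption here, since its definition sits in an elided part of the diff:

```python
import torch
from qtorch import FloatingPoint
from qtorch.quant import Quantizer, quantizer

# assumed 8-bit float format: 1 sign + 5 exponent + 2 mantissa bits
bit_8 = FloatingPoint(exp=5, man=2)

# quantizer(...) returns a function: tensor in, low-precision tensor out
quant_fn = quantizer(forward_number=bit_8, forward_rounding="nearest")
x = torch.randn(4)
print(quant_fn(x))  # entries rounded to the nearest representable 8-bit value

# Quantizer(...) wraps the same rounding in an nn.Module and can also
# quantize the gradient flowing backward through it
quant_module = Quantizer(forward_number=bit_8, backward_number=bit_8,
                         forward_rounding="nearest", backward_rounding="stochastic")
print(quant_module(x))
```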

121 | 122 | "cell_type": "markdown",
122 | 123 | "metadata": {},
123 | 124 | "source": [
124 |     | - "Next, we define a low-precision VGG network. In the definition, we recursively insert quantization module after every convolution layer. Note that the quantization of weight, gradient, momentum, and gradient accumulator are not handled here."
    | 125 | + "Next, we define a low-precision ResNet. In the definition, we recursively insert a quantization module after every convolution layer. Note that the quantization of weights, gradients, momentum, and the gradient accumulator is not handled here."
125 | 126 | ]
126 | 127 | },
127 | 128 | {

130 | 131 | "metadata": {},
131 | 132 | "outputs": [],
132 | 133 | "source": [
133 |     | - "# let's define the model we are using\n",
134 |     | - "def make_layers(cfg, quant):\n",
135 |     | - "    layers = list()\n",
136 |     | - "    in_channels = 3\n",
137 |     | - "    n = 1\n",
138 |     | - "    for v in cfg:\n",
139 |     | - "        if v == 'M':\n",
140 |     | - "            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]\n",
141 |     | - "        else:\n",
142 |     | - "            use_quant = v[-1] != 'N'\n",
143 |     | - "            filters = int(v) if use_quant else int(v[:-1])\n",
144 |     | - "            conv2d = nn.Conv2d(in_channels, filters, kernel_size=3, padding=1)\n",
145 |     | - "            layers += [conv2d, nn.ReLU(inplace=True)]\n",
146 |     | - "            if use_quant: layers += [quant()] # inserting quantization modules\n",
147 |     | - "            n += 1\n",
148 |     | - "            in_channels = filters\n",
149 |     | - "    return nn.Sequential(*layers)\n",
    | 134 | + "def conv3x3(in_planes, out_planes, stride=1):\n",
    | 135 | + "    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,\n",
    | 136 | + "                     padding=1, bias=False)\n",
150 | 137 | "\n",
151 |     | - "class VGGLP(nn.Module):\n",
152 |     | - "    def __init__(self, config, quant=None, num_classes=10):\n",
    | 138 | + "class BasicBlock(nn.Module):\n",
    | 139 | + "    expansion = 1\n",
153 | 140 | "\n",
154 |     | - "        super(VGGLP, self).__init__()\n",
155 |     | - "        self.features = make_layers(config, quant)\n",
156 |     | - "        self.classifier = nn.Sequential(\n",
157 |     | - "            nn.Dropout(),\n",
158 |     | - "            nn.Linear(512, 512),\n",
159 |     | - "            nn.ReLU(True),\n",
160 |     | - "            quant(),\n",
161 |     | - "            nn.Dropout(),\n",
162 |     | - "            nn.Linear(512, 512),\n",
163 |     | - "            nn.ReLU(True),\n",
164 |     | - "            quant(),\n",
165 |     | - "            nn.Linear(512, num_classes),\n",
166 |     | - "        )\n",
    | 141 | + "    def __init__(self, inplanes, planes, quant, stride=1, downsample=None):\n",
    | 142 | + "        super(BasicBlock, self).__init__()\n",
    | 143 | + "        self.bn1 = nn.BatchNorm2d(inplanes)\n",
    | 144 | + "        self.relu = nn.ReLU(inplace=True)\n",
    | 145 | + "        self.conv1 = conv3x3(inplanes, planes, stride)\n",
    | 146 | + "        self.bn2 = nn.BatchNorm2d(planes)\n",
    | 147 | + "        self.conv2 = conv3x3(planes, planes)\n",
    | 148 | + "        self.downsample = downsample\n",
    | 149 | + "        self.stride = stride\n",
    | 150 | + "        self.quant = quant()\n",
167 | 151 | "\n",
168 | 152 | "    def forward(self, x):\n",
169 |     | - "        x = self.features(x)\n",
170 |     | - "        x = x.view(x.size(0), -1)\n",
171 |     | - "        x = self.classifier(x)\n",
172 |     | - "        return x\n",
    | 153 | + "        residual = x\n",
    | 154 | + "\n",
    | 155 | + "        out = self.bn1(x)\n",
    | 156 | + "        out = self.relu(out)\n",
    | 157 | + "        out = self.quant(out)\n",
    | 158 | + "        out = self.conv1(out)\n",
    | 159 | + "        out = self.quant(out)\n",
    | 160 | + "\n",
    | 161 | + "        out = self.bn2(out)\n",
    | 162 | + "        out = self.relu(out)\n",
    | 163 | + "        out = self.quant(out)\n",
    | 164 | + "        out = self.conv2(out)\n",
    | 165 | + "        out = self.quant(out)\n",
    | 166 | + "\n",
    | 167 | + "        if self.downsample is not None:\n",
    | 168 | + "            residual = self.downsample(x)\n",
    | 169 | + "\n",
    | 170 | + "        out += residual\n",
    | 171 | + "\n",
    | 172 | + "        return out\n",
173 | 173 | " \n",
174 |     | - "config = ['64', '64', 'M', '128', '128', 'M', \n",
175 |     | - "          '256', '256', '256', 'M', '512', '512', '512', 'M', '512', '512', '512', 'M'] # VGG16\n",
    | 174 | + "class PreResNet(nn.Module):\n",
    | 175 | + "\n",
    | 176 | + "    def __init__(self, quant, num_classes=10, depth=20):\n",
    | 177 | + "\n",
    | 178 | + "        super(PreResNet, self).__init__()\n",
    | 179 | + "        assert (depth - 2) % 6 == 0, 'depth should be 6n+2'\n",
    | 180 | + "        n = (depth - 2) // 6\n",
    | 181 | + "\n",
    | 182 | + "        block = BasicBlock\n",
    | 183 | + "\n",
    | 184 | + "        self.inplanes = 16\n",
    | 185 | + "        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1,\n",
    | 186 | + "                               bias=False)\n",
    | 187 | + "        self.layer1 = self._make_layer(block, 16, n, quant)\n",
    | 188 | + "        self.layer2 = self._make_layer(block, 32, n, quant, stride=2)\n",
    | 189 | + "        self.layer3 = self._make_layer(block, 64, n, quant, stride=2)\n",
    | 190 | + "        self.bn = nn.BatchNorm2d(64 * block.expansion)\n",
    | 191 | + "        self.relu = nn.ReLU(inplace=True)\n",
    | 192 | + "        self.avgpool = nn.AvgPool2d(8)\n",
    | 193 | + "        self.fc = nn.Linear(64 * block.expansion, num_classes)\n",
    | 194 | + "        self.quant = quant()\n",
    | 195 | + "        IBM_half = FloatingPoint(exp=6, man=9)\n",
    | 196 | + "        self.quant_half = Quantizer(IBM_half, IBM_half, \"nearest\", \"nearest\")\n",
    | 197 | + "        for m in self.modules():\n",
    | 198 | + "            if isinstance(m, nn.Conv2d):\n",
    | 199 | + "                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels\n",
    | 200 | + "                m.weight.data.normal_(0, math.sqrt(2. / n))\n",
    | 201 | + "            elif isinstance(m, nn.BatchNorm2d):\n",
    | 202 | + "                m.weight.data.fill_(1)\n",
    | 203 | + "                m.bias.data.zero_()\n",
    | 204 | + "\n",
    | 205 | + "    def _make_layer(self, block, planes, blocks, quant, stride=1):\n",
    | 206 | + "        downsample = None\n",
    | 207 | + "        if stride != 1 or self.inplanes != planes * block.expansion:\n",
    | 208 | + "            downsample = nn.Sequential(\n",
    | 209 | + "                nn.Conv2d(self.inplanes, planes * block.expansion,\n",
    | 210 | + "                          kernel_size=1, stride=stride, bias=False),\n",
    | 211 | + "            )\n",
    | 212 | + "\n",
    | 213 | + "        layers = list()\n",
    | 214 | + "        layers.append(block(self.inplanes, planes, quant, stride, downsample))\n",
    | 215 | + "        self.inplanes = planes * block.expansion\n",
    | 216 | + "        for i in range(1, blocks):\n",
    | 217 | + "            layers.append(block(self.inplanes, planes, quant))\n",
176 | 218 | "\n",
177 |     | - "model = VGGLP(config, act_error_quant)"
    | 219 | + "        return nn.Sequential(*layers)\n",
    | 220 | + "\n",
    | 221 | + "    def forward(self, x):\n",
    | 222 | + "        x = self.quant_half(x)\n",
    | 223 | + "        x = self.conv1(x)\n",
    | 224 | + "        x = self.quant(x)\n",
    | 225 | + "\n",
    | 226 | + "        x = self.layer1(x)  # 32x32\n",
    | 227 | + "        x = self.layer2(x)  # 16x16\n",
    | 228 | + "        x = self.layer3(x)  # 8x8\n",
    | 229 | + "        x = self.bn(x)\n",
    | 230 | + "        x = self.relu(x)\n",
    | 231 | + "        x = self.quant(x)\n",
    | 232 | + "\n",
    | 233 | + "        x = self.avgpool(x)\n",
    | 234 | + "        x = x.view(x.size(0), -1)\n",
    | 235 | + "        x = self.fc(x)\n",
    | 236 | + "        x = self.quant_half(x)\n",
    | 237 | + "\n",
    | 238 | + "        return x"
178 | 239 | ]
179 | 240 | },
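
As a quick orientation on the new model's shape: with the default `depth=20`, the `(depth - 2) % 6 == 0` assertion passes and each of the three stages (`layer1` through `layer3`) stacks `n = 3` `BasicBlock`s of two 3x3 convolutions each. A small sanity check of that arithmetic (illustrative only, not part of the notebook):

```python
# depth = 6n + 2 for this CIFAR-style pre-activation ResNet:
# 3 stages x n blocks x 2 convs per block, plus the stem conv and the final fc
depth = 20
assert (depth - 2) % 6 == 0, 'depth should be 6n+2'
n = (depth - 2) // 6
print(n)              # 3 blocks per stage
print(3 * n * 2 + 2)  # 20 weighted layers in total
```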
180 | 241 | {
181 | 242 | "cell_type": "code",
182 | 243 | "execution_count": 5,
183 | 244 | "metadata": {},
184 | 245 | "outputs": [],
    | 246 | + "source": [
    | 247 | + "model = PreResNet(act_error_quant)"
    | 248 | + ]
    | 249 | + },
    | 250 | + {
    | 251 | + "cell_type": "code",
    | 252 | + "execution_count": 7,
    | 253 | + "metadata": {},
    | 254 | + "outputs": [],
185 | 255 | "source": [
186 | 256 | "device = 'cuda' # change device to 'cpu' if you want to run this example on cpu\n",
187 | 257 | "model = model.to(device=device)"

196 | 266 | },
197 | 267 | {
198 | 268 | "cell_type": "code",
199 |     | - "execution_count": 6,
    | 269 | + "execution_count": 8,
200 | 270 | "metadata": {},
201 | 271 | "outputs": [],
202 | 272 | "source": [
203 |     | - "optimizer = SGD(model.parameters(), lr=0.05, momentum=0.9, weight_decay=1e-4)\n",
    | 273 | + "optimizer = SGD(model.parameters(), lr=0.05, momentum=0.9, weight_decay=5e-4)\n",
204 | 274 | "optimizer = OptimLP(optimizer,\n",
205 | 275 | "                    weight_quant=weight_quant,\n",
206 | 276 | "                    grad_quant=grad_quant,\n",
207 | 277 | "                    momentum_quant=momentum_quant,\n",
208 |     | - "                    acc_quant=acc_quant\n",
    | 278 | + "                    acc_quant=acc_quant,\n",
    | 279 | + "                    grad_scaling=1/1000\n",
209 | 280 | ")"
210 | 281 | ]
211 | 282 | },
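
A note on how `OptimLP` uses these four quantizers, based on QPyTorch's description of its low-precision SGD wrapper; the sketch below is illustrative pseudocode for one parameter update under the settings above, not the library's exact internals:

```python
# one conceptual OptimLP/SGD step (illustrative pseudocode):
def optimlp_step(weight, acc, grad, buf, lr=0.05, momentum=0.9):
    grad = grad_quant(grad * (1 / 1000))         # grad_scaling undoes the x1000
                                                 # loss scaling, then quantizes to 8-bit
    buf = momentum_quant(momentum * buf + grad)  # momentum buffer kept in 16-bit
    acc = acc_quant(acc - lr * buf)              # update a 16-bit accumulator copy
    weight = weight_quant(acc)                   # round back down to 8-bit weights
    return weight, acc, buf
```

The new `grad_scaling=1/1000` argument pairs with the `loss = loss * 1000` line added to the training loop below.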

218 | 289 | },
219 | 290 | {
220 | 291 | "cell_type": "code",
221 |     | - "execution_count": 7,
    | 292 | + "execution_count": 9,
222 | 293 | "metadata": {},
223 | 294 | "outputs": [],
224 | 295 | "source": [

243 | 314 | "        ttl += input.size()[0]\n",
244 | 315 | "\n",
245 | 316 | "        if phase==\"train\":\n",
    | 317 | + "            loss = loss * 1000\n",
246 | 318 | "            optimizer.zero_grad()\n",
247 | 319 | "            loss.backward()\n",
248 | 320 | "            optimizer.step()\n",
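
The `loss = loss * 1000` line added here is the other half of the loss-scaling pattern: multiplying the loss before `backward()` keeps small gradients from underflowing to zero when they are quantized to 8-bit floats, and `OptimLP`'s `grad_scaling=1/1000` divides the scale back out before the weight update, so the effective step size is unchanged. The factor 1000 is this notebook's choice; any scale large enough to lift the smallest gradients above the format's underflow threshold plays the same role.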

263 | 335 | },
264 | 336 | {
265 | 337 | "cell_type": "code",
266 |     | - "execution_count": 8,
    | 338 | + "execution_count": 10,
267 | 339 | "metadata": {},
268 | 340 | "outputs": [
269 | 341 | {
270 | 342 | "name": "stderr",
271 | 343 | "output_type": "stream",
272 | 344 | "text": [
273 |     | - "100%|██████████| 391/391 [00:34<00:00, 11.34it/s]\n",
274 |     | - "100%|██████████| 79/79 [00:01<00:00, 70.06it/s]\n"
    | 345 | + "100%|██████████| 391/391 [00:14<00:00, 26.41it/s]\n",
    | 346 | + "100%|██████████| 79/79 [00:01<00:00, 78.18it/s]\n"
275 | 347 | ]
276 | 348 | }
277 | 349 | ],

282 | 354 | "    test_res = run_epoch(loaders['test'], model, F.cross_entropy,\n",
283 | 355 | "                         optimizer=optimizer, phase=\"eval\")"
284 | 356 | ]
    | 357 | + },
    | 358 | + {
    | 359 | + "cell_type": "code",
    | 360 | + "execution_count": 11,
    | 361 | + "metadata": {},
    | 362 | + "outputs": [
    | 363 | + {
    | 364 | + "data": {
    | 365 | + "text/plain": [
    | 366 | + "{'loss': 1.6471979439544677, 'accuracy': 37.566}"
    | 367 | + ]
    | 368 | + },
    | 369 | + "execution_count": 11,
    | 370 | + "metadata": {},
    | 371 | + "output_type": "execute_result"
    | 372 | + }
    | 373 | + ],
    | 374 | + "source": [
    | 375 | + "train_res"
    | 376 | + ]
    | 377 | + },
    | 378 | + {
    | 379 | + "cell_type": "code",
    | 380 | + "execution_count": 12,
    | 381 | + "metadata": {},
    | 382 | + "outputs": [
    | 383 | + {
    | 384 | + "data": {
    | 385 | + "text/plain": [
    | 386 | + "{'loss': 1.5749474658966065, 'accuracy': 43.63}"
    | 387 | + ]
    | 388 | + },
    | 389 | + "execution_count": 12,
    | 390 | + "metadata": {},
    | 391 | + "output_type": "execute_result"
    | 392 | + }
    | 393 | + ],
    | 394 | + "source": [
    | 395 | + "test_res"
    | 396 | + ]
    | 397 | + },
285 | 397 | }

286 | 398 | ],
287 | 399 | "metadata": {

300 | 412 | "name": "python",
301 | 413 | "nbconvert_exporter": "python",
302 | 414 | "pygments_lexer": "ipython3",
303 |     | - "version": "3.6.7"
    | 415 | + "version": "3.7.3"
304 | 416 | }
305 | 417 | },
306 | 418 | "nbformat": 4,