Commit

Update files
Gunnika committed Mar 16, 2020
1 parent cf55fde commit e5b6b1f
Showing 4 changed files with 924 additions and 38 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -3,6 +3,7 @@ data/
 preprocessed/
 asl-alphabet/
 saved_model.pt
+__pycache__/
 
 .DS_Store
 app/.DS_Store
247 changes: 247 additions & 0 deletions Gesture_Recognition.ipynb
@@ -0,0 +1,247 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"There are 87000 total train images.\n",
"There are 28 total test images.\n",
"There are 69600 total train images.\n",
"There are 17400 total dog validation images.\n",
"There are 28 total test images.\n",
"----------------------------------------------------------------\n",
" Layer (type) Output Shape Param #\n",
"================================================================\n",
" Conv2d-1 [-1, 10, 48, 48] 100\n",
" MaxPool2d-2 [-1, 10, 24, 24] 0\n",
" Conv2d-3 [-1, 20, 22, 22] 1,820\n",
" MaxPool2d-4 [-1, 20, 11, 11] 0\n",
" Conv2d-5 [-1, 30, 9, 9] 5,430\n",
" Dropout2d-6 [-1, 30, 9, 9] 0\n",
" Linear-7 [-1, 270] 656,370\n",
" Linear-8 [-1, 29] 7,859\n",
" LogSoftmax-9 [-1, 29] 0\n",
"================================================================\n",
"Total params: 671,579\n",
"Trainable params: 671,579\n",
"Non-trainable params: 0\n",
"----------------------------------------------------------------\n",
"Input size (MB): 0.01\n",
"Forward/backward pass size (MB): 0.35\n",
"Params size (MB): 2.56\n",
"Estimated Total Size (MB): 2.92\n",
"----------------------------------------------------------------\n",
"Epoch: 1 \n",
" 20/87000: [>...............................] - ETA 0.0s"
]
},
{
"ename": "RuntimeError",
"evalue": "element 0 of tensors does not require grad and does not have a grad_fn",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-18-f37309bbd9b9>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mtorchvision\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtransforms\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mtransforms\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mgesture_detector\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpy\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m \u001b[0ma\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgesture_detector\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mNetwork\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/Sign Language Detector PyTorch/gesture_detector.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 265\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 266\u001b[0m \u001b[0;31m# train the model\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 267\u001b[0;31m \u001b[0mmodel_scratch\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtrain\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mepochs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mloaders\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moptimizer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcriterion\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0muse_cuda\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'saved_model.pt'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 268\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 269\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/Sign Language Detector PyTorch/gesture_detector.py\u001b[0m in \u001b[0;36mtrain\u001b[0;34m(n_epochs, loaders, model, optimizer, criterion, use_cuda, save_path)\u001b[0m\n\u001b[1;32m 214\u001b[0m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 215\u001b[0m \u001b[0mloss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcriterion\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mtarget\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 216\u001b[0;31m \u001b[0mloss\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 217\u001b[0m \u001b[0moptimizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 218\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/torch/tensor.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(self, gradient, retain_graph, create_graph)\u001b[0m\n\u001b[1;32m 193\u001b[0m \u001b[0mproducts\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0mDefaults\u001b[0m \u001b[0mto\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 194\u001b[0m \"\"\"\n\u001b[0;32m--> 195\u001b[0;31m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mautograd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgradient\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 196\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 197\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mregister_hook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhook\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/torch/autograd/__init__.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables)\u001b[0m\n\u001b[1;32m 97\u001b[0m Variable._execution_engine.run_backward(\n\u001b[1;32m 98\u001b[0m \u001b[0mtensors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgrad_tensors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 99\u001b[0;31m allow_unreachable=True) # allow_unreachable flag\n\u001b[0m\u001b[1;32m 100\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 101\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mRuntimeError\u001b[0m: element 0 of tensors does not require grad and does not have a grad_fn"
]
}
],
"source": [
"import cv2\n",
"import numpy as np\n",
"\n",
"import torch\n",
"import torchvision.transforms as transforms\n",
"import gesture_detector.py \n",
"a = gesture_detector.Network()"
]
},
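{
"cell_type": "markdown",
"metadata": {},
"source": [
"The layer summary printed above pins down the architecture. The cell below is a hypothetical reconstruction of a `Network` module that is consistent with those output shapes and parameter counts; the real class is defined in `gesture_detector.py`, so treat this as an illustrative sketch rather than the committed source."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import torch.nn as nn\n",
"\n",
"class Network(nn.Module):\n",
"    # hypothetical sketch reconstructed from the torchsummary output:\n",
"    # 1x50x50 input -> Conv2d(1,10,3) -> pool -> Conv2d(10,20,3) -> pool\n",
"    # -> Conv2d(20,30,3) -> Dropout2d -> Linear(2430,270) -> Linear(270,29)\n",
"    def __init__(self):\n",
"        super(Network, self).__init__()\n",
"        self.conv1 = nn.Conv2d(1, 10, 3)      # 50x50 -> 48x48, 100 params\n",
"        self.pool = nn.MaxPool2d(2, 2)        # halves spatial size\n",
"        self.conv2 = nn.Conv2d(10, 20, 3)     # 24x24 -> 22x22, 1,820 params\n",
"        self.conv3 = nn.Conv2d(20, 30, 3)     # 11x11 -> 9x9, 5,430 params\n",
"        self.dropout = nn.Dropout2d(p=0.2)    # p is a guess; it adds no params\n",
"        self.fc1 = nn.Linear(30 * 9 * 9, 270) # 656,370 params\n",
"        self.fc2 = nn.Linear(270, 29)         # 29 classes: A-Z, del, nothing, space\n",
"        self.log_softmax = nn.LogSoftmax(dim=1)\n",
"\n",
"    def forward(self, x):\n",
"        x = self.pool(torch.relu(self.conv1(x)))\n",
"        x = self.pool(torch.relu(self.conv2(x)))\n",
"        x = self.dropout(torch.relu(self.conv3(x)))\n",
"        x = x.view(x.size(0), -1)\n",
"        x = torch.relu(self.fc1(x))\n",
"        return self.log_softmax(self.fc2(x))"
]
},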
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'Network' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-17-57fae5e3d28e>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mmodel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mNetwork\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload_state_dict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'saved_model.pt'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0meval\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mNameError\u001b[0m: name 'Network' is not defined"
]
}
],
"source": [
"model = Network()\n",
"model.load_state_dict(torch.load('saved_model.pt'))\n",
"model.eval()"
]
},
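{
"cell_type": "markdown",
"metadata": {},
"source": [
"If the checkpoint was saved on a GPU machine, loading it on a CPU-only machine needs a `map_location` argument. A hedged variant of the load above that works on either device:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# pick whichever device is available and load the checkpoint onto it\n",
"device = torch.device('cuda' if use_cuda else 'cpu')\n",
"model = gesture_detector.Network()\n",
"model.load_state_dict(torch.load('saved_model.pt', map_location=device))\n",
"model.to(device)\n",
"model.eval()"
]
},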
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"gestures = {\n",
" 0:'A',\n",
" 1:'B',\n",
" 2:'C',\n",
" 3:'D',\n",
" 4:'E',\n",
" 5:'F',\n",
" 6:'G',\n",
" 7:'H',\n",
" 8:'I',\n",
" 9:'J',\n",
" 10:'K',\n",
" 11:'L',\n",
" 12:'M',\n",
" 13:'N',\n",
" 14:'O',\n",
" 15:'P',\n",
" 16:'Q',\n",
" 17:'R',\n",
" 18:'S',\n",
" 19:'T',\n",
" 20:'U',\n",
" 21:'V',\n",
" 22:'W',\n",
" 23:'X',\n",
" 24:'Y',\n",
" 25:'Z',\n",
" 26:'del',\n",
" 27:'nothing',\n",
" 28:'space'\n",
" \n",
"}"
]
},
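{
"cell_type": "markdown",
"metadata": {},
"source": [
"The mapping has 29 entries, one per unit of the final `Linear(270, 29)` layer in the summary; a quick sanity check catches a missing or duplicated index:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# every class index the model can emit (0-28) must have a label\n",
"assert len(gestures) == 29\n",
"assert sorted(gestures.keys()) == list(range(29))"
]
},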
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def predict(img_path):\n",
" # load the image and return the predicted breed\n",
" img = Image.open(img_path)\n",
"# img = Image.open(img_path).convert('L')\n",
" transformations = transforms.Compose([transforms.Grayscale(num_output_channels=1),\n",
" transforms.Resize(size=50),\n",
" transforms.ToTensor(),\n",
" transforms.Normalize([0.5],[0.5])])\n",
" image_tensor = transformations(img)[:3,:,:].unsqueeze(0)\n",
"\n",
" # move model inputs to cuda, if GPU available\n",
" if use_cuda:\n",
" image_tensor = image_tensor.cuda()\n",
"\n",
" # get sample outputs\n",
" output = model(image_tensor)\n",
" # convert output probabilities to predicted class\n",
" _, preds_tensor = torch.max(output, 1)\n",
"\n",
" pred = np.squeeze(preds_tensor.numpy()[0]) if not use_cuda else np.squeeze(preds_tensor.cpu().numpy()[0])\n",
"\n",
" return gestures[pred]"
]
},
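{
"cell_type": "markdown",
"metadata": {},
"source": [
"`predict` can be smoke-tested without the webcam by feeding it an arbitrary 8-bit grayscale array of the same size as the hand crop used below (200x200). The random array here is purely illustrative, so the predicted label is meaningless:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# random noise standing in for a thresholded 200x200 hand crop\n",
"dummy = np.random.randint(0, 256, (200, 200), dtype=np.uint8)\n",
"print(predict(dummy))  # prints one of the 29 gesture labels"
]
},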
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"vc = cv2.VideoCapture(0)\n",
"rval, frame = vc.read()\n",
"old_text = ''\n",
"pred_text = ''\n",
"count_frames = 0\n",
"total_str = ''\n",
"flag = False\n",
"\n",
"while True:\n",
" \n",
" if frame is not None: \n",
" \n",
" frame = cv2.flip(frame, 1)\n",
" frame = cv2.resize( frame, (400,400) )\n",
" \n",
" cv2.rectangle(frame, (300,300), (100,100), (0,255,0), 2)\n",
" \n",
" crop_img = frame[100:300, 100:300]\n",
" grey = cv2.cvtColor(crop_img, cv2.COLOR_BGR2GRAY)\n",
" \n",
" thresh = cv2.threshold(grey,210,255,cv2.THRESH_BINARY_INV+cv2.THRESH_OTSU)[1]\n",
" \n",
" \n",
" blackboard = np.zeros(frame.shape, dtype=np.uint8)\n",
" cv2.putText(blackboard, \"Predicted text - \", (30, 40), cv2.FONT_HERSHEY_TRIPLEX, 1, (255, 255, 0))\n",
" if count_frames > 20 and pred_text != \"\":\n",
" total_str += pred_text\n",
" count_frames = 0\n",
" \n",
" if flag == True:\n",
" old_text = pred_text\n",
" pred_text = predict(thresh)\n",
" \n",
" if old_text == pred_text:\n",
" count_frames += 1\n",
" else:\n",
" count_frames = 0\n",
" cv2.putText(blackboard, total_str, (30, 80), cv2.FONT_HERSHEY_TRIPLEX, 1, (255, 255, 127))\n",
" res = np.hstack((frame, blackboard))\n",
" \n",
" cv2.imshow(\"image\", res)\n",
" cv2.imshow(\"hand\", thresh)\n",
" \n",
" rval, frame = vc.read()\n",
" keypress = cv2.waitKey(1)\n",
" if keypress == ord('c'):\n",
" flag = True\n",
" if keypress == ord('q'):\n",
" break\n",
"\n",
"vc.release()\n",
"cv2.destroyAllWindows()\n",
"cv2.waitKey(1)\n",
"\n",
"vc.release()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}