Commit

Update files
Gunnika committed Mar 16, 2020
1 parent cf55fde commit e5b6b1f
Showing 4 changed files with 924 additions and 38 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -3,6 +3,7 @@ data/
 preprocessed/
 asl-alphabet/
 saved_model.pt
+__pycache__/
 
 .DS_Store
 app/.DS_Store
247 changes: 247 additions & 0 deletions Gesture_Recognition.ipynb
@@ -0,0 +1,247 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"There are 87000 total train images.\n",
"There are 28 total test images.\n",
"There are 69600 total train images.\n",
"There are 17400 total dog validation images.\n",
"There are 28 total test images.\n",
"----------------------------------------------------------------\n",
" Layer (type) Output Shape Param #\n",
"================================================================\n",
" Conv2d-1 [-1, 10, 48, 48] 100\n",
" MaxPool2d-2 [-1, 10, 24, 24] 0\n",
" Conv2d-3 [-1, 20, 22, 22] 1,820\n",
" MaxPool2d-4 [-1, 20, 11, 11] 0\n",
" Conv2d-5 [-1, 30, 9, 9] 5,430\n",
" Dropout2d-6 [-1, 30, 9, 9] 0\n",
" Linear-7 [-1, 270] 656,370\n",
" Linear-8 [-1, 29] 7,859\n",
" LogSoftmax-9 [-1, 29] 0\n",
"================================================================\n",
"Total params: 671,579\n",
"Trainable params: 671,579\n",
"Non-trainable params: 0\n",
"----------------------------------------------------------------\n",
"Input size (MB): 0.01\n",
"Forward/backward pass size (MB): 0.35\n",
"Params size (MB): 2.56\n",
"Estimated Total Size (MB): 2.92\n",
"----------------------------------------------------------------\n",
"Epoch: 1 \n",
" 20/87000: [>...............................] - ETA 0.0s"
]
},
{
"ename": "RuntimeError",
"evalue": "element 0 of tensors does not require grad and does not have a grad_fn",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-18-f37309bbd9b9>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mtorchvision\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtransforms\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mtransforms\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mgesture_detector\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpy\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m \u001b[0ma\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgesture_detector\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mNetwork\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/Sign Language Detector PyTorch/gesture_detector.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 265\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 266\u001b[0m \u001b[0;31m# train the model\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 267\u001b[0;31m \u001b[0mmodel_scratch\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtrain\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mepochs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mloaders\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moptimizer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcriterion\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0muse_cuda\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'saved_model.pt'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 268\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 269\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/Sign Language Detector PyTorch/gesture_detector.py\u001b[0m in \u001b[0;36mtrain\u001b[0;34m(n_epochs, loaders, model, optimizer, criterion, use_cuda, save_path)\u001b[0m\n\u001b[1;32m 214\u001b[0m \u001b[0moutput\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 215\u001b[0m \u001b[0mloss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcriterion\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mtarget\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 216\u001b[0;31m \u001b[0mloss\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 217\u001b[0m \u001b[0moptimizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 218\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/torch/tensor.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(self, gradient, retain_graph, create_graph)\u001b[0m\n\u001b[1;32m 193\u001b[0m \u001b[0mproducts\u001b[0m\u001b[0;34m.\u001b[0m \u001b[0mDefaults\u001b[0m \u001b[0mto\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 194\u001b[0m \"\"\"\n\u001b[0;32m--> 195\u001b[0;31m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mautograd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgradient\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 196\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 197\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mregister_hook\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhook\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.7/site-packages/torch/autograd/__init__.py\u001b[0m in \u001b[0;36mbackward\u001b[0;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables)\u001b[0m\n\u001b[1;32m 97\u001b[0m Variable._execution_engine.run_backward(\n\u001b[1;32m 98\u001b[0m \u001b[0mtensors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mgrad_tensors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 99\u001b[0;31m allow_unreachable=True) # allow_unreachable flag\n\u001b[0m\u001b[1;32m 100\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 101\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mRuntimeError\u001b[0m: element 0 of tensors does not require grad and does not have a grad_fn"
]
}
],
"source": [
"import cv2\n",
"import numpy as np\n",
"\n",
"import torch\n",
"import torchvision.transforms as transforms\n",
"import gesture_detector.py \n",
"a = gesture_detector.Network()"
]
},
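{
"cell_type": "markdown",
"metadata": {},
"source": [
"The layer summary printed above pins down the architecture. The cell below is a hypothetical reconstruction of a `Network` module that is consistent with those output shapes and parameter counts; the real class is defined in `gesture_detector.py`, so treat this as an illustrative sketch rather than the committed source."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import torch.nn as nn\n",
"\n",
"class Network(nn.Module):\n",
"    # hypothetical sketch reconstructed from the torchsummary output:\n",
"    # 1x50x50 input -> Conv2d(1,10,3) -> pool -> Conv2d(10,20,3) -> pool\n",
"    # -> Conv2d(20,30,3) -> Dropout2d -> Linear(2430,270) -> Linear(270,29)\n",
"    def __init__(self):\n",
"        super(Network, self).__init__()\n",
"        self.conv1 = nn.Conv2d(1, 10, 3)      # 50x50 -> 48x48, 100 params\n",
"        self.pool = nn.MaxPool2d(2, 2)        # halves spatial size\n",
"        self.conv2 = nn.Conv2d(10, 20, 3)     # 24x24 -> 22x22, 1,820 params\n",
"        self.conv3 = nn.Conv2d(20, 30, 3)     # 11x11 -> 9x9, 5,430 params\n",
"        self.dropout = nn.Dropout2d(p=0.2)    # p is a guess; it adds no params\n",
"        self.fc1 = nn.Linear(30 * 9 * 9, 270) # 656,370 params\n",
"        self.fc2 = nn.Linear(270, 29)         # 29 classes: A-Z, del, nothing, space\n",
"        self.log_softmax = nn.LogSoftmax(dim=1)\n",
"\n",
"    def forward(self, x):\n",
"        x = self.pool(torch.relu(self.conv1(x)))\n",
"        x = self.pool(torch.relu(self.conv2(x)))\n",
"        x = self.dropout(torch.relu(self.conv3(x)))\n",
"        x = x.view(x.size(0), -1)\n",
"        x = torch.relu(self.fc1(x))\n",
"        return self.log_softmax(self.fc2(x))"
]
},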
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'Network' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-17-57fae5e3d28e>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mmodel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mNetwork\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload_state_dict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'saved_model.pt'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0meval\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mNameError\u001b[0m: name 'Network' is not defined"
]
}
],
"source": [
"model = Network()\n",
"model.load_state_dict(torch.load('saved_model.pt'))\n",
"model.eval()"
]
},
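{
"cell_type": "markdown",
"metadata": {},
"source": [
"If the checkpoint was saved on a GPU machine, loading it on a CPU-only machine needs a `map_location` argument. A hedged variant of the load above that works on either device:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# pick whichever device is available and load the checkpoint onto it\n",
"device = torch.device('cuda' if use_cuda else 'cpu')\n",
"model = gesture_detector.Network()\n",
"model.load_state_dict(torch.load('saved_model.pt', map_location=device))\n",
"model.to(device)\n",
"model.eval()"
]
},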
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"gestures = {\n",
" 0:'A',\n",
" 1:'B',\n",
" 2:'C',\n",
" 3:'D',\n",
" 4:'E',\n",
" 5:'F',\n",
" 6:'G',\n",
" 7:'H',\n",
" 8:'I',\n",
" 9:'J',\n",
" 10:'K',\n",
" 11:'L',\n",
" 12:'M',\n",
" 13:'N',\n",
" 14:'O',\n",
" 15:'P',\n",
" 16:'Q',\n",
" 17:'R',\n",
" 18:'S',\n",
" 19:'T',\n",
" 20:'U',\n",
" 21:'V',\n",
" 22:'W',\n",
" 23:'X',\n",
" 24:'Y',\n",
" 25:'Z',\n",
" 26:'del',\n",
" 27:'nothing',\n",
" 28:'space'\n",
" \n",
"}"
]
},
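{
"cell_type": "markdown",
"metadata": {},
"source": [
"The mapping has 29 entries, one per unit of the final `Linear(270, 29)` layer in the summary; a quick sanity check catches a missing or duplicated index:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# every class index the model can emit (0-28) must have a label\n",
"assert len(gestures) == 29\n",
"assert sorted(gestures.keys()) == list(range(29))"
]
},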
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def predict(img_path):\n",
" # load the image and return the predicted breed\n",
" img = Image.open(img_path)\n",
"# img = Image.open(img_path).convert('L')\n",
" transformations = transforms.Compose([transforms.Grayscale(num_output_channels=1),\n",
" transforms.Resize(size=50),\n",
" transforms.ToTensor(),\n",
" transforms.Normalize([0.5],[0.5])])\n",
" image_tensor = transformations(img)[:3,:,:].unsqueeze(0)\n",
"\n",
" # move model inputs to cuda, if GPU available\n",
" if use_cuda:\n",
" image_tensor = image_tensor.cuda()\n",
"\n",
" # get sample outputs\n",
" output = model(image_tensor)\n",
" # convert output probabilities to predicted class\n",
" _, preds_tensor = torch.max(output, 1)\n",
"\n",
" pred = np.squeeze(preds_tensor.numpy()[0]) if not use_cuda else np.squeeze(preds_tensor.cpu().numpy()[0])\n",
"\n",
" return gestures[pred]"
]
},
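{
"cell_type": "markdown",
"metadata": {},
"source": [
"`predict` can be smoke-tested without the webcam by feeding it an arbitrary 8-bit grayscale array of the same size as the hand crop used below (200x200). The random array here is purely illustrative, so the predicted label is meaningless:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# random noise standing in for a thresholded 200x200 hand crop\n",
"dummy = np.random.randint(0, 256, (200, 200), dtype=np.uint8)\n",
"print(predict(dummy))  # prints one of the 29 gesture labels"
]
},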
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"vc = cv2.VideoCapture(0)\n",
"rval, frame = vc.read()\n",
"old_text = ''\n",
"pred_text = ''\n",
"count_frames = 0\n",
"total_str = ''\n",
"flag = False\n",
"\n",
"while True:\n",
" \n",
" if frame is not None: \n",
" \n",
" frame = cv2.flip(frame, 1)\n",
" frame = cv2.resize( frame, (400,400) )\n",
" \n",
" cv2.rectangle(frame, (300,300), (100,100), (0,255,0), 2)\n",
" \n",
" crop_img = frame[100:300, 100:300]\n",
" grey = cv2.cvtColor(crop_img, cv2.COLOR_BGR2GRAY)\n",
" \n",
" thresh = cv2.threshold(grey,210,255,cv2.THRESH_BINARY_INV+cv2.THRESH_OTSU)[1]\n",
" \n",
" \n",
" blackboard = np.zeros(frame.shape, dtype=np.uint8)\n",
" cv2.putText(blackboard, \"Predicted text - \", (30, 40), cv2.FONT_HERSHEY_TRIPLEX, 1, (255, 255, 0))\n",
" if count_frames > 20 and pred_text != \"\":\n",
" total_str += pred_text\n",
" count_frames = 0\n",
" \n",
" if flag == True:\n",
" old_text = pred_text\n",
" pred_text = predict(thresh)\n",
" \n",
" if old_text == pred_text:\n",
" count_frames += 1\n",
" else:\n",
" count_frames = 0\n",
" cv2.putText(blackboard, total_str, (30, 80), cv2.FONT_HERSHEY_TRIPLEX, 1, (255, 255, 127))\n",
" res = np.hstack((frame, blackboard))\n",
" \n",
" cv2.imshow(\"image\", res)\n",
" cv2.imshow(\"hand\", thresh)\n",
" \n",
" rval, frame = vc.read()\n",
" keypress = cv2.waitKey(1)\n",
" if keypress == ord('c'):\n",
" flag = True\n",
" if keypress == ord('q'):\n",
" break\n",
"\n",
"vc.release()\n",
"cv2.destroyAllWindows()\n",
"cv2.waitKey(1)\n",
"\n",
"vc.release()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}