-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
924 additions
and
38 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,6 +3,7 @@ data/ | |
preprocessed/ | ||
asl-alphabet/ | ||
saved_model.pt | ||
__pycache__/ | ||
|
||
.DS_Store | ||
app/.DS_Store | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,247 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import cv2\n", | ||
"import numpy as np\n", | ||
"\n", | ||
"import torch\n", | ||
"import torchvision.transforms as transforms\n", | ||
"\n", | ||
"# Import the module by its name, without the file extension. Writing\n", | ||
"# `import gesture_detector.py` makes Python look for a submodule called `py`\n", | ||
"# inside a package called `gesture_detector`, which does not exist.\n", | ||
"# NOTE(review): importing gesture_detector appears to run its training code at\n", | ||
"# import time -- confirm that script is guarded by `if __name__ == '__main__':`.\n", | ||
"import gesture_detector\n", | ||
"\n", | ||
"a = gesture_detector.Network()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# `Network` lives in gesture_detector.py, so it has to be imported into the\n", | ||
"# notebook namespace before it can be instantiated under its bare name.\n", | ||
"from gesture_detector import Network\n", | ||
"\n", | ||
"model = Network()\n", | ||
"# map_location='cpu' lets a checkpoint that was written on a GPU machine load\n", | ||
"# on a CPU-only one; load_state_dict then copies the weights into `model`.\n", | ||
"model.load_state_dict(torch.load('saved_model.pt', map_location='cpu'))\n", | ||
"# Switch to inference mode; as the last expression this also displays the model.\n", | ||
"model.eval()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Lookup table from the integer class index picked by the model to the label\n", | ||
"# that is shown to the user: the 26 letters plus 'del', 'nothing' and 'space'.\n", | ||
"# NOTE(review): the index order presumably has to match the label order used\n", | ||
"# during training -- confirm against gesture_detector.py.\n", | ||
"gestures = {\n", | ||
" 0:'A',\n", | ||
" 1:'B',\n", | ||
" 2:'C',\n", | ||
" 3:'D',\n", | ||
" 4:'E',\n", | ||
" 5:'F',\n", | ||
" 6:'G',\n", | ||
" 7:'H',\n", | ||
" 8:'I',\n", | ||
" 9:'J',\n", | ||
" 10:'K',\n", | ||
" 11:'L',\n", | ||
" 12:'M',\n", | ||
" 13:'N',\n", | ||
" 14:'O',\n", | ||
" 15:'P',\n", | ||
" 16:'Q',\n", | ||
" 17:'R',\n", | ||
" 18:'S',\n", | ||
" 19:'T',\n", | ||
" 20:'U',\n", | ||
" 21:'V',\n", | ||
" 22:'W',\n", | ||
" 23:'X',\n", | ||
" 24:'Y',\n", | ||
" 25:'Z',\n", | ||
" 26:'del',\n", | ||
" 27:'nothing',\n", | ||
" 28:'space'\n", | ||
" \n", | ||
"}" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"def predict(img_path):\n", | ||
"    \"\"\"Classify one hand image and return its label from `gestures`.\n", | ||
"\n", | ||
"    Args:\n", | ||
"        img_path: Path (or open file) of an image, or an image that is\n", | ||
"            already in memory as a NumPy array, such as the thresholded\n", | ||
"            webcam crop produced by the capture loop.\n", | ||
"\n", | ||
"    Returns:\n", | ||
"        The `gestures` entry for the class with the highest model output.\n", | ||
"    \"\"\"\n", | ||
"    # `Image` is not imported by any other cell, so bring it in here.\n", | ||
"    from PIL import Image\n", | ||
"\n", | ||
"    # Image.open only understands paths and file objects; in-memory frames\n", | ||
"    # have to be wrapped with Image.fromarray instead.\n", | ||
"    if isinstance(img_path, np.ndarray):\n", | ||
"        img = Image.fromarray(img_path)\n", | ||
"    else:\n", | ||
"        img = Image.open(img_path)\n", | ||
"\n", | ||
"    transformations = transforms.Compose([\n", | ||
"        transforms.Grayscale(num_output_channels=1),\n", | ||
"        # Force exactly 50x50: an int size would only fix the shorter edge and\n", | ||
"        # leave non-square inputs with a shape the linear layers cannot take.\n", | ||
"        transforms.Resize(size=(50, 50)),\n", | ||
"        transforms.ToTensor(),\n", | ||
"        transforms.Normalize([0.5], [0.5]),\n", | ||
"    ])\n", | ||
"    # unsqueeze(0) adds the batch dimension the model expects.\n", | ||
"    image_tensor = transformations(img).unsqueeze(0)\n", | ||
"\n", | ||
"    # Follow the device the model's weights are on rather than a separate\n", | ||
"    # `use_cuda` flag, which this notebook never defines.\n", | ||
"    device = next(model.parameters()).device\n", | ||
"    image_tensor = image_tensor.to(device)\n", | ||
"\n", | ||
"    # Inference only: no autograd graph is needed.\n", | ||
"    with torch.no_grad():\n", | ||
"        output = model(image_tensor)\n", | ||
"\n", | ||
"    # Index of the highest score along the class dimension.\n", | ||
"    _, preds_tensor = torch.max(output, 1)\n", | ||
"    pred = int(preds_tensor.item())\n", | ||
"\n", | ||
"    return gestures[pred]" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Live demo: read webcam frames, threshold the hand region and build up a\n", | ||
"# string from the predicted labels. Press 'c' to start predicting, 'q' to quit.\n", | ||
"vc = cv2.VideoCapture(0)\n", | ||
"if not vc.isOpened():\n", | ||
"    # Without this check the loop would spin forever on empty reads, with no\n", | ||
"    # window to receive the 'q' key.\n", | ||
"    raise RuntimeError('Could not open webcam 0')\n", | ||
"\n", | ||
"old_text = ''\n", | ||
"pred_text = ''\n", | ||
"count_frames = 0  # consecutive frames on which the prediction did not change\n", | ||
"total_str = ''\n", | ||
"flag = False  # set once 'c' is pressed; predictions only run after that\n", | ||
"\n", | ||
"try:\n", | ||
"    while True:\n", | ||
"        rval, frame = vc.read()\n", | ||
"\n", | ||
"        # A failed read is skipped, but keys are still polled below.\n", | ||
"        if rval and frame is not None:\n", | ||
"            frame = cv2.flip(frame, 1)\n", | ||
"            frame = cv2.resize(frame, (400, 400))\n", | ||
"\n", | ||
"            # Threshold the hand region before drawing the guide rectangle so\n", | ||
"            # the green outline does not leak into the image given to the model.\n", | ||
"            crop_img = frame[100:300, 100:300]\n", | ||
"            grey = cv2.cvtColor(crop_img, cv2.COLOR_BGR2GRAY)\n", | ||
"            # With THRESH_OTSU the threshold is chosen automatically and the\n", | ||
"            # value passed here is ignored, so pass the conventional 0.\n", | ||
"            thresh = cv2.threshold(grey, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]\n", | ||
"\n", | ||
"            cv2.rectangle(frame, (300, 300), (100, 100), (0, 255, 0), 2)\n", | ||
"\n", | ||
"            blackboard = np.zeros(frame.shape, dtype=np.uint8)\n", | ||
"            cv2.putText(blackboard, \"Predicted text - \", (30, 40), cv2.FONT_HERSHEY_TRIPLEX, 1, (255, 255, 0))\n", | ||
"\n", | ||
"            # Accept the current label once it has stayed unchanged for more\n", | ||
"            # than 20 frames, then start counting again.\n", | ||
"            # NOTE(review): 'del', 'nothing' and 'space' are appended as literal\n", | ||
"            # words -- confirm whether they should edit the text instead.\n", | ||
"            if count_frames > 20 and pred_text != \"\":\n", | ||
"                total_str += pred_text\n", | ||
"                count_frames = 0\n", | ||
"\n", | ||
"            if flag:\n", | ||
"                old_text = pred_text\n", | ||
"                pred_text = predict(thresh)\n", | ||
"\n", | ||
"                if old_text == pred_text:\n", | ||
"                    count_frames += 1\n", | ||
"                else:\n", | ||
"                    count_frames = 0\n", | ||
"\n", | ||
"            cv2.putText(blackboard, total_str, (30, 80), cv2.FONT_HERSHEY_TRIPLEX, 1, (255, 255, 127))\n", | ||
"            res = np.hstack((frame, blackboard))\n", | ||
"\n", | ||
"            cv2.imshow(\"image\", res)\n", | ||
"            cv2.imshow(\"hand\", thresh)\n", | ||
"\n", | ||
"        # Mask to the low byte so the comparison with ord() is reliable.\n", | ||
"        keypress = cv2.waitKey(1) & 0xFF\n", | ||
"        if keypress == ord('c'):\n", | ||
"            flag = True\n", | ||
"        if keypress == ord('q'):\n", | ||
"            break\n", | ||
"finally:\n", | ||
"    # Always free the camera and close the windows, even if a frame raised.\n", | ||
"    vc.release()\n", | ||
"    cv2.destroyAllWindows()\n", | ||
"    # Give the GUI event loop one tick so the windows actually close.\n", | ||
"    cv2.waitKey(1)" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.7.3" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
Oops, something went wrong.