model_code_tf

malleshamdasari · malleshamdasari · commit a96d4e49fcb3 · 2018-04-21T18:49:18.000-04:00
diff --git a/VQA_deep_learning.ipynb b/VQA_deep_learning.ipynb
@@ -0,0 +1,319 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/usr/local/lib/python3.5/dist-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n",
+      "  from ._conv import register_converters as _register_converters\n"
+     ]
+    }
+   ],
+   "source": [
+    "import tensorflow as tf \n",
+    "import numpy as np\n",
+    "import cv2\n",
+    "import matplotlib.pyplot as plt\n",
+    "from datetime import datetime"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# for Tensorboard logging and visualization\n",
+    "now = datetime.utcnow().strftime(\"%Y%m%d%H%M%S\")\n",
+    "root_logdir = \"tf_logs\"\n",
+    "logdir = \"{}/run-{}/\".format(root_logdir, now)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# a list that specifies convolution-pooling architecture; \n",
+    "# list index indicate layer position in stack; \n",
+    "# a pooling layer is represented by a tuple: (pooling type, kernel_size, strides) \n",
+    "# a convolution layer is represented by a typle: (filter_height, filter_width, depth)\n",
+    "layers = [(5, 5, 6),\n",
+    "          ('max', (1,2,2,1), (1,2,2,1)),\n",
+    "          (5, 5, 16), \n",
+    "          ('max', (1,2,2,1), (1,2,2,1)),\n",
+    "          (5, 5, 60),\n",
+    "          ('max', (1,2,2,1), (1,2,2,1))]  \n",
+    "\n",
+    "def conv_pool(x, layers):\n",
+    "    out = x\n",
+    "    n_conv, n_pool = 0, 0\n",
+    "    prev_depth = int(x.shape[3])\n",
+    "    for l in layers:\n",
+    "        if type(l[0]) == int:\n",
+    "            n_conv += 1\n",
+    "            with tf.variable_scope('conv_{}'.format(n_conv), reuse = tf.AUTO_REUSE):\n",
+    "                w = tf.get_variable('filter', initializer=tf.truncated_normal((l[0], l[1], prev_depth, l[2]),0,0.1))\n",
+    "                b = tf.get_variable('bias', initializer=tf.zeros(l[2]))  \n",
+    "                out = tf.nn.relu(tf.nn.conv2d(out, w, strides=(1,1,1,1), padding='SAME') + b)\n",
+    "            prev_depth = l[2]\n",
+    "        elif l[0] == 'max':\n",
+    "            n_pool += 1\n",
+    "            out = tf.nn.max_pool(out, l[1], l[2], padding='SAME', name='pool_{}'.format(n_pool))\n",
+    "        elif l[0] == 'avg':\n",
+    "            n_pool += 1\n",
+    "            out = tf.nn.avg_pool(out, l[1], l[2], padding='SAME', name='pool_{}'.format(n_pool))\n",
+    "    return out\n",
+    "\n",
+    "def get_frames(path, n_frames, downscale_factor):\n",
+    "    \"\"\"Read frames from a video, each downscaled by downscale_factor.\n",
+    "\n",
+    "    Args:\n",
+    "        path: video file passed to cv2.VideoCapture.\n",
+    "        n_frames: stop once this many frames have been collected; reading also\n",
+    "            stops at the first frame that cannot be read. A value the frame\n",
+    "            count never equals (e.g. None) reads until the video ends.\n",
+    "        downscale_factor: frame width and height are divided by this factor.\n",
+    "\n",
+    "    Returns:\n",
+    "        ndarray of shape (frames_read, height, width, channels).\n",
+    "\n",
+    "    Raises:\n",
+    "        ValueError: if no frame was collected.\n",
+    "    \"\"\"\n",
+    "    cap = cv2.VideoCapture(path)\n",
+    "    seq = []\n",
+    "    try:\n",
+    "        # check the limit before reading so no extra frame is decoded and discarded\n",
+    "        while len(seq) != n_frames:\n",
+    "            success, frame = cap.read()\n",
+    "            if not success:\n",
+    "                break\n",
+    "            # downscale frame\n",
+    "            width = int(frame.shape[1] / downscale_factor)\n",
+    "            height = int(frame.shape[0] / downscale_factor)\n",
+    "            seq.append(cv2.resize(frame, (width, height), interpolation = cv2.INTER_AREA))\n",
+    "    finally:\n",
+    "        # always free the capture handle, even if resizing fails\n",
+    "        cap.release()\n",
+    "    if not seq:\n",
+    "        raise ValueError('no frames could be read from {!r}'.format(path))\n",
+    "    return np.stack(seq)\n",
+    "\n",
+    "# mini-batch generator\n",
+    "def next_batch(path, labels, n_batches, batch_size, n_frames, downscale_factor):\n",
+    "    for i in range(n_batches):\n",
+    "        x_batch, y_batch = [], []\n",
+    "        for j in range(0, batch_size):\n",
+    "            x_batch.append(get_frames(path.format(i*batch_size+j+1), n_frames, downscale_factor))\n",
+    "            y_batch.append(labels[i*batch_size+j])\n",
+    "        x_batch = np.stack(x_batch)\n",
+    "        yield x_batch, y_batch\n",
+    "        \n",
+    "def get_feature_maps(x):\n",
+    "    \"\"\"Build flattened conv/pool feature maps for each video in a mini-batch.\n",
+    "\n",
+    "    x has shape (batch_size, n_frames, height, width, channels). Each video's\n",
+    "    frames go through conv_pool with the module-level `layers` spec, are\n",
+    "    flattened, and the per-video results are stacked along axis 0.\n",
+    "    \"\"\"\n",
+    "    per_video = [\n",
+    "        tf.contrib.layers.flatten(conv_pool(x[video_idx, :, :, :, :], layers))\n",
+    "        for video_idx in range(x.shape[0])\n",
+    "    ]\n",
+    "    return tf.stack(per_video, axis=0)\n",
+    "\n",
+    "def score_to_label(scores, thresh_1, thresh_2):\n",
+    "    for x in np.nditer(scores, op_flags=['readwrite']):\n",
+    "        if x < thresh_1:\n",
+    "            x[...] = 0\n",
+    "        elif x < thresh_2:\n",
+    "            x[...] = 1\n",
+    "        else:\n",
+    "            x[...] = 2\n",
+    "    return scores"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/contrib/learn/python/learn/datasets/base.py:198: retry (from tensorflow.contrib.learn.python.learn.datasets.base) is deprecated and will be removed in a future version.\n",
+      "Instructions for updating:\n",
+      "Use the retry module or similar alternatives.\n",
+      "(20, 100, 30600)\n"
+     ]
+    }
+   ],
+   "source": [
+    "path = '/home/mallesh/video-qoe-labeling/dataset/trace_{}.mp4'\n",
+    "\n",
+    "height, width, n_channels = 1080, 1920, 3\n",
+    "downscale_factor = 8\n",
+    "n_frames = 100\n",
+    "n_classes = 3\n",
+    "n_batches, batch_size = 4, 20\n",
+    "n_hidden = 100 # number of hidden cells in LSTM\n",
+    "\n",
+    "X = tf.placeholder(tf.float32, shape=\n",
+    "                   (batch_size, n_frames, int(height/downscale_factor), int(width/downscale_factor), n_channels))\n",
+    "y = tf.placeholder(tf.int32, shape=(batch_size,))\n",
+    "\n",
+    "labels = score_to_label(np.loadtxt('/home/mallesh/video-qoe-labeling/dataset/mos.txt'), 2, 3.8)\n",
+    "\n",
+    "X_features = get_feature_maps(X)\n",
+    "print(X_features.shape)\n",
+    "\n",
+    "cell = tf.contrib.rnn.BasicLSTMCell(n_hidden)\n",
+    "output, _ = tf.nn.dynamic_rnn(cell, X_features, initial_state = cell.zero_state(batch_size, dtype=tf.float32))\n",
+    "\n",
+    "with tf.variable_scope('out', reuse = tf.AUTO_REUSE):\n",
+    "    w = tf.get_variable('weight', shape=(n_hidden, n_classes))\n",
+    "    b = tf.get_variable('bias', initializer=tf.zeros(n_classes))\n",
+    "    pred = tf.matmul(output[:,-1,:], w) + b\n",
+    "\n",
+    "loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=pred, labels=y))\n",
+    "optimizer = tf.train.AdamOptimizer()\n",
+    "training_op = optimizer.minimize(loss)\n",
+    "loss_summary = tf.summary.scalar('loss', loss)\n",
+    "file_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(20, 100, 135, 240, 3)\n",
+      "[[ 0.37927836 -0.08560888  2.6771262 ]\n",
+      " [ 0.44744128  0.3947954   2.5882244 ]\n",
+      " [ 0.04256314  0.39614558  1.9952923 ]\n",
+      " [ 0.01711836  0.41108784  2.0334034 ]\n",
+      " [ 0.32527617  0.24322288  3.2082634 ]\n",
+      " [-0.40870816  0.18336628  2.033424  ]\n",
+      " [ 0.35842422  0.00692615  1.7578257 ]\n",
+      " [-0.32743627  0.318483    1.9235626 ]\n",
+      " [ 0.56437546  0.28331208  3.1881766 ]\n",
+      " [-0.38265842 -0.08463313  2.4024298 ]\n",
+      " [ 0.01732781  0.41098124  2.033123  ]\n",
+      " [ 0.01711985  0.41108695  2.0334013 ]\n",
+      " [ 0.27588528  0.25923365  1.6460352 ]\n",
+      " [ 0.05016926  0.39167893  1.9839    ]\n",
+      " [ 0.05264747  0.39260918  1.9831704 ]\n",
+      " [ 0.30169457  0.24605171  3.2600179 ]\n",
+      " [ 0.07404689  0.41454732  2.0233574 ]\n",
+      " [ 0.32336068  0.2669493   1.6475667 ]\n",
+      " [ 0.36009893  0.5279878   2.7528164 ]\n",
+      " [ 0.31793475  0.2614837   1.6379923 ]]\n",
+      "1.7771614\n",
+      "(20, 100, 135, 240, 3)\n",
+      "[[-0.06204156  0.79496956  1.2368748 ]\n",
+      " [ 0.19840315  1.3091224   0.39372188]\n",
+      " [-0.0292217   0.8915585   0.75091344]\n",
+      " [ 0.0919654   0.88935757  0.7646267 ]\n",
+      " [ 0.1628941   0.7908838   1.2623284 ]\n",
+      " [ 0.38111767  0.8404923   1.1870992 ]\n",
+      " [ 0.0832461   0.88951564  0.76364017]\n",
+      " [-0.06204156  0.79496956  1.2368748 ]\n",
+      " [ 0.08344238  0.8895122   0.76366234]\n",
+      " [ 0.07251229  1.0132546   0.71706533]\n",
+      " [ 0.08342844  0.8895123   0.7636608 ]\n",
+      " [ 0.08324607  0.88951564  0.76364017]\n",
+      " [ 0.0832461   0.88951564  0.76364017]\n",
+      " [-0.06160454  0.7949616   1.2369243 ]\n",
+      " [ 0.19840315  1.3091224   0.39372188]\n",
+      " [ 0.11835345  0.88887787  0.7676129 ]\n",
+      " [ 0.11835345  0.88887787  0.7676129 ]\n",
+      " [ 0.11844786  0.8888762   0.76762354]\n",
+      " [ 0.15422454  0.8446137   1.1614242 ]\n",
+      " [ 0.0832461   0.88951564  0.76364017]]\n",
+      "1.2838373\n",
+      "(20, 100, 135, 240, 3)\n",
+      "[[ 6.6104129e-02  9.5988196e-01  5.6221539e-01]\n",
+      " [-1.5516879e-01  8.9468837e-01  4.0705174e-01]\n",
+      " [ 2.9765752e-01  1.0281045e+00  7.2458786e-01]\n",
+      " [-3.0302963e-01  8.0091304e-01  8.8270134e-01]\n",
+      " [ 7.1249202e-02  9.6139783e-01  5.6582326e-01]\n",
+      " [-1.5259342e-01  9.6641046e-01  5.4156667e-01]\n",
+      " [-3.0302963e-01  8.0091304e-01  8.8270134e-01]\n",
+      " [ 1.1315355e-01  1.4946108e+00  5.0605452e-01]\n",
+      " [ 7.1171537e-02  9.6137494e-01  5.6576878e-01]\n",
+      " [-1.6045438e-01  9.2747623e-01  4.0413094e-01]\n",
+      " [ 5.6517433e-02  9.8093265e-01  5.8736938e-01]\n",
+      " [ 7.1892157e-02  9.6383816e-01  5.6365997e-01]\n",
+      " [ 6.1070051e-02  1.5441496e+00  3.7803373e-01]\n",
+      " [-1.5515684e-01  8.9469188e-01  4.0706015e-01]\n",
+      " [-1.6489974e-04  1.0756075e+00  8.7444943e-01]\n",
+      " [ 7.0583269e-02  9.6120173e-01  5.6535625e-01]\n",
+      " [ 7.1248129e-02  9.6139753e-01  5.6582248e-01]\n",
+      " [-1.5516931e-01  8.9468843e-01  4.0705168e-01]\n",
+      " [-1.5516931e-01  8.9468843e-01  4.0705168e-01]\n",
+      " [ 9.0059519e-02  1.5057209e+00  3.3565113e-01]]\n",
+      "1.1355282\n",
+      "(20, 100, 135, 240, 3)\n",
+      "[[-0.05554762  1.2411804   0.7248899 ]\n",
+      " [-0.05554762  1.2411804   0.7248899 ]\n",
+      " [ 0.0207992   1.1436464   1.2259097 ]\n",
+      " [-0.05554762  1.2411804   0.7248899 ]\n",
+      " [ 0.09306119  1.028489    0.9170438 ]\n",
+      " [ 0.0207992   1.1436464   1.2259097 ]\n",
+      " [ 0.0207992   1.1436464   1.2259097 ]\n",
+      " [-0.05554762  1.2411804   0.7248899 ]\n",
+      " [ 0.19181803  0.91051793  0.36328435]\n",
+      " [-0.05554762  1.2411804   0.7248899 ]\n",
+      " [-0.05554762  1.2411804   0.7248899 ]\n",
+      " [-0.05554762  1.2411804   0.7248899 ]\n",
+      " [-0.05554762  1.2411804   0.7248898 ]\n",
+      " [ 0.09306119  1.028489    0.9170438 ]\n",
+      " [ 0.09306119  1.028489    0.9170438 ]\n",
+      " [ 0.34042683  0.6978265   0.5554383 ]\n",
+      " [-0.05554762  1.2411804   0.7248899 ]\n",
+      " [-0.05554762  1.2411804   0.7248899 ]\n",
+      " [-0.05554762  1.2411804   0.7248899 ]\n",
+      " [-0.05554762  1.2411804   0.7248899 ]]\n",
+      "1.1465046\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Train for one pass over the mini-batches, logging the loss to TensorBoard\n",
+    "# and checkpointing after every batch.\n",
+    "saver = tf.train.Saver()\n",
+    "with tf.Session() as sess:\n",
+    "    sess.run(tf.global_variables_initializer())\n",
+    "    batch_num = 0\n",
+    "    for X_batch, y_batch in next_batch(path, labels, n_batches, batch_size, n_frames, downscale_factor):\n",
+    "        print(X_batch.shape)\n",
+    "        batch_num += 1\n",
+    "        feed = {X: X_batch, y: y_batch}\n",
+    "        # One run for the loss summary and the training step. The summary is built\n",
+    "        # from the forward pass the gradients depend on, so it is expected to hold\n",
+    "        # the pre-update loss, matching the separate eval done before.\n",
+    "        summary_str, _ = sess.run([loss_summary, training_op], feed_dict=feed)\n",
+    "        file_writer.add_summary(summary_str, batch_num)\n",
+    "        saver.save(sess, '/tmp/after_batch_{}.ckpt'.format(batch_num))\n",
+    "        # One more run fetches the post-update logits and loss together,\n",
+    "        # instead of one forward pass per tensor.\n",
+    "        pred_val, loss_val = sess.run([pred, loss], feed_dict=feed)\n",
+    "        print(pred_val)\n",
+    "        print(loss_val)\n",
+    "    \n",
+    "    saver.save(sess, '/tmp/final.ckpt')\n",
+    "\n",
+    "file_writer.close()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.5.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}