Fix a bug when generating the output from the labels
albertomontesg committed Mar 23, 2017
1 parent 32581d4 commit 72bcb7f
Showing 3 changed files with 158 additions and 26 deletions.
170 changes: 150 additions & 20 deletions notebooks/15 Test Generation Output.ipynb
@@ -3,39 +3,161 @@
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import json\n",
"import numpy as np\n",
"from ..src.data"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def import_labels(f):\n",
" ''' Read from a file all the labels from it '''\n",
" lines = f.readlines()\n",
" labels = []\n",
" i = 0\n",
" for l in lines:\n",
" t = l.split('\\t')\n",
" assert int(t[0]) == i\n",
" label = t[1].split('\\n')[0]\n",
" labels.append(label)\n",
" i += 1\n",
" return labels\n",
"\n",
"def to_categorical(y, nb_classes=None):\n",
" ''' Convert class vector (integers from 0 to nb_classes)\n",
" to binary class matrix, for use with categorical_crossentropy.\n",
" '''\n",
" if not nb_classes:\n",
" nb_classes = np.max(y)+1\n",
" Y = np.zeros((len(y), nb_classes))\n",
" for i in range(len(y)):\n",
" Y[i, y[i]] = 1.\n",
" return Y\n",
"\n",
"def generate_output(video_info, labels, length=16):\n",
" ''' Given the info of the vide, generate a vector of classes corresponding the output for each\n",
" clip of the video which features have been extracted.\n",
" '''\n",
" nb_frames = video_info['num_frames']\n",
" last_first_name = nb_frames - length + 1\n",
"\n",
" start_frames = range(0, last_first_name, length)\n",
"\n",
" # Check the output for each frame of the video\n",
" outputs = ['none'] * nb_frames\n",
" for i in range(nb_frames):\n",
" # Pass frame to temporal scale\n",
" t = i / float(nb_frames) * video_info['duration']\n",
" for annotation in video_info['annotations']:\n",
" if t >= annotation['segment'][0] and t <= annotation['segment'][1]:\n",
" outputs[i] = annotation['label']\n",
" label = annotation['label']\n",
" break\n",
"\n",
" instances = []\n",
" for start_frame in start_frames:\n",
" # Obtain the label for this isntance and then its output\n",
" output = None\n",
"\n",
" outs = outputs[start_frame:start_frame+length]\n",
" if outs.count(label) >= length / 2:\n",
" output = labels.index(label)\n",
" else:\n",
" output = 0\n",
" instances.append(output)\n",
"\n",
" return instances"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"with open(\"../dataset/labels.txt\", \"r\") as f:\n",
" labels = import_labels(f)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"name": "stdout",
"output_type": "stream",
"text": [
"Using Theano backend.\n"
"{'subset': 'validation', 'num_frames': 4157, 'url': 'https://www.youtube.com/watch?v=Uw_0h2UrfyY', 'duration': 139.04, 'resolution': '426x240', 'annotations': [{'segment': [19.07183775351014, 117.91560686427458], 'label': 'Ballet'}]}\n"
]
}
],
"source": [
"from work.processing.output import get_temporal_predictions_2\n",
"import numpy as np\n",
"from work.dataset.activitynet import ActivityNetDataset\n",
"import pprint\n",
"\n",
"detection_predictions_path = '../downloads/predictions/lstm_activity_detection/v1/classes/'\n",
"classification_predictions_path = '../downloads/predictions/lstm_activity_classification/v1/classes/'\n",
"\n",
"dataset = ActivityNetDataset(\n",
" videos_path='../dataset/videos.json',\n",
" labels_path='../dataset/labels.txt'\n",
")"
"with open(\"../dataset/videos.json\", \"r\") as f:\n",
" videos_info = json.load(f)\n",
"video_info = videos_info['Uw_0h2UrfyY']\n",
"print(video_info)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 31,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 177, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
"[[ 0. 0. 0. ..., 0. 0. 0.]\n",
" [ 0. 0. 0. ..., 0. 0. 0.]\n",
" [ 0. 0. 0. ..., 0. 0. 0.]\n",
" ..., \n",
" [ 1. 0. 0. ..., 0. 0. 0.]\n",
" [ 1. 0. 0. ..., 0. 0. 0.]\n",
" [ 1. 0. 0. ..., 0. 0. 0.]]\n"
]
}
],
"source": [
"instances = generate_output(video_info, labels, length=16)\n",
"print(instances)\n",
"Y = to_categorical(instances, nb_classes=200)\n",
"print(Y[100:300])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Old test with old code implemented."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
"name": "stdout",
@@ -57,7 +179,9 @@
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
@@ -92,7 +216,9 @@
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
@@ -139,7 +265,9 @@
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [
{
@@ -239,7 +367,9 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": []
@@ -261,7 +391,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.1"
"version": "3.6.0"
}
},
"nbformat": 4,
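As a quick sanity check of the helper functions added in the notebook above, the sketch below runs generate_output and to_categorical on a made-up video. The label list, frame count, duration and segment are hypothetical, and the two functions (plus numpy) are assumed to be in scope as defined in the notebook cells:

# Hypothetical toy input: a 64-frame, 8-second video with one 'Ballet'
# segment between seconds 2 and 6, and a label list whose index 0 is
# assumed to be the background class.
labels = ['none', 'Ballet']
video_info = {
    'num_frames': 64,
    'duration': 8.0,
    'annotations': [{'segment': [2.0, 6.0], 'label': 'Ballet'}],
}

# Each 16-frame clip is assigned the majority label of its frames,
# falling back to class 0 when no label covers at least half the clip.
instances = generate_output(video_info, labels, length=16)
print(instances)   # [0, 1, 1, 0]

# One-hot encode the clip labels (the notebook itself uses nb_classes=200).
Y = to_categorical(instances, nb_classes=len(labels))
print(Y)           # 4x2 binary matrix, rows matching the indices above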
6 changes: 4 additions & 2 deletions scripts/create_stateful_dataset.py
@@ -3,14 +3,16 @@
import os
import random

+import h5py
import numpy as np
from progressbar import ProgressBar

-import h5py
from src.data import generate_output, import_labels, to_categorical


-def create_stateful_dataset(video_features_file, videos_info, labels, output_path, batch_size, timesteps, subset=None):
+def create_stateful_dataset(video_features_file, videos_info, labels,
+                            output_path, batch_size, timesteps,
+                            subset=None):
    features_size = 4096
    output_size = 201

8 changes: 4 additions & 4 deletions src/data.py
@@ -31,8 +31,8 @@ def to_categorical(y, nb_classes=None):
    return Y

def generate_output(video_info, labels, length=16):
-    ''' Given the info of the vide, generate a vector of classes corresponding the output for each
-    clip of the video which features have been extracted.
+    ''' Given the info of the vide, generate a vector of classes corresponding
+    the output for each clip of the video which features have been extracted.
    '''
    nb_frames = video_info['num_frames']
    last_first_name = nb_frames - length + 1
Expand All @@ -46,7 +46,7 @@ def generate_output(video_info, labels, length=16):
        t = i / float(nb_frames) * video_info['duration']
        for annotation in video_info['annotations']:
            if t >= annotation['segment'][0] and t <= annotation['segment'][1]:
-                outputs[i] = labels.index(annotation['label'])
+                outputs[i] = annotation['label']
                label = annotation['label']
                break

@@ -59,7 +59,7 @@ def generate_output(video_info, labels, length=16):
        if outs.count(label) >= length / 2:
            output = labels.index(label)
        else:
-            output = labels[0]
+            output = 0
        instances.append(output)

    return instances
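Taken together, the two src/data.py hunks above show the substance of the fix: outputs now stores the annotation label strings rather than their indices, so the majority count outs.count(label) compares strings with strings, and a clip without a dominant label falls back to the integer class 0 instead of the string labels[0]. A minimal sketch of the difference, assuming a hypothetical two-entry label list with the background class at index 0:

# One 16-frame clip lying entirely inside a 'Ballet' annotation.
labels = ['none', 'Ballet']
length = 16
label = 'Ballet'

# Before the fix: per-frame outputs held integer indices, so counting the
# label string never matched and the clip always fell back to labels[0],
# a string rather than a class index.
outs_before = [labels.index('Ballet')] * length    # [1, 1, ..., 1]
assert outs_before.count(label) == 0               # 'Ballet' vs 1: no match

# After the fix: per-frame outputs hold the label strings, so the majority
# vote works and the clip maps to its class index (or 0 for background).
outs_after = ['Ballet'] * length
assert outs_after.count(label) >= length / 2
assert labels.index(label) == 1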
