Skip to content

Commit

Permalink
Run all pipeline script
Browse files Browse the repository at this point in the history
  • Loading branch information
albertomontesg committed Jul 4, 2016
1 parent 634cb8c commit 19936d4
Show file tree
Hide file tree
Showing 6 changed files with 228 additions and 8 deletions.
3 changes: 1 addition & 2 deletions data/models/get_temporal_location_weights.sh
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
wget --no-check-certificate
'https://www.dropbox.com/s/1vq55hw17y8k4vi/temporal-location_weights.hdf5?dl=0' -O ./temporal-locations_weights.h5
wget --no-check-certificate 'https://www.dropbox.com/s/1vq55hw17y8k4vi/temporal-location_weights.hdf5?dl=0' -O ./temporal-location_weights.hdf5
2 changes: 0 additions & 2 deletions scripts/extract_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@ def extract_features(videos_dir, output_dir, batch_size, num_threads, queue_size
output_file = h5py.File(output_path, mode)
extracted_videos = output_file.keys()
output_file.close()
#print(extracted_videos)
#return

videos_ids = [v[:-4] for v in os.listdir(videos_dir) if v[-4:] == '.mp4']

Expand Down
7 changes: 4 additions & 3 deletions scripts/process_prediction.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@


def process_prediction(experiment_id, predictions_path, output_path, smoothing_k, activity_threshold, subset=None):
clip_length = 16.

if subset == None:
subsets = ['validation', 'testing']
Expand Down Expand Up @@ -62,7 +63,7 @@ def process_prediction(experiment_id, predictions_path, output_path, smoothing_k
# Post Processing to obtain the detection
prediction_smoothed = smoothing(prediction, k=smoothing_k)
activities_idx, startings, endings, scores = activity_localization(
prediction,
prediction_smoothed,
activity_threshold
)
result_detection = []
Expand All @@ -71,8 +72,8 @@ def process_prediction(experiment_id, predictions_path, output_path, smoothing_k
result_detection.append({
'score': score,
'segment': [
s * nb_clips / fps,
e * nb_clips / fps
s * clip_length / fps,
e * clip_length / fps
],
'label': label
})
Expand Down
189 changes: 189 additions & 0 deletions scripts/run_all_pipeline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
import argparse

import numpy as np

from keras.layers import (LSTM, BatchNormalization, Convolution3D, Dense, Dropout, Flatten, Input,
MaxPooling3D, TimeDistributed, ZeroPadding3D)
from keras.models import Model, Sequential
from src.data import import_labels
from src.io import get_duration, get_num_frames, video_to_array
from src.processing import activity_localization, get_classification, smoothing


def run_all_pipeline(input_video, smoothing_k, activity_threshold):
    """ Classify a video and temporally localize its activities, printing the
    results to the standard output.

    The video is read at 112x112, split into non-overlapping clips of 16
    frames, every clip is passed through the C3D network to extract its
    features and those features are fed, clip by clip, to the temporal
    localization network. The predictions are then post-processed to print
    a classification of the video and the detected activity intervals.

    Args:
        input_video: path to the video to process.
        smoothing_k: value of `k` passed to `smoothing` at post-processing.
        activity_threshold: threshold passed to `activity_localization`.

    Raises:
        Exception: if the video could not be read or it does not contain
            enough frames to build a single clip.
    """
    input_size = (112, 112)
    length = 16

    # Load labels
    with open('dataset/labels.txt', 'r') as f:
        labels = import_labels(f)

    print('Reading Video...')
    video_array = video_to_array(input_video, resize=input_size)
    if video_array is None:
        raise Exception('The video could not be read')
    nb_frames = get_num_frames(input_video)
    duration = get_duration(input_video)
    fps = nb_frames / duration
    print('Duration: {:.1f}s'.format(duration))
    print('FPS: {:.1f}'.format(fps))
    print('Number of frames: {}'.format(nb_frames))

    # Move the frames axis to the front: (frames, channels, height, width)
    video_array = video_array.transpose(1, 0, 2, 3)
    # The frame count reported by the container may be larger than the
    # number of frames actually decoded, so never ask for more clips than
    # the array can provide.
    nb_clips = min(nb_frames, video_array.shape[0]) // length
    if nb_clips < 1:
        raise Exception(
            'The video is too short: at least {} frames are required'.format(length)
        )
    video_array = video_array[:nb_clips * length, :, :, :]
    # Group the frames into clips keeping the per-frame dimensions untouched
    video_array = video_array.reshape((nb_clips, length) + video_array.shape[1:])
    # Final layout expected by C3D: (clips, channels, length, height, width)
    video_array = video_array.transpose(0, 2, 1, 3, 4)

    # Load C3D model and mean
    print('Loading C3D network...')
    model = C3D_conv_features(True)
    model.compile(optimizer='sgd', loss='mse')
    mean_total = np.load('data/models/c3d-sports1M_mean.npy')
    # Keep only one mean value per channel (every other axis is averaged)
    mean = np.mean(mean_total, axis=(0, 2, 3, 4), keepdims=True)

    # Extract features
    print('Extracting features...')
    X = video_array - mean
    Y = model.predict(X, batch_size=1, verbose=1)

    # Load the temporal localization network
    print('Loading temporal localization network...')
    model_localization = temporal_localization_network(True)
    model_localization.compile(optimizer='rmsprop', loss='categorical_crossentropy')

    # Predict with the temporal localization network
    print('Predicting...')
    # One timestep per sample: the network is stateful and fed clip by clip
    Y = Y.reshape(nb_clips, 1, 4096)
    prediction = model_localization.predict(Y, batch_size=1, verbose=1)
    prediction = prediction.reshape(nb_clips, 201)

    # Post processing the predicted output
    print('Post-processing output...')
    labels_idx, scores = get_classification(prediction, k=5)
    print('Video: {}\n'.format(input_video))
    print('Classification:')
    for idx, score in zip(labels_idx, scores):
        label = labels[idx]
        print('{:.4f}\t{}'.format(score, label))

    prediction_smoothed = smoothing(prediction, k=smoothing_k)
    activities_idx, startings, endings, scores = activity_localization(
        prediction_smoothed,
        activity_threshold
    )

    print('\nDetection:')
    print('Score\tInterval\t\tActivity')
    for idx, s, e, score in zip(activities_idx, startings, endings, scores):
        # Boundaries are expressed in clips: convert them to seconds
        start = s * float(length) / fps
        end = e * float(length) / fps
        label = labels[idx]
        print('{:.4f}\t{:.1f}s - {:.1f}s\t\t{}'.format(score, start, end, label))


def C3D_conv_features(summary=False):
    """ Return the Keras model of the network until the fc6 layer where the
    convolutional features can be extracted.

    The whole C3D architecture is built first so the weights stored at
    'data/models/c3d-sports1M_weights.h5' can be loaded, and then the four
    trailing layers (do1, fc7, do2 and fc8) are removed so the output of the
    model is the one of fc6.

    Args:
        summary: if True, the summary of the resulting model is printed.

    Returns:
        The Keras Sequential model with the weights loaded.
    """
    from keras.layers.convolutional import Convolution3D, MaxPooling3D, ZeroPadding3D
    from keras.layers.core import Dense, Dropout, Flatten
    from keras.models import Sequential

    model = Sequential()
    # 1st layer group
    model.add(Convolution3D(64, 3, 3, 3, activation='relu',
                            border_mode='same', name='conv1',
                            subsample=(1, 1, 1),
                            input_shape=(3, 16, 112, 112),
                            trainable=False))
    model.add(MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2),
                           border_mode='valid', name='pool1'))
    # 2nd layer group
    model.add(Convolution3D(128, 3, 3, 3, activation='relu',
                            border_mode='same', name='conv2',
                            subsample=(1, 1, 1),
                            trainable=False))
    model.add(MaxPooling3D(pool_size=(2, 2, 2), strides=(2, 2, 2),
                           border_mode='valid', name='pool2'))
    # 3rd layer group
    model.add(Convolution3D(256, 3, 3, 3, activation='relu',
                            border_mode='same', name='conv3a',
                            subsample=(1, 1, 1),
                            trainable=False))
    model.add(Convolution3D(256, 3, 3, 3, activation='relu',
                            border_mode='same', name='conv3b',
                            subsample=(1, 1, 1),
                            trainable=False))
    model.add(MaxPooling3D(pool_size=(2, 2, 2), strides=(2, 2, 2),
                           border_mode='valid', name='pool3'))
    # 4th layer group
    model.add(Convolution3D(512, 3, 3, 3, activation='relu',
                            border_mode='same', name='conv4a',
                            subsample=(1, 1, 1),
                            trainable=False))
    model.add(Convolution3D(512, 3, 3, 3, activation='relu',
                            border_mode='same', name='conv4b',
                            subsample=(1, 1, 1),
                            trainable=False))
    model.add(MaxPooling3D(pool_size=(2, 2, 2), strides=(2, 2, 2),
                           border_mode='valid', name='pool4'))
    # 5th layer group
    model.add(Convolution3D(512, 3, 3, 3, activation='relu',
                            border_mode='same', name='conv5a',
                            subsample=(1, 1, 1),
                            trainable=False))
    model.add(Convolution3D(512, 3, 3, 3, activation='relu',
                            border_mode='same', name='conv5b',
                            subsample=(1, 1, 1),
                            trainable=False))
    model.add(ZeroPadding3D(padding=(0, 1, 1), name='zeropadding'))
    model.add(MaxPooling3D(pool_size=(2, 2, 2), strides=(2, 2, 2),
                           border_mode='valid', name='pool5'))
    model.add(Flatten(name='flatten'))
    # FC layers group
    model.add(Dense(4096, activation='relu', name='fc6', trainable=False))
    model.add(Dropout(.5, name='do1'))
    model.add(Dense(4096, activation='relu', name='fc7'))
    model.add(Dropout(.5, name='do2'))
    model.add(Dense(487, activation='softmax', name='fc8'))

    # Load weights
    model.load_weights('data/models/c3d-sports1M_weights.h5')

    # Drop do1, fc7, do2 and fc8 so fc6 becomes the output of the model
    for _ in range(4):
        model.pop_layer()

    if summary:
        # summary() already prints the table; wrapping it in print() would
        # also print its return value
        model.summary()
    return model

def temporal_localization_network(summary=False):
    """ Return the Keras model of the temporal localization network with its
    trained weights loaded.

    The network takes one feature vector of 4096 values at a time (batch
    size 1, a single timestep) and applies batch normalization, dropout, a
    stateful LSTM of 512 units, dropout and a softmax layer with 201
    outputs.

    Args:
        summary: if True, the summary of the model is printed.

    Returns:
        The Keras Model with the weights stored at
        'data/models/temporal-location_weights.hdf5' loaded.
    """
    features = Input(batch_shape=(1, 1, 4096,), name='features')

    # Stack the layers one after the other over the same tensor variable
    x = BatchNormalization(name='normalization')(features)
    x = Dropout(p=.5)(x)
    x = LSTM(512, return_sequences=True, stateful=True, name='lsmt1')(x)
    x = Dropout(p=.5)(x)
    class_probabilities = TimeDistributed(
        Dense(201, activation='softmax'),
        name='fc'
    )(x)

    network = Model(input=features, output=class_probabilities)
    network.load_weights('data/models/temporal-location_weights.hdf5')

    if not summary:
        return network
    network.summary()
    return network

if __name__ == '__main__':
    # Command line entry point: parse the arguments and run the pipeline
    parser = argparse.ArgumentParser(description='Run all pipeline. Given a video, classify it and temporal localize the activity on it')

    # The input video is mandatory: without it the pipeline would receive
    # None as path and fail while trying to read the video
    parser.add_argument('-i', '--input-video', type=str, dest='input_video', required=True, help='Path to the input video')
    parser.add_argument('-k', type=int, dest='smoothing_k', default=5, help='Smoothing factor at post-processing (default: %(default)s)')
    parser.add_argument('-t', type=float, dest='activity_threshold', default=.2, help='Activity threshold at post-processing (default: %(default)s)')

    args = parser.parse_args()

    run_all_pipeline(
        args.input_video,
        args.smoothing_k,
        args.activity_threshold
    )
33 changes: 33 additions & 0 deletions src/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,36 @@ def video_to_array(video_path, resize=None, start_frame=0, end_frame=None,
if dim_ordering == 'th':
video = video.transpose(3, 0, 1, 2)
return video


def get_num_frames(video_path):
    ''' Return the number of frames of the video track of the video given.

    The value is the frame count reported by OpenCV for the video, converted
    to int.

    Raises:
        Exception: if the video could not be opened.
    '''
    import cv2
    # Look for the constant itself instead of comparing version strings,
    # which are ordered lexicographically ('10.0.0' < '3.0.0')
    if hasattr(cv2, 'CAP_PROP_FRAME_COUNT'):
        CAP_PROP_FRAME_COUNT = cv2.CAP_PROP_FRAME_COUNT
    else:
        CAP_PROP_FRAME_COUNT = cv2.cv.CV_CAP_PROP_FRAME_COUNT

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            raise Exception('Could not open the video')
        num_frames = int(cap.get(CAP_PROP_FRAME_COUNT))
    finally:
        # Always free the capture, also when the video could not be opened
        cap.release()
    return num_frames

def get_duration(video_path):
    ''' Return the duration of the video track of the video given.

    The duration is computed as the frame count divided by the frames per
    second, both as reported by OpenCV for the video.

    Raises:
        Exception: if the video could not be opened.
        ZeroDivisionError: if the frames per second reported are 0.
    '''
    import cv2
    # Look for the constants themselves instead of comparing version
    # strings, which are ordered lexicographically ('10.0.0' < '3.0.0')
    if hasattr(cv2, 'CAP_PROP_FRAME_COUNT'):
        CAP_PROP_FRAME_COUNT = cv2.CAP_PROP_FRAME_COUNT
        CAP_PROP_FPS = cv2.CAP_PROP_FPS
    else:
        CAP_PROP_FRAME_COUNT = cv2.cv.CV_CAP_PROP_FRAME_COUNT
        CAP_PROP_FPS = cv2.cv.CV_CAP_PROP_FPS

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            raise Exception('Could not open the video')
        num_frames = int(cap.get(CAP_PROP_FRAME_COUNT))
        fps = float(cap.get(CAP_PROP_FPS))
    finally:
        # Always free the capture, also when the video could not be opened
        cap.release()
    duration = num_frames / fps
    return duration
2 changes: 1 addition & 1 deletion src/processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def activity_localization(sequence_class_prob, activity_threshold=.2):
padded = np.pad(activity_tag, pad_width=1, mode='constant')
dif = padded[1:] - padded [:-1]

indexes = np.arange(dif.size)
indexes = np.arange(dif.size).astype(np.float32)
startings = indexes[dif==1]
endings = indexes[dif==-1]

Expand Down

0 comments on commit 19936d4

Please sign in to comment.