ryankiros · QingyunSun · Oct 7, 2017 · Oct 7, 2017 · Oct 8, 2017 · Oct 8, 2017
diff --git a/.gitignore b/.gitignore
@@ -58,3 +58,10 @@ target/
 
 # Ignore changes to configuration file
 config.py
+
+.DS_Store
+/decoder
+/skipthoughts
+/vgg
+
+.ropeproject
diff --git a/config.py b/config.py
@@ -13,26 +13,26 @@
 paths = dict()
 
 # Skip-thoughts
-paths['skmodels'] = '/u/rkiros/public_html/models/'
-paths['sktables'] = '/u/rkiros/public_html/models/'
+paths['skmodels'] = 'skipthoughts/'
+paths['sktables'] = 'skipthoughts/'
 
 # Decoder
-paths['decmodel'] = '/ais/gobi3/u/rkiros/storyteller/romance.npz'
-paths['dictionary'] = '/ais/gobi3/u/rkiros/storyteller/romance_dictionary.pkl'
+paths['decmodel'] = 'decoder/romance.npz'
+paths['dictionary'] = 'decoder/romance_dictionary.pkl'
 
 # Image-sentence embedding
-paths['vsemodel'] = '/ais/gobi3/u/rkiros/storyteller/coco_embedding.npz'
+paths['vsemodel'] = 'decoder/coco_embedding.npz'
 
 # VGG-19 convnet
-paths['vgg'] = '/ais/gobi3/u/rkiros/vgg/vgg19.pkl'
+# NOTE(review): unlike the prototxt/caffemodel entries below, this path is
+# not under vgg/ -- confirm where vgg19.pkl is expected to live.
+paths['vgg'] = 'vgg19.pkl'
+# NOTE(review): still an absolute, machine-specific path, unlike the other
+# entries in this file -- confirm this is intended.
 paths['pycaffe'] = '/u/yukun/Projects/caffe-run/python'
-paths['vgg_proto_caffe'] = '/ais/guppy9/movie2text/neural-storyteller/models/VGG_ILSVRC_19_layers_deploy.prototxt'
-paths['vgg_model_caffe'] = '/ais/guppy9/movie2text/neural-storyteller/models/VGG_ILSVRC_19_layers.caffemodel'
+paths['vgg_proto_caffe'] = 'vgg/VGG_ILSVRC_19_layers_deploy.prototxt'
+paths['vgg_model_caffe'] = 'vgg/VGG_ILSVRC_19_layers.caffemodel'
 
 
 # COCO training captions
-paths['captions'] = '/ais/gobi3/u/rkiros/storyteller/coco_train_caps.txt'
+paths['captions'] = 'decoder/coco_train_caps.txt'
 
 # Biases
-paths['negbias'] = '/ais/gobi3/u/rkiros/storyteller/caption_style.npy'
-paths['posbias'] = '/ais/gobi3/u/rkiros/storyteller/romance_style.npy'
+paths['negbias'] = 'decoder/caption_style.npy'
+paths['posbias'] = 'decoder/romance_style.npy'
diff --git a/generate.py b/generate.py
@@ -30,6 +30,8 @@
 from PIL import ImageFile
 ImageFile.LOAD_TRUNCATED_IMAGES = True
 
+from utils import Timer
+
 
 def story(z, image_loc, k=100, bw=50, lyric=False):
     """
@@ -39,39 +41,45 @@ def story(z, image_loc, k=100, bw=50, lyric=False):
     rawim, im = load_image(image_loc)
 
     # Run image through convnet
-    feats = compute_features(z['net'], im).flatten()
-    feats /= norm(feats)
+    with Timer('Run image through convnet'):
+        feats = compute_features(z['net'], im).flatten()
+        feats /= norm(feats)
 
     # Embed image into joint space
-    feats = embedding.encode_images(z['vse'], feats[None,:])
+    with Timer('Encode image into caption space'):
+        feats = embedding.encode_images(z['vse'], feats[None,:])
 
     # Compute the nearest neighbours
-    scores = numpy.dot(feats, z['cvec'].T).flatten()
-    sorted_args = numpy.argsort(scores)[::-1]
-    sentences = [z['cap'][a] for a in sorted_args[:k]]
+    with Timer('Retrieve captions'):
+        scores = numpy.dot(feats, z['cvec'].T).flatten()
+        sorted_args = numpy.argsort(scores)[::-1]
+        sentences = [z['cap'][a] for a in sorted_args[:k]]
 
     print 'NEAREST-CAPTIONS: '
     for s in sentences[:5]:
         print s
     print ''
 
     # Compute skip-thought vectors for sentences
-    svecs = skipthoughts.encode(z['stv'], sentences, verbose=False)
+    with Timer('Compute skip-thought vector'):
+        svecs = skipthoughts.encode(z['stv'], sentences, verbose=False)
 
     # Style shifting
     shift = svecs.mean(0) - z['bneg'] + z['bpos']
 
     # Generate story conditioned on shift
-    passage = decoder.run_sampler(z['dec'], shift, beam_width=bw)
-    print 'OUTPUT: '
-    if lyric:
-        for line in passage.split(','):
-            if line[0] != ' ':
-                print line
-            else:
-                print line[1:]
-    else:
-        print passage
+    with Timer('Decoding'):
+        passage = decoder.run_sampler(z['dec'], shift, beam_width=bw)
+        print 'OUTPUT: '
+        if lyric:
+            # Lyric mode prints one comma-separated segment per line and
+            # drops a single leading space. startswith() is safe on empty
+            # segments (e.g. from ',,' or a trailing ','), where line[0]
+            # would raise IndexError.
+            for line in passage.split(','):
+                if line.startswith(' '):
+                    print line[1:]
+                else:
+                    print line
+        else:
+            print passage
+    # Return the passage in both modes: serve.py forwards this value, and
+    # returning only from the non-lyric branch made lyric=True yield None.
+    return passage
 
 
 def load_all():

diff --git a/moxel.yml b/moxel.yml
@@ -0,0 +1,22 @@
+# Deployment manifest (moxel.yml) for the neural-storyteller model.
+name: neural-storyteller
+tag: latest
+image: py2-caffe
+# Asset directories; these match the directory prefixes used by the
+# relative paths in config.py (skipthoughts/, decoder/, vgg/).
+assets:
+- skipthoughts
+- decoder
+- vgg
+resources:
+  memory: 1Gi
+  cpu: "1"
+# Input/output names match serve.py: tell_a_story(image) returns a dict
+# with a single 'passage' key.
+input_space:
+  image: image
+output_space:
+  passage: str
+# NOTE(review): pydot is uninstalled and re-installed pinned to 1.1; the
+# reason is not visible here -- confirm the pin is still required.
+setup:
+- pip install lasagne theano==0.8 nltk
+- pip uninstall -y pydot
+- pip install pydot==1.1
+- python -c "import nltk; nltk.download('punkt')"
+main:
+  type: python
+  # <file>::<function>; serve.py defines tell_a_story.
+  entrypoint: serve.py::tell_a_story
diff --git a/serve.py b/serve.py
@@ -0,0 +1,8 @@
+import generate
+z = generate.load_all()
+
+def tell_a_story(image):
+    '''
+    Generate a passage for `image` and wrap it as {'passage': <result>}.
+
+    The image is handed to generate.story as a stream (image.to_stream()),
+    together with the module-level models in `z`, using beam width 1.
+    '''
+    return {
+        'passage': generate.story(z, image.to_stream(), bw=1)
+    }
diff --git a/utils.py b/utils.py
@@ -0,0 +1,165 @@
+import numpy.random as npr
+import numpy as np
+import time
+import os
+import sys
+from datetime import datetime
+
+from StringIO import StringIO
+from pprint import pprint
+
+def make_minibatch_x(data, batch_size, num_iter):
+    '''
+    Yield `num_iter` mini-batches sampled from `data`.
+
+    `data` is assumed to be an N x D matrix. Each mini-batch holds
+    `batch_size` rows drawn uniformly at random, with replacement.
+
+    Every yielded batch is a freshly allocated float64 array, so batches
+    collected by the caller (e.g. list(make_minibatch_x(...))) do not alias
+    one another. The previous implementation refilled and re-yielded one
+    shared buffer, so every collected batch ended up equal to the last draw.
+
+    Raises AssertionError if `batch_size` exceeds N. Because this is a
+    generator, the check runs when the first batch is requested.
+    '''
+    N = data.shape[0]
+    assert batch_size <= N
+    for it in range(num_iter):
+        # Passing the population size draws the same indices as passing
+        # range(N), without materialising the index list.
+        ind = npr.choice(N, size=batch_size, replace=True)
+        # Fancy indexing already copies the selected rows; asarray only
+        # converts to float64, the dtype of the buffer used previously.
+        yield np.asarray(data[ind, :], dtype=np.float64)
+
+def make_minibatch_x_y(data, targets, batch_size, num_iter):
+    '''
+    Yield `num_iter` (mini_batch, mini_batch_targets) pairs.
+
+    `data` has N samples along its first axis (any number of trailing
+    axes); `targets` is assumed to be an N x Dp matrix. Each pair holds
+    `batch_size` samples drawn uniformly at random, with replacement, using
+    the same indices for data and targets.
+
+    Both arrays in every pair are freshly allocated float64 arrays, so
+    pairs collected by the caller do not alias one another. Data of any
+    rank is copied; previously only 2-D and 4-D data were, and other ranks
+    silently produced an all-zero batch.
+
+    Raises AssertionError if the sample counts differ or if `batch_size`
+    exceeds N. Because this is a generator, the checks run when the first
+    pair is requested.
+    '''
+    N = data.shape[0]
+    Np = targets.shape[0]
+    assert N == Np
+    assert batch_size <= N
+    for it in range(num_iter):
+        ind = npr.choice(N, size=batch_size, replace=True)
+        # Indexing the first axis only works for matrix and tensor data
+        # alike, so no per-rank branching is needed.
+        mini_batch = np.asarray(data[ind], dtype=np.float64)
+        mini_batch_targets = np.asarray(targets[ind, :], dtype=np.float64)
+        yield mini_batch, mini_batch_targets
+
+def train_test_split(dataset, training_ratio = 0.6):
+    '''
+    Randomly split `dataset` into a training and a test list.
+
+    int(len(dataset) * training_ratio) elements are drawn without
+    replacement for the training set, in drawn order. The remaining
+    elements form the test set, in their original order.
+
+    Returns the tuple (train_set, test_set). An empty dataset yields
+    ([], []).
+    '''
+    n = len(dataset)
+    if n == 0:
+        return ([], [])
+    indices = npr.choice(n, int(n * training_ratio), replace=False)
+    # A set makes the membership test O(1); testing against the numpy
+    # array scanned it for every element (O(n^2) overall).
+    chosen = set(indices.tolist())
+    train_set = [dataset[ind] for ind in indices]
+    test_set = [dataset[ind] for ind in range(n) if ind not in chosen]
+    return (train_set, test_set)
+
+def mkdir_if_not_exist(path):
+    '''
+    Create directory `path`, including missing parents, unless something
+    already exists at that path. An empty path is ignored.
+    '''
+    if path != '' and not os.path.exists(path):
+        os.makedirs(path)
+
+def get_runid():
+    '''
+    Return a run identifier built from the current local time, formatted
+    as month-day-year-hour-minute-second.microseconds.
+    '''
+    now = datetime.now()
+    return now.strftime('%m-%d-%y-%H-%M-%S.%f')
+
+
+# ANSI foreground colour codes, keyed by colour name.
+color2num = {
+    'gray': 30,
+    'red': 31,
+    'green': 32,
+    'yellow': 33,
+    'blue': 34,
+    'magenta': 35,
+    'cyan': 36,
+    'white': 37,
+    'crimson': 38,
+}
+
+
+def colorize(string, color, bold=False, highlight = False):
+    '''
+    Wrap `string` in ANSI escape sequences for the given colour.
+
+    `color` must be a key of color2num (KeyError otherwise). `highlight`
+    adds 10 to the colour code and `bold` appends the attribute '1'. The
+    result ends with the reset sequence.
+    '''
+    code = color2num[color]
+    if highlight:
+        code += 10
+    attributes = [unicode(code)]
+    if bold:
+        attributes.append('1')
+    return '\x1b[%sm%s\x1b[0m' % (';'.join(attributes), string)
+
+
+class Timer(object):
+    '''
+    Context manager that reports the wall-clock time spent in its body.
+
+    On entry it prints a green "Start" line, and on exit a green
+    "Elapsed: <seconds>" line; both are prefixed with "[name]" when a name
+    was given. The elapsed time is also stored in `self.elapsed`.
+
+    name:   optional label printed in front of both lines.
+    output: a writable stream, or a non-empty path string. A path is opened
+            for writing when the Timer is created and closed again on
+            exit; streams passed in by the caller are never closed. None
+            means sys.stdout.
+    '''
+    def __init__(self, name=None, output=sys.stdout):
+        self.name = name
+        self.elapsed = None
+        # Remember the path (if any) so that only a stream this Timer
+        # opened itself gets closed. basestring also accepts unicode paths.
+        if output and isinstance(output, basestring):
+            self._path = output
+            self.output = open(output, 'w')
+        else:
+            self._path = None
+            # `print >>None` already falls back to stdout; make flush() agree.
+            self.output = sys.stdout if output is None else output
+
+    def __enter__(self):
+        if self._path and self.output.closed:
+            # Re-entering a path-backed Timer: keep appending to its log.
+            self.output = open(self._path, 'a')
+        if self.name:
+            print >>self.output, colorize('[%s]\t' % self.name, 'green'),
+        print >>self.output, colorize('Start', 'green')
+        self.tstart = time.time()
+        self.output.flush()
+        # Returning self makes `with Timer(...) as t:` usable.
+        return self
+
+    def __exit__(self, etype, value, traceback):
+        self.elapsed = time.time() - self.tstart
+        if self.name:
+            print >>self.output, colorize('[%s]\t' % self.name, 'green'),
+        print >>self.output, colorize('Elapsed: %s' % self.elapsed,
+                                      'green')
+        self.output.flush()
+        if self._path:
+            # Release the file handle this Timer opened.
+            self.output.close()
+
+
+# Current nesting level of active Message blocks; used for indentation.
+MESSAGE_DEPTH = 0
+class Message(object):
+    '''
+    Context manager that prints `msg` on entry and a timed "done" line on
+    exit, both in magenta and indented by one tab per enclosing Message.
+    The exit line notes when the body raised an exception.
+    '''
+    def __init__(self, msg):
+        self.msg = msg
+
+    def __enter__(self):
+        global MESSAGE_DEPTH #pylint: disable=W0603
+        indent = '\t' * MESSAGE_DEPTH
+        print colorize(indent + '=: ' + self.msg, 'magenta')
+        self.tstart = time.time()
+        MESSAGE_DEPTH += 1
+
+    def __exit__(self, etype, *args):
+        global MESSAGE_DEPTH #pylint: disable=W0603
+        MESSAGE_DEPTH -= 1
+        if etype is None:
+            maybe_exc = ""
+        else:
+            maybe_exc = " (with exception)"
+        elapsed = time.time() - self.tstart
+        summary = "done%s in %.3f seconds" % (maybe_exc, elapsed)
+        print colorize('\t' * MESSAGE_DEPTH + summary, 'magenta')
+
+def outdir_from_environ():
+    '''
+    Read the experiment output directory from the `outdir` environment
+    variable and create it.
+
+    Returns the directory path, or '' when the variable is unset or empty
+    (nothing is created in that case). Raises Exception if the path
+    already exists.
+    '''
+    outdir = os.environ.get('outdir') or ''
+    if os.path.exists(outdir):
+        raise Exception('output directory already exists!')
+    mkdir_if_not_exist(outdir)
+    return outdir
+
+
+def to_string(obj):
+    '''
+    Return the pretty-printed text of a dict, ending with a newline.
+
+    Accepts dict and its subclasses (e.g. OrderedDict); any other type
+    raises TypeError.
+    '''
+    # Imported locally: the module itself only imports pprint.pprint.
+    from pprint import pformat
+    if not isinstance(obj, dict):
+        raise TypeError('Unsupported type %s for to_string' % str(type(obj)))
+    # pprint(obj, stream) writes pformat(obj) followed by a newline, so this
+    # matches the previous buffer round trip without the manual StringIO.
+    return pformat(obj) + '\n'
+
+
+def rgb2yuv(pic):
+    '''
+    Convert an image from RGB to YUV.
+
+    `pic` is indexed as pic[:, :, c], with c = 0, 1, 2 read as R, G, B.
+    Returns a float32 array of the same shape with Y, U, V in channels
+    0, 1, 2.
+    '''
+    rgb = pic.astype(np.float32)
+    red, green, blue = rgb[:, :, 0], rgb[:, :, 1], rgb[:, :, 2]
+    yuv = np.zeros_like(rgb)
+    yuv[:, :, 0] = 0.299 * red + 0.587 * green + 0.114 * blue
+    yuv[:, :, 1] = -0.14713 * red - 0.28886 * green + 0.436 * blue
+    yuv[:, :, 2] = 0.615 * red - 0.51499 * green - 0.10001 * blue
+    return yuv
+
+
+def get_val(dic, key, default):
+    '''
+    Return dic[key], or `default` when the key is missing or maps to None.
+    '''
+    val = dic.get(key)
+    return default if val is None else val