From eb0576252fe9e5643183bf4ff2c6d0f016eccd04 Mon Sep 17 00:00:00 2001
From: "Alex J. Champandard" <alexjc@aigamedev.com>
Date: Sun, 6 Mar 2016 23:23:49 +0100
Subject: [PATCH] Working with ugly code, tested on CPU but pretty slow.

---
 doodle.py | 73 ++++++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 62 insertions(+), 11 deletions(-)

diff --git a/doodle.py b/doodle.py
index d2fc49e..38090a5 100644
--- a/doodle.py
+++ b/doodle.py
@@ -5,6 +5,7 @@
 
 import theano
 import theano.tensor as T
+import theano.tensor.nnet.neighbours
 
 import lasagne
 from lasagne.layers import Conv2DLayer as ConvLayer, Pool2DLayer as PoolLayer
@@ -23,7 +24,7 @@ def __init__(self, layers):
     def build_model(self):
         net = {}
 
-        # Main network for the primary image.        
+        # First network for the main image.        
         net['img']   = InputLayer((1, 3, None, None))
         net['conv1_1'] = ConvLayer(net['img'],   64, 3, pad=1)
         net['conv1_2'] = ConvLayer(net['conv1_1'], 64, 3, pad=1)
@@ -39,7 +40,7 @@ def build_model(self):
         net['conv4_1'] = ConvLayer(net['pool3'],   512, 3, pad=1)
         net['main']    = ConvLayer(net['conv4_1'], 512, 3, pad=1)
 
-        # Secondary network for the semantic map.
+        # Second network for the semantic map.
         net['map'] = InputLayer((1, 3, None, None))
         net['map_2'] = PoolLayer(net['map'], 2, mode='average_exc_pad')
         net['map_3'] = PoolLayer(net['map'], 4, mode='average_exc_pad')
@@ -49,6 +50,10 @@ def build_model(self):
         net['sem3_1'] = ConcatLayer([net['conv3_1'], net['map_3']])
         net['sem4_1'] = ConcatLayer([net['conv4_1'], net['map_4']])
 
+        # Third network for the nearest neighbors.
+        net['nn3_1'] = ConvLayer(net['sem3_1'], 900, 3, b=None, pad=0)
+        net['nn4_1'] = ConvLayer(net['sem4_1'], 196, 3, b=None, pad=0)
+
         self.network = net
 
     def load_params(self): 
@@ -67,31 +72,77 @@ def load_params(self):
 class NeuralGenerator(object):
 
     def __init__(self):
-        self.model = Model(layers=['sem3_1', 'sem4_1', 'conv4_1'])
+        self.model = Model(layers=['sem3_1', 'sem4_1', 'conv4_1', 'nn3_1', 'nn4_1'])  # also request the two nn* layers defined in build_model
         self.iteration = 0
-
-        content_image = scipy.ndimage.imread('tree.jpg', mode='RGB')
+        
+        self.prepare_content()  # sets content_image, content_map, content_features and content_loss
+        self.prepare_style()  # sets style_image, style_map and style_loss; loads style patches into the nn* layers
+
+        losses = self.style_loss  # only the style terms are optimized here; variation_loss and content_loss are not part of the objective yet
+        grad = T.grad(sum(losses), self.model.tensor_img)  # gradient of the summed loss with respect to the image input tensor
+        self.compute_grad_and_losses = theano.function([self.model.tensor_img, self.model.tensor_map], [grad] + losses)  # outputs: gradient first, then each loss term
+        
+    def prepare_content(self):  # load the content image, evaluate its conv4_1 features and build the content loss
+        content_image = scipy.ndimage.imread('tree.128.jpg', mode='RGB')
         self.content_image = self.prepare_image(content_image)
+        self.content_map = np.ones((1, 3, 128, 128))  # constant all-ones map; 128x128 is hard-coded, presumably to match tree.128.jpg (TODO confirm)
 
         self.content_features = self.model.tensor_outputs['conv4_1'].eval({self.model.tensor_img: self.content_image})
-        self.content_loss = T.mean((self.model.tensor_outputs['conv4_1'] - self.content_features) ** 2.0)
+        self.content_loss = [T.mean((self.model.tensor_outputs['conv4_1'] - self.content_features) ** 2.0)]  # one-element list, the same container type as self.style_loss
+
+    def prepare_style(self):  # load the style image, copy its patches into the nn* layers, and build self.style_loss
+        style_image = scipy.ndimage.imread('tree.128.jpg', mode='RGB')  # NOTE(review): same file as the content image; confirm this is intended
+        self.style_image = self.prepare_image(style_image)
+        self.style_map = np.ones((1, 3, 128, 128))  # constant all-ones map, hard-coded to 128x128
+
+        for layer in ['3_1', '4_1']:
+            extractor = theano.function([self.model.tensor_img, self.model.tensor_map],
+                                        self.extract_patches(self.model.tensor_outputs['sem'+layer]))  # compiled function returning (patches, norm)
+            patches, norm = extractor(self.style_image, self.style_map)  # concrete patches of the style image and their norms
+            print(patches.shape)
+
+            l = self.model.network['nn'+layer]
+            l.N = theano.shared(norm)  # keep the style patch norms on the layer; style_loss below reads them as layer.N
+            l.W.set_value(patches[:,:,::-1,::-1])  # style patches become the layer's filters, reversed along the last two axes
+            assert l.num_filters == patches.shape[0]  # patch count must equal the filter count hard-coded in build_model
+
+        def style_loss(l):  # mean squared difference between each current patch and its best-matching style patch
+            layer = self.model.network['nn'+l]
+            dist = self.model.tensor_outputs['nn'+l]  # output of the layer whose filters are the style patches
+            patches, norm = self.extract_patches(self.model.tensor_outputs['sem'+l])  # symbolic patches of the image being optimized
+            dist = dist.reshape((dist.shape[1], -1)) / norm.reshape((1,-1)) / layer.N.reshape((-1,1))  # rows follow dist.shape[1] (filters); divide by current and style norms
+
+            best = dist.argmax(axis=0)  # for every column, the row index with the largest normalized response
+            return T.mean((patches[:,:,::-1,::-1] - layer.W[best]) ** 2.0)
+
+        self.style_loss = [style_loss('3_1'), style_loss('4_1')]  # one symbolic loss term per layer suffix
+
+    def extract_patches(self, f, size=3, stride=1):  # return (patches, norm) for size x size windows of f, stepped by stride
+        patches = theano.tensor.nnet.neighbours.images2neibs(f, (size, size), (stride, stride), mode='valid')
+        patches = patches.reshape((-1, patches.shape[0] // f.shape[1], size, size)).dimshuffle((1, 0, 2, 3))  # presumably (n_patches, channels, size, size) for a batch of one; TODO confirm
+
+        norm = T.sqrt(T.sum(patches ** 2.0, axis=(1,2,3), keepdims=True))  # square root of the sum of squares over every axis except the first
+        return patches, norm
 
-        grad = T.grad(self.content_loss, self.model.tensor_img)
-        self.compute_loss_and_grad = theano.function([self.model.tensor_img], [self.content_loss, grad])
+    def variation_loss(self, x):  # mean of (squared axis-2 neighbour difference + squared axis-3 neighbour difference) ** 1.25
+        return (((x[:,:,:-1,:-1] - x[:,:,1:,:-1])**2 + (x[:,:,:-1,:-1] - x[:,:,:-1,1:])**2)**1.25).mean()
 
     def evaluate(self, Xn):
         current_img = Xn.reshape(self.content_image.shape) - self.model.pixel_mean
-        loss, grads = self.compute_loss_and_grad(current_img)
+        grads, *losses = self.compute_grad_and_losses(current_img, self.content_map)  # first output is the gradient, the rest are the individual loss terms
+        loss = sum(losses)  # total loss returned to the caller
 
         scipy.misc.toimage(self.finalize_image(Xn), cmin=0, cmax=255).save('frames/test%04d.png'%self.iteration)
 
-        print(self.iteration, 'loss', loss, 'gradients', grads.min(), grads.max())
+        print(self.iteration, 'losses', [float(l/1000) for l in losses], 'gradients', grads.min(), grads.max())  # each loss is printed divided by 1000
 
         self.iteration += 1
         return loss, grads.flatten().astype(np.float64)
 
     def run(self):
-        Xn = np.random.uniform(0, 255, self.content_image.shape[2:] + (3,)).astype(np.float32)
+        # Xn = self.content_image[0] + self.model.pixel_mean
+        
+        Xn = np.random.uniform(64, 192, self.content_image.shape[2:] + (3,)).astype(np.float32)
         data_bounds = np.zeros((np.product(Xn.shape), 2), dtype=np.float64)
         data_bounds[:] = (0.0, 255.0)