From eb0576252fe9e5643183bf4ff2c6d0f016eccd04 Mon Sep 17 00:00:00 2001 From: "Alex J. Champandard" Date: Sun, 6 Mar 2016 23:23:49 +0100 Subject: [PATCH] Working with ugly code, tested on CPU but pretty slow. --- doodle.py | 73 ++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 62 insertions(+), 11 deletions(-) diff --git a/doodle.py b/doodle.py index d2fc49e..38090a5 100644 --- a/doodle.py +++ b/doodle.py @@ -5,6 +5,7 @@ import theano import theano.tensor as T +import theano.tensor.nnet.neighbours import lasagne from lasagne.layers import Conv2DLayer as ConvLayer, Pool2DLayer as PoolLayer @@ -23,7 +24,7 @@ def __init__(self, layers): def build_model(self): net = {} - # Main network for the primary image. + # First network for the main image. net['img'] = InputLayer((1, 3, None, None)) net['conv1_1'] = ConvLayer(net['img'], 64, 3, pad=1) net['conv1_2'] = ConvLayer(net['conv1_1'], 64, 3, pad=1) @@ -39,7 +40,7 @@ def build_model(self): net['conv4_1'] = ConvLayer(net['pool3'], 512, 3, pad=1) net['main'] = ConvLayer(net['conv4_1'], 512, 3, pad=1) - # Secondary network for the semantic map. + # Second network for the semantic map. net['map'] = InputLayer((1, 3, None, None)) net['map_2'] = PoolLayer(net['map'], 2, mode='average_exc_pad') net['map_3'] = PoolLayer(net['map'], 4, mode='average_exc_pad') @@ -49,6 +50,10 @@ def build_model(self): net['sem3_1'] = ConcatLayer([net['conv3_1'], net['map_3']]) net['sem4_1'] = ConcatLayer([net['conv4_1'], net['map_4']]) + # Third network for the nearest neighbors. + net['nn3_1'] = ConvLayer(net['sem3_1'], 900, 3, b=None, pad=0) + net['nn4_1'] = ConvLayer(net['sem4_1'], 196, 3, b=None, pad=0) + self.network = net def load_params(self): @@ -67,31 +72,77 @@ def load_params(self): class NeuralGenerator(object): def __init__(self): - self.model = Model(layers=['sem3_1', 'sem4_1', 'conv4_1']) + self.model = Model(layers=['sem3_1', 'sem4_1', 'conv4_1', 'nn3_1', 'nn4_1']) self.iteration = 0 - - content_image = scipy.ndimage.imread('tree.jpg', mode='RGB') + + self.prepare_content() + self.prepare_style() + + losses = self.style_loss # + [self.variation_loss(self.model.tensor_img)] self.content_loss + grad = T.grad(sum(losses), self.model.tensor_img) + self.compute_grad_and_losses = theano.function([self.model.tensor_img, self.model.tensor_map], [grad] + losses) + + def prepare_content(self): + content_image = scipy.ndimage.imread('tree.128.jpg', mode='RGB') self.content_image = self.prepare_image(content_image) + self.content_map = np.ones((1, 3, 128, 128)) self.content_features = self.model.tensor_outputs['conv4_1'].eval({self.model.tensor_img: self.content_image}) - self.content_loss = T.mean((self.model.tensor_outputs['conv4_1'] - self.content_features) ** 2.0) + self.content_loss = [T.mean((self.model.tensor_outputs['conv4_1'] - self.content_features) ** 2.0)] + + def prepare_style(self): + style_image = scipy.ndimage.imread('tree.128.jpg', mode='RGB') + self.style_image = self.prepare_image(style_image) + self.style_map = np.ones((1, 3, 128, 128)) + + for layer in ['3_1', '4_1']: + extractor = theano.function([self.model.tensor_img, self.model.tensor_map], + self.extract_patches(self.model.tensor_outputs['sem'+layer])) + patches, norm = extractor(self.style_image, self.style_map) + print(patches.shape) + + l = self.model.network['nn'+layer] + l.N = theano.shared(norm) + l.W.set_value(patches[:,:,::-1,::-1]) + assert l.num_filters == patches.shape[0] + + def style_loss(l): + layer = self.model.network['nn'+l] + dist = self.model.tensor_outputs['nn'+l] + patches, norm = self.extract_patches(self.model.tensor_outputs['sem'+l]) + dist = dist.reshape((dist.shape[1], -1)) / norm.reshape((1,-1)) / layer.N.reshape((-1,1)) + + best = dist.argmax(axis=0) + return T.mean((patches[:,:,::-1,::-1] - layer.W[best]) ** 2.0) + + self.style_loss = [style_loss('3_1'), style_loss('4_1')] + + def extract_patches(self, f, size=3, stride=1): + patches = theano.tensor.nnet.neighbours.images2neibs(f, (size, size), (stride, stride), mode='valid') + patches = patches.reshape((-1, patches.shape[0] // f.shape[1], size, size)).dimshuffle((1, 0, 2, 3)) + + norm = T.sqrt(T.sum(patches ** 2.0, axis=(1,2,3), keepdims=True)) + return patches, norm - grad = T.grad(self.content_loss, self.model.tensor_img) - self.compute_loss_and_grad = theano.function([self.model.tensor_img], [self.content_loss, grad]) + def variation_loss(self, x): + return (((x[:,:,:-1,:-1] - x[:,:,1:,:-1])**2 + (x[:,:,:-1,:-1] - x[:,:,:-1,1:])**2)**1.25).mean() def evaluate(self, Xn): current_img = Xn.reshape(self.content_image.shape) - self.model.pixel_mean - loss, grads = self.compute_loss_and_grad(current_img) + grads, *losses = self.compute_grad_and_losses(current_img, self.content_map) + loss = sum(losses) scipy.misc.toimage(self.finalize_image(Xn), cmin=0, cmax=255).save('frames/test%04d.png'%self.iteration) - print(self.iteration, 'loss', loss, 'gradients', grads.min(), grads.max()) + print(self.iteration, 'losses', [float(l/1000) for l in losses], 'gradients', grads.min(), grads.max()) self.iteration += 1 return loss, grads.flatten().astype(np.float64) def run(self): - Xn = np.random.uniform(0, 255, self.content_image.shape[2:] + (3,)).astype(np.float32) + # Xn = self.content_image[0] + self.model.pixel_mean + + Xn = np.random.uniform(64, 192, self.content_image.shape[2:] + (3,)).astype(np.float32) data_bounds = np.zeros((np.product(Xn.shape), 2), dtype=np.float64) data_bounds[:] = (0.0, 255.0)