From 67cf246eaa9f8e2868a0e925486ef13387697c57 Mon Sep 17 00:00:00 2001
From: "Alex J. Champandard"
Date: Sat, 23 Apr 2016 23:45:21 +0200
Subject: [PATCH] Replicating the code using two convolution layers with a
 single layer that's "cleverly" normalized (twice).

---
 doodle.py | 35 ++++++++++++++++++++++++-----------
 1 file changed, 24 insertions(+), 11 deletions(-)

diff --git a/doodle.py b/doodle.py
index 0a4db45..802c474 100755
--- a/doodle.py
+++ b/doodle.py
@@ -11,6 +11,7 @@
 import os
 import sys
 import bz2
+import math
 import time
 import pickle
 import argparse
@@ -61,7 +62,7 @@ class ansi:
     ENDC = '\033[0m'
 
 def error(message, *lines):
-    string = "\n{}ERROR: " + message + "{}" + "\n".join(lines) + "{}\n"
+    string = "\n{}ERROR: " + message + "{}\n" + "\n".join(lines) + "{}\n"
     print(string.format(ansi.RED_B, ansi.RED, ansi.ENDC))
     sys.exit(-1)
 
@@ -310,10 +311,10 @@ def prepare_style(self, scale=1.0):
         result = extractor(self.style_image, self.style_map)
 
         self.style_data = {}
-        for layer, *data in zip(self.style_layers, result[0::2], result[1::2]):
+        for layer, *data in zip(self.style_layers, result[0::3], result[1::3], result[2::3]):
             l, patches = self.model.network['nn'+layer], data[0]
             l.num_filters = patches.shape[0] # TODO: This is the number of slices.
-            self.style_data[layer] = data
+            self.style_data[layer] = [d.astype(np.float16) for d in data]
             print('  - Style layer {}: {} patches in {:,}kb.'.format(layer, patches.shape[0], patches.size//1000))
 
 
@@ -331,8 +332,9 @@ def do_extract_patches(self, layers, size=3, stride=1):
             patches = patches.reshape((-1, patches.shape[0] // f.shape[1], size, size)).dimshuffle((1, 0, 2, 3))
 
             # Calculate the magnitude that we'll use for normalization at runtime, then store...
-            norms = T.sqrt(T.sum(patches ** 2.0, axis=(1,), keepdims=True))
-            results.extend([patches, norms])
+            norms_m = T.sqrt(T.sum(patches[:,:-3] ** 2.0, axis=(1,), keepdims=True))
+            norms_s = T.sqrt(T.sum(patches[:,-3:] ** 2.0, axis=(1,), keepdims=True))
+            results.extend([patches, norms_m, norms_s])
         return results
 
     def prepare_optimization(self):
@@ -413,7 +415,7 @@ def style_loss(self):
         result = self.do_extract_patches([self.model.tensor_outputs['sem'+l] for l in self.style_layers])
 
         # Multiple style layers are optimized separately, usually sem3_1 and sem4_1.
-        for l, matches, patches in zip(self.style_layers, self.tensor_matches, result[0::2]):
+        for l, matches, patches in zip(self.style_layers, self.tensor_matches, result[0::3]):
             # Compute the mean squared error between the current patch and the best matching style patch.
             # Ignore the last channels (from semantic map) so errors returned are indicative of image only.
             channels = self.style_map_original.shape[2]
@@ -443,14 +445,25 @@ def evaluate(self, Xn):
         current_features = self.compute_features(current_img, self.content_map)
 
         # Iterate through each of the style layers one by one, computing best matches.
-        current_best = []
+        current_best, semantic_weight = [], math.sqrt(args.semantic_weight)
+        assert semantic_weight > 0.0
+
         for l, f in zip(self.style_layers, current_features):
             layer = self.model.network['nn'+l]
-            patches, norms = self.style_data[l]
-            layer.W.set_value(patches / (3.0 * norms))
+            patches, norms_m, norms_s = self.style_data[l]
+
+            patches = patches.astype(np.float32)
+            patches[:,:-3] /= (3.0 * norms_m.astype(np.float32))
+            patches[:,-3:] /= (3.0 * norms_s.astype(np.float32) * semantic_weight)
+            layer.W.set_value(patches)
+
+            nm = np.sqrt(np.sum(f[:,:-3] ** 2.0, axis=(1,), keepdims=True))
+            ns = np.sqrt(np.sum(f[:,-3:] ** 2.0, axis=(1,), keepdims=True))
+
+            f[:,:-3] /= (3.0 * nm)    # TODO: Use exact number of channels.
+            f[:,-3:] /= (3.0 * ns * semantic_weight)
 
-            n = np.sqrt(np.sum(f ** 2.0, axis=(1,), keepdims=True))
-            best, cost = self.compute_matches[l](f / (3.0 * n))
+            best, cost = self.compute_matches[l](f)
             current_best.append(patches[best])
 
         grads, *losses = self.compute_grad_and_losses(current_img, self.content_map, *current_best)
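
Note: the "normalized (twice)" trick the patch applies inside evaluate() can
be sketched in isolation. Below is a minimal NumPy-only illustration, not
code from the patch: the helper name normalize_dual is hypothetical, and it
assumes the array layout used by doodle.py, i.e. shape (patches, channels, 3,
3) where the last 3 channels come from the semantic map and the remaining
channels hold image features. Each channel group is divided by its own L2
magnitude, and the semantic group is further divided by
sqrt(semantic_weight).

    import math
    import numpy as np

    def normalize_dual(patches, semantic_weight):
        # Hypothetical helper mirroring the patch. `patches` has shape
        # (N, C, 3, 3); the semantic map occupies the last 3 channels.
        w = math.sqrt(semantic_weight)
        assert w > 0.0  # same guard as evaluate() above
        patches = patches.astype(np.float32)  # float16 storage -> float32 math
        # Magnitude of each channel group, summed over the channel axis only
        # (each spatial position keeps its own norm, matching the patch).
        norms_m = np.sqrt(np.sum(patches[:, :-3] ** 2.0, axis=(1,), keepdims=True))
        norms_s = np.sqrt(np.sum(patches[:, -3:] ** 2.0, axis=(1,), keepdims=True))
        # Like the patch itself, this assumes non-zero magnitudes.
        patches[:, :-3] /= (3.0 * norms_m)
        patches[:, -3:] /= (3.0 * norms_s * w)
        return patches

Applying the same scaling to both the style patches (loaded into layer.W) and
the current features (before compute_matches) is what lets the single
convolution score both channel groups at once, standing in for the two
separately normalized convolution layers the commit message refers to.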