Skip to content

Commit

Permalink
Variety of minor fixes for all transfer modes and cleanup of the code…
Browse files Browse the repository at this point in the history
…, some documentation.
  • Loading branch information
alexjc committed Apr 25, 2016
1 parent 2b6cfb3 commit 7fc07f7
Show file tree
Hide file tree
Showing 2 changed files with 71 additions and 56 deletions.
8 changes: 4 additions & 4 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@ The ``doodle.py`` script generates a new image by using one, two, three or four

**NOTE**: Making a ``#NeuralDoodle`` is a skill. The parameters in the script were adjusted to work well by default and with the examples below. For new images, you may need to adjust the values and adapt them to your input data too. It takes practice, but you can reach almost photo-realistic results if you iterate! (`Ask for advice here or see examples <https://github.com/alexjc/neural-doodle/issues?q=label%3Aadvice>`_.)

1. `Examples & Usage <#examples--usage>`_
2. `Installation <#installation-setup>`_
3. `Troubleshooting <#troubleshooting-problems>`_
4. `Frequent Questions <#frequent-questions>`_
1. `Examples & Usage <#1-examples--usage>`_
2. `Installation <#2-installation-setup>`_
3. `Troubleshooting <#3-troubleshooting-problems>`_
4. `Frequent Questions <#4-frequent-questions>`_

**IMPORTANT**: This project is possible thanks to the `nucl.ai Conference <http://events.nucl.ai/>`_ on Creative AI, **July 18-20**. Join us in **Vienna**!

Expand Down
119 changes: 67 additions & 52 deletions doodle.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
add_arg('--output-size', default=None, type=str, help='Size of the output image, e.g. 512x512.')
add_arg('--phases', default=3, type=int, help='Number of image scales to process in phases.')
add_arg('--slices', default=2, type=int, help='Split patches up into this number of batches.')
add_arg('--cache', default=0, type=int, help='Whether to compute matches only once.')
add_arg('--smoothness', default=1E+0, type=float, help='Weight of image smoothing scheme.')
add_arg('--variety', default=0.0, type=float, help='Bias toward selecting diverse patches, e.g. 0.5.')
add_arg('--seed', default='noise', type=str, help='Seed image path, "noise" or "content".')
Expand Down Expand Up @@ -113,7 +114,6 @@ def setup_model(self):
"""Use lasagne to create a network of convolution layers, first using VGG19 as the framework
and then adding augmentations for Semantic Style Transfer.
"""

net = {}

# Primary network for the main image. These are convolution only, and stop at layer 4_2 (rest unused).
Expand Down Expand Up @@ -141,14 +141,17 @@ def setup_model(self):
net['main'] = net['conv5_4']

# Auxiliary network for the semantic layers, and the nearest neighbors calculations.
net['map'] = InputLayer((1, 3, None, None)) # TODO: This should not always be 3, could be 4 or 1.
net['map'] = InputLayer((1, 1, None, None))
for j, i in itertools.product(range(5), range(4)):
if j < 2 and i > 1: continue
suffix = '%i_%i' % (j+1, i+1)

if i == 0:
net['map%i'%(j+1)] = PoolLayer(net['map'], 2**j, mode='average_exc_pad')
net['sem'+suffix] = ConcatLayer([net['conv'+suffix], net['map%i'%(j+1)]])
if args.semantic_weight > 0.0:
net['sem'+suffix] = ConcatLayer([net['conv'+suffix], net['map%i'%(j+1)]])
else:
net['sem'+suffix] = net['conv'+suffix]

net['dup'+suffix] = InputLayer(net['sem'+suffix].output_shape)
net['nn'+suffix] = ConvLayer(net['dup'+suffix], 1, 3, b=None, pad=0, flip_filters=False)
Expand All @@ -158,7 +161,6 @@ def setup_model(self):
def load_data(self):
"""Open the serialized parameters from a pre-trained network, and load them into the model created.
"""

vgg19_file = os.path.join(os.path.dirname(__file__), 'vgg19_conv.pkl.bz2')
if not os.path.exists(vgg19_file):
error("Model file with pre-trained convolution layers not found. Download here...",
Expand All @@ -171,20 +173,23 @@ def load_data(self):
def setup(self, layers):
    """Declare the symbolic network inputs and build output expressions for
    every layer required by the optimization algorithm.

    Populates `tensor_img`, `tensor_map`, `tensor_inputs` and the
    name-to-expression mapping `tensor_outputs`.
    """
    self.tensor_img = T.tensor4()
    self.tensor_map = T.tensor4()
    self.tensor_inputs = {self.network['img']: self.tensor_img, self.network['map']: self.tensor_map}

    requested = [self.network[name] for name in layers]
    outputs = lasagne.layers.get_output(requested, self.tensor_inputs)
    self.tensor_outputs = dict(zip(layers, outputs))

def get_outputs(self, type, layers):
    """Fetch the precomputed output tensors for the named layers.

    Keys into `tensor_outputs` are formed by prefixing each layer name
    with `type` (e.g. 'sem', 'conv', 'nn').
    """
    outputs = []
    for name in layers:
        outputs.append(self.tensor_outputs[type + name])
    return outputs

def prepare_image(self, image):
    """Given an image loaded from disk, turn it into a representation
    compatible with the model: layout (b,c,y,x) with batch=1, channel
    order reversed (RGB -> BGR), float32, and the pixel mean subtracted.
    """
    # (y, x, c) -> (c, y, x), then flip the channel axis.
    chw = image.transpose((2, 0, 1))[::-1]
    centered = chw.astype(np.float32) - self.pixel_mean
    return centered[np.newaxis]
Expand All @@ -193,7 +198,6 @@ def finalize_image(self, image, resolution):
"""Based on the output of the neural network, convert it into an image format that can be saved
to disk -- shuffling dimensions as appropriate.
"""

image = np.swapaxes(np.swapaxes(image[::-1], 0, 1), 1, 2)
image = np.clip(image, 0, 255).astype('uint8')
return scipy.misc.imresize(image, resolution, interp='bicubic')
Expand All @@ -214,6 +218,7 @@ def __init__(self):
self.style_cache = {}
self.style_layers = args.style_layers.split(',')
self.content_layers = args.content_layers.split(',')
self.all_layers = self.style_layers + self.content_layers

if args.save_every is not None:
os.makedirs('frames', exist_ok=True)
Expand Down Expand Up @@ -264,6 +269,10 @@ def __init__(self):
error("Mismatch in number of channels for style and content semantic map.",
" - Make sure both images are RGB, RGBA, or L.")

#------------------------------------------------------------------------------------------------------------------
# Helper Functions
#------------------------------------------------------------------------------------------------------------------

def load_images(self, name, filename):
"""If the image and map files exist, load them. Otherwise they'll be set to default values later.
"""
Expand All @@ -282,6 +291,12 @@ def load_images(self, name, filename):

return img, map

def compile(self, arguments, function):
    """Build a Theano function that runs the given expression on the GPU.

    Unused inputs are ignored so callers can pass the full set of model
    tensors regardless of which ones the expression actually touches.
    """
    inputs = list(arguments)
    return theano.function(inputs, function, on_unused_input='ignore')


#------------------------------------------------------------------------------------------------------------------
# Initialization & Setup
#------------------------------------------------------------------------------------------------------------------
Expand All @@ -306,50 +321,28 @@ def prepare_style(self, scale=1.0):
self.style_map = style_map.transpose((2, 0, 1))[np.newaxis].astype(np.float32)

# Compile a function to run on the GPU to extract patches for all layers at once.
required_layers = ['sem'+l for l in self.style_layers]
extractor = theano.function(
[self.model.tensor_img, self.model.tensor_map],
self.do_extract_patches([self.model.tensor_outputs[l] for l in required_layers]))
layer_outputs = self.model.get_outputs('sem', self.style_layers)
extractor = self.compile(self.model.tensor_inputs.values(), self.do_extract_patches(layer_outputs))
result = extractor(self.style_image, self.style_map)

# Store all the style patches layer by layer, resized to match slice size and cast to 16-bit for size.
self.style_data = {}
for layer, *data in zip(self.style_layers, result[0::3], result[1::3], result[2::3]):
l = self.model.network['nn'+layer]
patches = data[0]
l = self.model.network['nn'+layer]
l.num_filters = patches.shape[0] // args.slices
data[0] = data[0][:l.num_filters*args.slices]
self.style_data[layer] = [d.astype(np.float16) for d in data] + [np.zeros((patches.shape[0],), dtype=np.float16)]
self.style_data[layer] = [d[:l.num_filters*args.slices].astype(np.float16) for d in data]\
+ [np.zeros((patches.shape[0],), dtype=np.float16)]
print(' - Style layer {}: {} patches in {:,}kb.'.format(layer, patches.shape, patches.size//1000))


def do_extract_patches(self, layers, size=3, stride=1):
"""This function builds a Theano expression that will get compiled and run on the GPU. It extracts 3x3 patches
from the intermediate outputs in the model, returning for each layer the patches plus the two norms used
to normalize the cross-correlation at runtime, as a flat list.
"""
results = []
for f in layers:
# Use a Theano helper function to extract "neighbors" of specific size, seems a bit slower than doing
# it manually but much simpler!
patches = theano.tensor.nnet.neighbours.images2neibs(f, (size, size), (stride, stride), mode='valid')

# Make sure the patches are in the shape required to insert them into the model as another layer.
patches = patches.reshape((-1, patches.shape[0] // f.shape[1], size, size)).dimshuffle((1, 0, 2, 3))

# Calculate the magnitude that we'll use for normalization at runtime, then store...
# NOTE(review): splits at the last 3 channels — presumably the semantic map; confirm against network setup.
norms_m = T.sqrt(T.sum(patches[:,:-3] ** 2.0, axis=(1,), keepdims=True))
norms_s = T.sqrt(T.sum(patches[:,-3:] ** 2.0, axis=(1,), keepdims=True))
results.extend([patches, norms_m, norms_s])
return results

def prepare_optimization(self):
"""Optimization requires a function to compute the error (aka. loss) which is done in multiple components.
Here we compile a function to run on the GPU that returns all components separately.
"""

# Feed-forward calculation only, returns the result of the convolution post-activation
self.compute_features = theano.function(
[self.model.tensor_img, self.model.tensor_map],
[self.model.tensor_outputs['sem'+l] for l in self.style_layers])
self.compute_features = self.compile(self.model.tensor_inputs.values(),
self.model.get_outputs('sem', self.style_layers))

# Patch matching calculation that uses only pre-calculated features and a slice of the patches.

Expand All @@ -359,8 +352,8 @@ def prepare_optimization(self):
nn_layers = [self.model.network['nn'+l] for l in self.style_layers]
self.matcher_outputs = dict(zip(self.style_layers, lasagne.layers.get_output(nn_layers, self.matcher_inputs)))

self.compute_matches = {l: theano.function([self.matcher_history[l]],
self.do_match_patches(l)) for l in self.style_layers}
self.compute_matches = {l: self.compile([self.matcher_history[l]], self.do_match_patches(l))\
for l in self.style_layers}

self.tensor_matches = [T.tensor4() for l in self.style_layers]
# Build a list of Theano expressions that, once summed up, compute the total error.
Expand All @@ -372,6 +365,30 @@ def prepare_optimization(self):
[self.model.tensor_img, self.model.tensor_map] + self.tensor_matches,
[grad] + [l[-1] for l in self.losses], on_unused_input='ignore')


#------------------------------------------------------------------------------------------------------------------
# Theano Computation
#------------------------------------------------------------------------------------------------------------------

def do_extract_patches(self, layers, size=3, stride=1):
    """Build a Theano expression, compiled and run on the GPU, that extracts
    all `size` x `size` (default 3x3) patches from each layer's feature maps.

    Returns a flat list of [patches, norms_m, norms_s] per layer, where the
    two norms are the magnitudes used for normalization at runtime.
    """
    expressions = []
    for feature in layers:
        # images2neibs gathers every valid neighborhood; a touch slower than
        # doing it manually but much simpler!
        neighbors = theano.tensor.nnet.neighbours.images2neibs(
            feature, (size, size), (stride, stride), mode='valid')

        # Reshape so the patches can be inserted into the model as another layer.
        patches = neighbors.reshape(
            (-1, neighbors.shape[0] // feature.shape[1], size, size)).dimshuffle((1, 0, 2, 3))

        # Magnitudes used for normalization at runtime. NOTE(review): the split
        # at the last 3 channels presumably isolates the semantic map — confirm.
        norms_m = T.sqrt(T.sum(patches[:, :-3] ** 2.0, axis=(1,), keepdims=True))
        norms_s = T.sqrt(T.sum(patches[:, -3:] ** 2.0, axis=(1,), keepdims=True))
        expressions.extend([patches, norms_m, norms_s])
    return expressions

def do_match_patches(self, layer):
# Use node in the model to compute the result of the normalized cross-correlation, using results from the
# nearest-neighbor layers called 'nn3_1' and 'nn4_1'.
Expand Down Expand Up @@ -400,8 +417,7 @@ def content_loss(self):
return content_loss

# First extract all the features we need from the model, these results after convolution.
extractor = theano.function([self.model.tensor_img],
[self.model.tensor_outputs['conv'+l] for l in self.content_layers])
extractor = theano.function([self.model.tensor_img], self.model.get_outputs('conv', self.content_layers))
result = extractor(self.content_image)

# Build a list of loss components that compute the mean squared error by comparing current result to desired.
Expand All @@ -420,16 +436,15 @@ def style_loss(self):
if args.style_weight == 0.0:
return style_loss

# TODO: Here only need to transfer 'conv' layers, skip data from semantic map!
# Extract the patches from the current image, as well as their magnitude.
result = self.do_extract_patches([self.model.tensor_outputs['sem'+l] for l in self.style_layers])
result = self.do_extract_patches(self.model.get_outputs('conv', self.style_layers))

# Multiple style layers are optimized separately, usually sem3_1 and sem4_1.
for l, matches, patches in zip(self.style_layers, self.tensor_matches, result[0::3]):
# Compute the mean squared error between the current patch and the best matching style patch.
# Ignore the last channels (from semantic map) so errors returned are indicative of image only.
channels = self.style_map_original.shape[2]
loss = T.mean((patches[:,:-channels] - matches[:,:-channels]) ** 2.0)
loss = T.mean((patches - matches[:,:-channels]) ** 2.0)
style_loss.append(('style', l, args.style_weight * loss))

return style_loss
Expand All @@ -455,8 +470,8 @@ def iterate_batches(self, *arrays, batch_size):
yield excerpt, [a[excerpt] for a in arrays]

def evaluate_slices(self, f, l, semantic_weight):
# if l in self.style_cache:
# return self.style_cache[l]
if args.cache and l in self.style_cache:
return self.style_cache[l]

layer, data = self.model.network['nn'+l], self.style_data[l]
history = data[-1]
Expand Down Expand Up @@ -494,7 +509,7 @@ def evaluate(self, Xn):
current_best, semantic_weight = [], math.sqrt(9.0 / args.semantic_weight) if args.semantic_weight else None

for l, f in zip(self.style_layers, current_features):
# Helper for normalizing an array?
# Helper for normalizing an array? TODO TODO!!
nm = np.sqrt(np.sum(f[:,:-3] ** 2.0, axis=(1,), keepdims=True))
ns = np.sqrt(np.sum(f[:,-3:] ** 2.0, axis=(1,), keepdims=True))

Expand Down Expand Up @@ -544,9 +559,11 @@ def evaluate(self, Xn):
print(' {}time{} {:3.1f}s '.format(ansi.BOLD, ansi.ENDC, current_time - self.iter_time), flush=True)
self.iter_time = current_time

# Return the data in the right format for L-BFGS.
# Update counters and timers.
self.frame += 1
self.iteration += 1

# Return the data in the right format for L-BFGS.
return loss, np.array(grads).flatten().astype(np.float64)

def run(self):
Expand All @@ -563,14 +580,12 @@ def run(self):
.format(ansi.BLUE_B, i, int(shape[1]*scale), int(shape[0]*scale), scale, ansi.BLUE))

# Precompute all necessary data for the various layers, put patches in place into augmented network.
self.model.setup(layers=['sem'+l for l in self.style_layers] +
['conv'+l for l in self.content_layers])
self.model.setup(layers=['sem'+l for l in self.style_layers] + ['conv'+l for l in self.content_layers])
self.prepare_content(scale)
self.prepare_style(scale)

# Now setup the model with the new data, ready for the optimization loop.
self.model.setup(layers=['sem'+l for l in self.style_layers] +
['conv'+l for l in self.content_layers])
self.model.setup(layers=['sem'+l for l in self.style_layers] + ['conv'+l for l in self.all_layers])
self.prepare_optimization()
print('{}'.format(ansi.ENDC))

Expand Down

0 comments on commit 7fc07f7

Please sign in to comment.