Skip to content

Commit

Permalink
Fix for computation without semantic channels and other non-texture synthesis.
Browse files Browse the repository at this point in the history
  • Loading branch information
alexjc committed Apr 26, 2016
1 parent 4647811 commit 7b74cad
Showing 1 changed file with 16 additions and 15 deletions.
31 changes: 16 additions & 15 deletions doodle.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import pickle
import argparse
import itertools
import collections


# Configure all options first so we can custom load other libraries (Theano) based on device specified by user.
Expand Down Expand Up @@ -114,7 +115,7 @@ def setup_model(self):
"""Use lasagne to create a network of convolution layers, first using VGG19 as the framework
and then adding augmentations for Semantic Style Transfer.
"""
net = {}
net, self.channels = {}, {}

# Primary network for the main image. These are convolution only, and stop at layer 4_2 (rest unused).
net['img'] = InputLayer((1, 3, None, None))
Expand Down Expand Up @@ -148,6 +149,8 @@ def setup_model(self):

if i == 0:
net['map%i'%(j+1)] = PoolLayer(net['map'], 2**j, mode='average_exc_pad')
self.channels[suffix] = net['conv'+suffix].num_filters

if args.semantic_weight > 0.0:
net['sem'+suffix] = ConcatLayer([net['conv'+suffix], net['map%i'%(j+1)]])
else:
Expand Down Expand Up @@ -175,9 +178,8 @@ def setup(self, layers):
"""
self.tensor_img = T.tensor4()
self.tensor_map = T.tensor4()
self.tensor_inputs = {self.network['img']: self.tensor_img, self.network['map']: self.tensor_map}

outputs = lasagne.layers.get_output([self.network[l] for l in layers], self.tensor_inputs)
tensor_inputs = {self.network['img']: self.tensor_img, self.network['map']: self.tensor_map}
outputs = lasagne.layers.get_output([self.network[l] for l in layers], tensor_inputs)
self.tensor_outputs = {k: v for k, v in zip(layers, outputs)}

def get_outputs(self, type, layers):
Expand Down Expand Up @@ -273,8 +275,6 @@ def __init__(self):

# Finalize the parameters based on what we loaded, then create the model.
args.semantic_weight = math.sqrt(9.0 / args.semantic_weight) if args.semantic_weight else 0.0
self.semantic_channel = {'3_1': 256, '4_1': 512}
print('SEMCHAN', self.semantic_channel)
self.model = Model()


Expand All @@ -288,7 +288,7 @@ def load_images(self, name, filename):
basename, _ = os.path.splitext(filename)
mapname = basename + args.semantic_ext
img = scipy.ndimage.imread(filename, mode='RGB') if os.path.exists(filename) else None
map = scipy.ndimage.imread(mapname) if os.path.exists(mapname) else None
map = scipy.ndimage.imread(mapname) if os.path.exists(mapname) and args.semantic_weight > 0.0 else None

if img is not None: print(' - Loading `{}` for {} data.'.format(filename, name))
if map is not None: print(' - Adding `{}` as semantic map.'.format(mapname))
Expand All @@ -305,14 +305,15 @@ def compile(self, arguments, function):
return theano.function(list(arguments), function, on_unused_input='ignore')

def compute_norms(self, backend, layer, array):
return [backend.sqrt(backend.sum(array[:,:self.semantic_channel[layer]] ** 2.0, axis=(1,), keepdims=True)),
backend.sqrt(backend.sum(array[:,self.semantic_channel[layer]:] ** 2.0, axis=(1,), keepdims=True))]
ni = backend.sqrt(backend.sum(array[:,:self.model.channels[layer]] ** 2.0, axis=(1,), keepdims=True))
ns = backend.sqrt(backend.sum(array[:,self.model.channels[layer]:] ** 2.0, axis=(1,), keepdims=True))
return [ni] + [ns]

def normalize_components(self, layer, array, norms):
if args.style_weight > 0.0:
array[:,:self.semantic_channel[layer]] /= (norms[0] * 3.0)
array[:,:self.model.channels[layer]] /= (norms[0] * 3.0)
if args.semantic_weight > 0.0:
array[:,self.semantic_channel[layer]:] /= (norms[1] * args.semantic_weight)
array[:,self.model.channels[layer]:] /= (norms[1] * args.semantic_weight)


#------------------------------------------------------------------------------------------------------------------
Expand Down Expand Up @@ -340,7 +341,7 @@ def prepare_style(self, scale=1.0):

# Compile a function to run on the GPU to extract patches for all layers at once.
layer_outputs = zip(self.style_layers, self.model.get_outputs('sem', self.style_layers))
extractor = self.compile(self.model.tensor_inputs.values(), self.do_extract_patches(layer_outputs))
extractor = self.compile([self.model.tensor_img, self.model.tensor_map], self.do_extract_patches(layer_outputs))
result = extractor(self.style_img, self.style_map)

# Store all the style patches layer by layer, resized to match slice size and cast to 16-bit for size.
Expand All @@ -359,7 +360,7 @@ def prepare_optimization(self):
"""

# Feed-forward calculation only, returns the result of the convolution post-activation
self.compute_features = self.compile(self.model.tensor_inputs.values(),
self.compute_features = self.compile([self.model.tensor_img, self.model.tensor_map],
self.model.get_outputs('sem', self.style_layers))

# Patch matching calculation that uses only pre-calculated features and a slice of the patches.
Expand Down Expand Up @@ -453,11 +454,11 @@ def style_loss(self):
# Extract the patches from the current image, as well as their magnitude.
result = self.do_extract_patches(zip(self.style_layers, self.model.get_outputs('conv', self.style_layers)))

# Multiple style layers are optimized separately, usually sem3_1 and sem4_1.
# Multiple style layers are optimized separately, usually conv3_1 and conv4_1 — semantic data not used here.
for l, matches, patches in zip(self.style_layers, self.tensor_matches, result[0::3]):
# Compute the mean squared error between the current patch and the best matching style patch.
# Ignore the last channels (from semantic map) so errors returned are indicative of image only.
loss = T.mean((patches - matches[:,:self.semantic_channel[l]]) ** 2.0)
loss = T.mean((patches - matches[:,:self.model.channels[l]]) ** 2.0)
style_loss.append(('style', l, args.style_weight * loss))
return style_loss

Expand Down

0 comments on commit 7b74cad

Please sign in to comment.