Skip to content

Commit

Permalink
Support for arbitrary numbers of semantic channels.
Browse files Browse the repository at this point in the history
  • Loading branch information
alexjc committed Apr 26, 2016
1 parent 7fc07f7 commit 4647811
Showing 1 changed file with 53 additions and 50 deletions.
103 changes: 53 additions & 50 deletions doodle.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,14 +212,15 @@ class NeuralGenerator(object):
"""

def __init__(self):
"""Constructor sets up global variables, loads and validates files, then builds the model.
"""
self.start_time = time.time()
self.model = Model()

self.style_cache = {}
self.style_layers = args.style_layers.split(',')
self.content_layers = args.content_layers.split(',')
self.all_layers = self.style_layers + self.content_layers
self.used_layers = self.style_layers + self.content_layers

# Prepare file output and load files specified as input.
if args.save_every is not None:
os.makedirs('frames', exist_ok=True)
if args.output is not None and os.path.isfile(args.output):
Expand All @@ -234,6 +235,7 @@ def __init__(self):
print(" - No content files found; result depends on seed only.")
print(ansi.ENDC, end='')

# Display some useful errors if the user's input can't be understood.
if self.style_img_original is None:
error("Couldn't find style image as expected.",
" - Try making sure `{}` exists and is a valid image.".format(args.style))
Expand Down Expand Up @@ -269,6 +271,13 @@ def __init__(self):
error("Mismatch in number of channels for style and content semantic map.",
" - Make sure both images are RGB, RGBA, or L.")

# Finalize the parameters based on what we loaded, then create the model.
args.semantic_weight = math.sqrt(9.0 / args.semantic_weight) if args.semantic_weight else 0.0
self.semantic_channel = {'3_1': 256, '4_1': 512}
print('SEMCHAN', self.semantic_channel)
self.model = Model()


#------------------------------------------------------------------------------------------------------------------
# Helper Functions
#------------------------------------------------------------------------------------------------------------------
Expand All @@ -288,14 +297,23 @@ def load_images(self, name, filename):
error("The {} image and its semantic map have different resolutions. Either:".format(name),
" - Resize {} to {}, or\n - Resize {} to {}."\
.format(filename, map.shape[1::-1], mapname, img.shape[1::-1]))

return img, map

def compile(self, arguments, function):
    """Wrap a symbolic expression into a callable Theano function targeting the GPU.

    Inputs that the expression does not reference are tolerated rather than
    treated as errors, since callers pass the full set of model tensors.
    """
    inputs = list(arguments)
    return theano.function(inputs, function, on_unused_input='ignore')

def compute_norms(self, backend, layer, array):
    """Compute L2 norms over the channel axis for the two channel groups of `array`.

    Channels below `self.semantic_channel[layer]` are the convolution features;
    the remainder come from the semantic map. Each group is reduced separately.
    `backend` may be either numpy or theano.tensor, as both expose sqrt/sum.
    """
    split = self.semantic_channel[layer]
    image_norm = backend.sqrt(backend.sum(array[:, :split] ** 2.0, axis=(1,), keepdims=True))
    map_norm = backend.sqrt(backend.sum(array[:, split:] ** 2.0, axis=(1,), keepdims=True))
    return [image_norm, map_norm]

def normalize_components(self, layer, array, norms):
    """Normalize the two channel groups of `array` in place.

    The convolution features (channels before the split) are scaled by their
    norm when style matching is active; the semantic channels (after the
    split) are scaled by their norm and the semantic blending weight.
    `norms` is the pair produced by `compute_norms` for the same layer.
    """
    split = self.semantic_channel[layer]
    if args.style_weight > 0.0:
        array[:, :split] /= (norms[0] * 3.0)
    if args.semantic_weight > 0.0:
        array[:, split:] /= (norms[1] * args.semantic_weight)


#------------------------------------------------------------------------------------------------------------------
# Initialization & Setup
Expand All @@ -305,7 +323,7 @@ def prepare_content(self, scale=1.0):
"""Called each phase of the optimization, rescale the original content image and its map to use as inputs.
"""
content_image = skimage.transform.rescale(self.content_img_original, scale) * 255.0
self.content_image = self.model.prepare_image(content_image)
self.content_img = self.model.prepare_image(content_image)

content_map = skimage.transform.rescale(self.content_map_original, scale) * 255.0
self.content_map = content_map.transpose((2, 0, 1))[np.newaxis].astype(np.float32)
Expand All @@ -315,15 +333,15 @@ def prepare_style(self, scale=1.0):
through the model to extract intermediate outputs (e.g. sem4_1) and turn them into patches.
"""
style_image = skimage.transform.rescale(self.style_img_original, scale) * 255.0
self.style_image = self.model.prepare_image(style_image)
self.style_img = self.model.prepare_image(style_image)

style_map = skimage.transform.rescale(self.style_map_original, scale) * 255.0
self.style_map = style_map.transpose((2, 0, 1))[np.newaxis].astype(np.float32)

# Compile a function to run on the GPU to extract patches for all layers at once.
layer_outputs = self.model.get_outputs('sem', self.style_layers)
layer_outputs = zip(self.style_layers, self.model.get_outputs('sem', self.style_layers))
extractor = self.compile(self.model.tensor_inputs.values(), self.do_extract_patches(layer_outputs))
result = extractor(self.style_image, self.style_map)
result = extractor(self.style_img, self.style_map)

# Store all the style patches layer by layer, resized to match slice size and cast to 16-bit for size.
self.style_data = {}
Expand Down Expand Up @@ -375,32 +393,28 @@ def do_extract_patches(self, layers, size=3, stride=1):
from the intermediate outputs in the model.
"""
results = []
for f in layers:
for l, f in layers:
# Use a Theano helper function to extract "neighbors" of specific size, seems a bit slower than doing
# it manually but much simpler!
patches = theano.tensor.nnet.neighbours.images2neibs(f, (size, size), (stride, stride), mode='valid')

# Make sure the patches are in the shape required to insert them into the model as another layer.
patches = patches.reshape((-1, patches.shape[0] // f.shape[1], size, size)).dimshuffle((1, 0, 2, 3))

# Calculate the magnitude that we'll use for normalization at runtime, then store...
norms_m = T.sqrt(T.sum(patches[:,:-3] ** 2.0, axis=(1,), keepdims=True))
norms_s = T.sqrt(T.sum(patches[:,-3:] ** 2.0, axis=(1,), keepdims=True))
results.extend([patches, norms_m, norms_s])
results.extend([patches] + self.compute_norms(T, l, patches))
return results

def do_match_patches(self, layer):
# Use node in the model to compute the result of the normalized cross-correlation, using results from the
# nearest-neighbor layers called 'nn3_1' and 'nn4_1'.
dist = self.matcher_outputs[layer]
dist = dist.reshape((dist.shape[1], -1))

# Compute the score of each patch, taking into account statistics from previous iteration. This equalizes
# the chances of the patches being selected when the user requests more variety.
offset = self.matcher_history[layer].reshape((-1, 1))
scores = (dist - offset * args.variety)
matches = scores.argmax(axis=0)

# Pick the best style patches for each patch in the current image, the result is an array of indices.
return [matches, scores.max(axis=0), dist.max(axis=1)]
# Also return the maximum value along both axes, used to compare slices and add patch variety.
return [scores.argmax(axis=0), scores.max(axis=0), dist.max(axis=1)]


#------------------------------------------------------------------------------------------------------------------
Expand All @@ -418,7 +432,7 @@ def content_loss(self):

# First extract all the features we need from the model, these results after convolution.
extractor = theano.function([self.model.tensor_img], self.model.get_outputs('conv', self.content_layers))
result = extractor(self.content_image)
result = extractor(self.content_img)

# Build a list of loss components that compute the mean squared error by comparing current result to desired.
for l, ref in zip(self.content_layers, result):
Expand All @@ -437,16 +451,14 @@ def style_loss(self):
return style_loss

# Extract the patches from the current image, as well as their magnitude.
result = self.do_extract_patches(self.model.get_outputs('conv', self.style_layers))
result = self.do_extract_patches(zip(self.style_layers, self.model.get_outputs('conv', self.style_layers)))

# Multiple style layers are optimized separately, usually sem3_1 and sem4_1.
for l, matches, patches in zip(self.style_layers, self.tensor_matches, result[0::3]):
# Compute the mean squared error between the current patch and the best matching style patch.
# Ignore the last channels (from semantic map) so errors returned are indicative of image only.
channels = self.style_map_original.shape[2]
loss = T.mean((patches - matches[:,:-channels]) ** 2.0)
loss = T.mean((patches - matches[:,:self.semantic_channel[l]]) ** 2.0)
style_loss.append(('style', l, args.style_weight * loss))

return style_loss

def total_variation_loss(self):
Expand All @@ -462,66 +474,58 @@ def total_variation_loss(self):
#------------------------------------------------------------------------------------------------------------------

def iterate_batches(self, *arrays, batch_size):
    """Yield the data in `arrays` batch by batch as a generator.

    Each item is a pair of (index array, list of matching slices), with every
    array sliced by the same consecutive index range of size `batch_size`
    (the final batch may be smaller).
    """
    count = arrays[0].shape[0]
    order = np.arange(count)

    for start in range(0, count, batch_size):
        chunk = order[start:start + batch_size]
        yield chunk, [array[chunk] for array in arrays]

def evaluate_slices(self, f, l, semantic_weight):
def evaluate_slices(self, f, l):
if args.cache and l in self.style_cache:
return self.style_cache[l]

layer, data = self.model.network['nn'+l], self.style_data[l]
history = data[-1]

best_idx, best_val = None, 0.0
for idx, (bp, bm, bs, bh) in self.iterate_batches(*data, batch_size=layer.num_filters):
for idx, (bp, bi, bs, bh) in self.iterate_batches(*data, batch_size=layer.num_filters):
weights = bp.astype(np.float32)
weights[:,:-3] /= (bm * 3.0) # TODO: Use exact number of channels.
if semantic_weight: weights[:,-3:] /= (bs * semantic_weight)
self.normalize_components(l, weights, (bi, bs))
layer.W.set_value(weights)

cur_idx, cur_val, cur_match = self.compute_matches[l](history[idx])
if best_idx is None:
best_idx = cur_idx
best_val = cur_val
best_idx, best_val = cur_idx, cur_val
else:
i = np.where(cur_val > best_val)
best_idx[i] = idx[cur_idx[i]]
best_val[i] = cur_val[i]

history[idx] = cur_match

self.style_cache[l] = best_idx

if args.cache:
self.style_cache[l] = best_idx
return best_idx

def evaluate(self, Xn):
"""Callback for the L-BFGS optimization that computes the loss and gradients on the GPU.
"""

# Adjust the representation to be compatible with the model before computing results.
current_img = Xn.reshape(self.content_image.shape).astype(np.float32) - self.model.pixel_mean
current_img = Xn.reshape(self.content_img.shape).astype(np.float32) - self.model.pixel_mean
current_features = self.compute_features(current_img, self.content_map)

# Iterate through each of the style layers one by one, computing best matches.
current_best, semantic_weight = [], math.sqrt(9.0 / args.semantic_weight) if args.semantic_weight else None

current_best = []
for l, f in zip(self.style_layers, current_features):
# Helper for normalizing an array? TODO TODO!!
nm = np.sqrt(np.sum(f[:,:-3] ** 2.0, axis=(1,), keepdims=True))
ns = np.sqrt(np.sum(f[:,-3:] ** 2.0, axis=(1,), keepdims=True))

f[:,:-3] /= (nm * 3.0) # TODO: Use exact number of channels.
if semantic_weight: f[:,-3:] /= (ns * semantic_weight)

self.normalize_components(l, f, self.compute_norms(np, l, f))
self.matcher_tensors[l].set_value(f)

# Compute best matching patches this style layer, going through all slices.
warmup = bool(args.variety > 0.0 and self.iteration == 0)
for _ in range(2 if warmup else 1):
best_idx = self.evaluate_slices(f, l, semantic_weight)
best_idx = self.evaluate_slices(f, l)

patches = self.style_data[l][0]
current_best.append(patches[best_idx].astype(np.float32))
Expand All @@ -536,7 +540,7 @@ def evaluate(self, Xn):

# Dump the image to disk if requested by the user.
if args.save_every and self.frame % args.save_every == 0:
frame = Xn.reshape(self.content_image.shape[1:])
frame = Xn.reshape(self.content_img.shape[1:])
resolution = self.content_img_original.shape
image = scipy.misc.toimage(self.model.finalize_image(frame, resolution), cmin=0, cmax=255)
image.save('frames/%04d.png'%self.frame)
Expand Down Expand Up @@ -569,7 +573,6 @@ def evaluate(self, Xn):
def run(self):
"""The main entry point for the application, runs through multiple phases at increasing resolutions.
"""

self.frame, Xn = 0, None
for i in range(args.phases):
self.error = 255.0
Expand All @@ -585,14 +588,14 @@ def run(self):
self.prepare_style(scale)

# Now setup the model with the new data, ready for the optimization loop.
self.model.setup(layers=['sem'+l for l in self.style_layers] + ['conv'+l for l in self.all_layers])
self.model.setup(layers=['sem'+l for l in self.style_layers] + ['conv'+l for l in self.used_layers])
self.prepare_optimization()
print('{}'.format(ansi.ENDC))

# Setup the seed for the optimization as specified by the user.
shape = self.content_image.shape[2:]
shape = self.content_img.shape[2:]
if args.seed == 'content':
Xn = self.content_image[0] + self.model.pixel_mean
Xn = self.content_img[0] + self.model.pixel_mean
if args.seed == 'noise':
bounds = [int(i) for i in args.seed_range.split(':')]
Xn = np.random.uniform(bounds[0], bounds[1], shape + (3,)).astype(np.float32)
Expand Down Expand Up @@ -630,7 +633,7 @@ def run(self):
interrupt = True

args.seed = 'previous'
resolution = self.content_image.shape
resolution = self.content_img.shape
Xn = Xn.reshape(resolution)

output = self.model.finalize_image(Xn[0], self.content_img_original.shape)
Expand Down

0 comments on commit 4647811

Please sign in to comment.