# encoder.py (from a fork of elleryqueenhomels/arbitrary_style_transfer)
# Encoder is fixed to the first few layers (up to relu4_1)
# of VGG-19 (pre-trained on ImageNet).
# This code is a modified version of Anish Athalye's vgg.py:
# https://github.com/anishathalye/neural-style/blob/master/vgg.py

import pickle

import numpy as np
import tensorflow as tf


ENCODER_LAYERS = (
    'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
    'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
    'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3',
    'relu3_3', 'conv3_4', 'relu3_4', 'pool3',
    'conv4_1', 'relu4_1'
)
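
# Only the layers through relu4_1 are instantiated; relu4_1 (the last entry
# above) is what Encoder.encode() returns as the encoded feature map.
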
class Encoder(object):

    def __init__(self, weights_path):
        # load the pre-trained VGG-19 weights and create the (frozen)
        # TensorFlow variables for every conv layer in ENCODER_LAYERS
        with open(weights_path, 'rb') as f:
            weights = pickle.load(f, encoding='latin-1')['param values']

        idx = 0
        self.weight_vars = []

        with tf.variable_scope('encoder'):
            for layer in ENCODER_LAYERS:
                kind = layer[:4]

                if kind == 'conv':
                    # the pickled kernels are stored as [out_ch, in_ch, H, W];
                    # tf.nn.conv2d expects [H, W, in_ch, out_ch]
                    kernel = weights[idx].transpose([2, 3, 1, 0])
                    bias = weights[idx + 1]
                    idx += 2

                    with tf.variable_scope(layer):
                        W = tf.Variable(kernel, trainable=False, name='kernel')
                        b = tf.Variable(bias, trainable=False, name='bias')

                    self.weight_vars.append((W, b))
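
    # weight_vars holds one (kernel, bias) pair per conv layer, in the same
    # order that encode() walks ENCODER_LAYERS below.
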
    def encode(self, image):
        # create the computational graph
        idx = 0
        layers = {}
        current = image

        for layer in ENCODER_LAYERS:
            kind = layer[:4]

            if kind == 'conv':
                kernel, bias = self.weight_vars[idx]
                idx += 1
                current = conv2d(current, kernel, bias)

            elif kind == 'relu':
                current = tf.nn.relu(current)

            elif kind == 'pool':
                current = pool2d(current)

            layers[layer] = current

        assert len(layers) == len(ENCODER_LAYERS)

        enc = layers[ENCODER_LAYERS[-1]]

        return enc, layers
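
    # For an H x W input, the relu4_1 output has shape [batch, H/8, W/8, 512]:
    # the convolutions preserve spatial size (see conv2d below) and each of the
    # three 2x2 max-pools halves it.
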
    def preprocess(self, image, mode='BGR'):
        if mode == 'BGR':
            return image - np.array([103.939, 116.779, 123.68])
        else:
            return image - np.array([123.68, 116.779, 103.939])

    def deprocess(self, image, mode='BGR'):
        if mode == 'BGR':
            return image + np.array([103.939, 116.779, 123.68])
        else:
            return image + np.array([123.68, 116.779, 103.939])
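
    # The constants above are the per-channel ImageNet means VGG-19 was trained
    # with, in BGR order (blue=103.939, green=116.779, red=123.68); deprocess()
    # adds them back, so it is the exact inverse of preprocess().
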
def conv2d(x, kernel, bias):
    # pad the input with one pixel of reflection padding on each side
    x_padded = tf.pad(x, [[0, 0], [1, 1], [1, 1], [0, 0]], mode='REFLECT')

    # convolve and add the bias
    out = tf.nn.conv2d(x_padded, kernel, strides=[1, 1, 1, 1], padding='VALID')
    out = tf.nn.bias_add(out, bias)

    return out
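
# With VGG's 3x3 kernels, one pixel of 'REFLECT' padding followed by a VALID
# convolution keeps the spatial size unchanged while avoiding the border
# artifacts that zero padding tends to introduce.
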
def pool2d(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
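

if __name__ == '__main__':
    # Minimal usage sketch, assuming a TensorFlow 1.x runtime and a pickled
    # VGG-19 weights file that exposes a 'param values' list (float32 arrays),
    # as Encoder expects. The weights path below is hypothetical.
    WEIGHTS_PATH = 'vgg19_weights.pkl'  # hypothetical path, adjust to your setup

    # Placeholder for a single image batch in NHWC layout.
    image = tf.placeholder(tf.float32, shape=(1, 256, 256, 3), name='image')

    encoder = Encoder(WEIGHTS_PATH)
    enc, _ = encoder.encode(image)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Feed a random image, mean-centered via preprocess() beforehand.
        dummy = np.random.rand(1, 256, 256, 3).astype(np.float32) * 255.0
        dummy = encoder.preprocess(dummy).astype(np.float32)

        features = sess.run(enc, feed_dict={image: dummy})
        # Three 2x2 max-pools shrink 256x256 to 32x32; relu4_1 has 512 channels.
        print('relu4_1 features:', features.shape)  # expected: (1, 32, 32, 512)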