# model_rn.py
# Forked from clvrai/Relation-Network-Tensorflow.
# Upstream listing at capture time: 153 lines (127 loc), 5.67 KB.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
import tensorflow.contrib.slim as slim
# tfplot is an optional dependency: it is only used for the image/question/
# answer plot summaries, so a failed import must not prevent the model from
# being built.  `Exception` (rather than a bare `except`) keeps the import
# best-effort while letting KeyboardInterrupt / SystemExit propagate.
try:
    import tfplot
except Exception:
    # Bind the name so later references fail in a predictable way instead of
    # raising NameError.
    tfplot = None
from ops import conv2d, fc
from util import log
from vqa_util import question2str, answer2str
class Model(object):
    """Visual question answering graph: CONV features -> g_theta -> f_phi.

    Constructing an instance creates the input placeholders and immediately
    builds the whole TensorFlow graph (see `build`).  After construction the
    following attributes are available:

    * ``img``, ``q``, ``a`` -- input placeholders (image, question, answer).
    * ``is_training``       -- boolean placeholder defaulting to `is_train`.
    * ``all_preds``         -- softmax over the answer logits.
    * ``loss``, ``accuracy`` -- scalar training/evaluation tensors.
    """

    def __init__(self, config,
                 debug_information=False,
                 is_train=True):
        """Create the input placeholders and build the graph.

        Args:
            config: object exposing ``batch_size``, ``conv_info`` and an
                indexable ``data_info`` whose entries 0, 2, 3 and 4 are read
                as image size, image channels, question dim and answer dim.
            debug_information: stored as ``self.debug``; not otherwise used
                in this class.
            is_train: Python bool forwarded to `build` and used as the
                default value of the ``is_training`` placeholder.
        """
        self.debug = debug_information

        self.config = config
        self.batch_size = self.config.batch_size
        self.img_size = self.config.data_info[0]
        self.c_dim = self.config.data_info[2]
        self.q_dim = self.config.data_info[3]
        self.a_dim = self.config.data_info[4]
        self.conv_info = self.config.conv_info

        # create placeholders for the input
        self.img = tf.placeholder(
            name='img', dtype=tf.float32,
            shape=[self.batch_size, self.img_size, self.img_size, self.c_dim],
        )
        self.q = tf.placeholder(
            name='q', dtype=tf.float32, shape=[self.batch_size, self.q_dim],
        )
        self.a = tf.placeholder(
            name='a', dtype=tf.float32, shape=[self.batch_size, self.a_dim],
        )

        # NOTE(review): this placeholder can be fed through get_feed_dict,
        # but build() below wires the Python bool `is_train` into conv2d and
        # dropout, not this tensor -- confirm that is intended.
        self.is_training = tf.placeholder_with_default(
            bool(is_train), [], name='is_training')

        self.build(is_train=is_train)

    def get_feed_dict(self, batch_chunk, step=None, is_training=None):
        """Map a data batch onto this model's placeholders.

        Args:
            batch_chunk: mapping with the keys ``'img'``, ``'q'`` and ``'a'``.
            step: accepted for interface compatibility; unused here.
            is_training: when not None, also feeds the ``is_training``
                placeholder with this value.

        Returns:
            dict suitable for ``session.run(..., feed_dict=...)``.
        """
        fd = {
            self.img: batch_chunk['img'],  # [B, h, w, c]
            self.q: batch_chunk['q'],  # [B, n]
            self.a: batch_chunk['a'],  # [B, m]
        }
        if is_training is not None:
            fd[self.is_training] = is_training
        return fd

    def build(self, is_train=True):
        """Build the forward graph, the loss/accuracy and the summaries.

        Sets ``self.all_preds``, ``self.loss`` and ``self.accuracy`` and
        registers the scalar summaries ``loss/accuracy`` and
        ``loss/cross_entropy``.  Plot summaries via tfplot are attempted on a
        best-effort basis.

        Args:
            is_train: Python bool forwarded to every `conv2d` call and to the
                dropout layer in `f_phi`.
        """
        n = self.a_dim
        conv_info = self.conv_info

        # build loss and accuracy {{{
        def build_loss(logits, labels):
            """Return (mean softmax cross-entropy, mean argmax accuracy)."""
            # Cross-entropy loss
            loss = tf.nn.softmax_cross_entropy_with_logits(
                logits=logits, labels=labels)

            # Classification accuracy
            correct_prediction = tf.equal(
                tf.argmax(logits, 1), tf.argmax(labels, 1))
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
            return tf.reduce_mean(loss), accuracy
        # }}}

        def concat_coor(o, i, d):
            """Append the normalised (row, col) of flat cell `i` to `o`.

            `i` indexes a d x d grid in row-major order; the two appended
            features are ``row / d`` and ``col / d``, tiled over the batch.
            """
            row, col = divmod(i, d)
            # float() keeps this a true division regardless of the
            # interpreter's division semantics.
            coor = tf.tile(
                tf.expand_dims([float(row) / d, float(col) / d], axis=0),
                [self.batch_size, 1])
            return tf.concat([o, tf.cast(coor, tf.float32)], axis=1)

        def g_theta(o_i, o_j, q, scope='g_theta', reuse=True):
            """Four 256-unit fc layers over the concatenation [o_i, o_j, q]."""
            with tf.variable_scope(scope, reuse=reuse) as vs:
                if not reuse:
                    # Log the scope name only when the variables are created.
                    log.warn(vs.name)
                g_1 = fc(tf.concat([o_i, o_j, q], axis=1), 256, name='g_1')
                g_2 = fc(g_1, 256, name='g_2')
                g_3 = fc(g_2, 256, name='g_3')
                g_4 = fc(g_3, 256, name='g_4')
                return g_4

        # Classifier: takes images as input and outputs class label [B, m]
        def CONV(img, q, scope='CONV'):
            """Conv stack followed by g_theta averaged over all cell pairs."""
            with tf.variable_scope(scope) as vs:
                log.warn(vs.name)
                conv_1 = conv2d(img, conv_info[0], is_train, s_h=3, s_w=3, name='conv_1')
                conv_2 = conv2d(conv_1, conv_info[1], is_train, s_h=3, s_w=3, name='conv_2')
                conv_3 = conv2d(conv_2, conv_info[2], is_train, name='conv_3')
                conv_4 = conv2d(conv_3, conv_info[3], is_train, name='conv_4')

                # eq.1 in the paper
                # g_theta = (o_i, o_j, q)
                # conv_4 [B, d, d, k]
                d = conv_4.get_shape().as_list()[1]

                # Each grid cell, tagged with its coordinates, is one
                # "object".  Build every object exactly once instead of
                # re-slicing and re-concatenating it for every pair; the
                # resulting tensors are identical, the graph is just smaller.
                objects = [
                    concat_coor(conv_4[:, idx // d, idx % d, :], idx, d)
                    for idx in range(d * d)
                ]

                all_g = []
                for i, o_i in enumerate(objects):
                    for j, o_j in enumerate(objects):
                        # Only the very first pair creates the g_theta
                        # variables; every later pair reuses them.
                        is_first_pair = (i == 0 and j == 0)
                        all_g.append(
                            g_theta(o_i, o_j, q, reuse=not is_first_pair))

                all_g = tf.stack(all_g, axis=0)
                all_g = tf.reduce_mean(all_g, axis=0, name='all_g')
                return all_g

        def f_phi(g, scope='f_phi'):
            """Two 256-unit fc layers, dropout, then `n` linear outputs."""
            with tf.variable_scope(scope) as vs:
                log.warn(vs.name)
                fc_1 = fc(g, 256, name='fc_1')
                fc_2 = fc(fc_1, 256, name='fc_2')
                fc_2 = slim.dropout(fc_2, keep_prob=0.5, is_training=is_train, scope='fc_3/')
                fc_3 = fc(fc_2, n, activation_fn=None, name='fc_3')
                return fc_3

        g = CONV(self.img, self.q, scope='CONV')
        logits = f_phi(g, scope='f_phi')
        self.all_preds = tf.nn.softmax(logits)
        self.loss, self.accuracy = build_loss(logits, self.a)

        # Add summaries
        def draw_iqa(img, q, target_a, pred_a):
            """Plot one image titled with its question and both answers."""
            fig, ax = tfplot.subplots(figsize=(6, 6))
            ax.imshow(img)
            ax.set_title(question2str(q))
            ax.set_xlabel(answer2str(target_a)+answer2str(pred_a, 'Predicted'))
            return fig

        # Plot summaries are optional (tfplot may be missing or unusable), so
        # failures are reported but never abort graph construction.
        try:
            tfplot.summary.plot_many('IQA/',
                                     draw_iqa, [self.img, self.q, self.a, self.all_preds],
                                     max_outputs=4,
                                     collections=["plot_summaries"])
        except Exception as e:
            log.warn('Skipping IQA plot summaries: {}'.format(e))

        tf.summary.scalar("loss/accuracy", self.accuracy)
        tf.summary.scalar("loss/cross_entropy", self.loss)
        log.warn('Successfully loaded the model.')