from config import cfg


epsilon = 1e-9


class CapsLayer(object):
    ''' Capsule layer.
    Args:
        input: A 4-D tensor.
        num_outputs: the number of capsules in this layer.
        vec_len: integer, the length of the output vector of a capsule.
        layer_type: string, one of 'FC' or 'CONV', the type of this layer
            (fully connected or convolutional), kept for future expansion.
        with_routing: boolean, whether this capsule layer routes with the
            lower-level capsule layer.
    Returns:
        A 4-D tensor.
    '''
    def __init__(self, num_outputs, vec_len, with_routing=True, layer_type='FC'):
        self.num_outputs = num_outputs
        self.vec_len = vec_len
        self.with_routing = with_routing
        self.layer_type = layer_type

    def __call__(self, input, kernel_size=None, stride=None):
        '''
        The parameters 'kernel_size' and 'stride' are only used when 'layer_type' equals 'CONV'.
        '''
        if self.layer_type == 'CONV':
            self.kernel_size = kernel_size
            self.stride = stride

            if not self.with_routing:
                # the PrimaryCaps layer, a convolutional layer
                # input: [batch_size, 20, 20, 256]
                assert input.get_shape() == [cfg.batch_size, 20, 20, 256]

                '''
                # version 1, computationally expensive
                capsules = []
                for i in range(self.vec_len):
                    # each capsule i: [batch_size, 6, 6, 32]
                    with tf.variable_scope('ConvUnit_' + str(i)):
                        caps_i = tf.contrib.layers.conv2d(input, self.num_outputs,
                                                          self.kernel_size, self.stride,
                                                          padding="VALID", activation_fn=None)
                        caps_i = tf.reshape(caps_i, shape=(cfg.batch_size, -1, 1, 1))
                        capsules.append(caps_i)
                assert capsules[0].get_shape() == [cfg.batch_size, 1152, 1, 1]
                capsules = tf.concat(capsules, axis=2)
                '''

                # version 2, equivalent to version 1 but computationally more
                # efficient: one convolution whose output channels are reshaped
                # into capsules.
                # NOTE: the paper does not say whether the PrimaryCaps convolution
                # applies a ReLU activation before the squashing function, but
                # experiments show that using ReLU gives a higher test accuracy,
                # so which one to use is your choice.
                capsules = tf.contrib.layers.conv2d(input, self.num_outputs * self.vec_len,
                                                    self.kernel_size, self.stride, padding="VALID",
                                                    activation_fn=tf.nn.relu)
                # capsules = tf.contrib.layers.conv2d(input, self.num_outputs * self.vec_len,
                #                                     self.kernel_size, self.stride, padding="VALID",
                #                                     activation_fn=None)
                capsules = tf.reshape(capsules, (cfg.batch_size, -1, self.vec_len, 1))
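                # With the standard PrimaryCaps settings from the paper
                # (num_outputs=32, vec_len=8, kernel_size=9, stride=2 on the
                # 20x20x256 input asserted above), the convolution yields a
                # [batch_size, 6, 6, 256] map, and the reshape flattens it into
                # 6*6*32 = 1152 capsules of length 8.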

                # [batch_size, 1152, 8, 1]
                capsules = squash(capsules)
                assert capsules.get_shape() == [cfg.batch_size, 1152, 8, 1]
                return(capsules)

        if self.layer_type == 'FC':
            if self.with_routing:
                # the DigitCaps layer, a fully connected layer
                # Reshape the input into [batch_size, 1152, 1, 8, 1]
                self.input = tf.reshape(input, shape=(cfg.batch_size, -1, 1, input.shape[-2].value, 1))

                with tf.variable_scope('routing'):
                    # b_IJ: [1, num_caps_l, num_caps_l_plus_1, 1, 1]
                    b_IJ = tf.constant(np.zeros([1, input.shape[1].value, self.num_outputs, 1, 1], dtype=np.float32))
                    capsules = routing(self.input, b_IJ)
                    capsules = tf.squeeze(capsules, axis=1)

            return(capsules)


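# Illustrative usage only (kept in a string so it is not executed); it assumes
# `conv1` is the [batch_size, 20, 20, 256] feature map asserted above and the
# capsule sizes implied by the asserts (32 primary capsule types of length 8,
# 10 output capsules of length 16):
'''
primaryCaps = CapsLayer(num_outputs=32, vec_len=8, with_routing=False, layer_type='CONV')
caps1 = primaryCaps(conv1, kernel_size=9, stride=2)    # => [batch_size, 1152, 8, 1]

digitCaps = CapsLayer(num_outputs=10, vec_len=16, with_routing=True, layer_type='FC')
caps2 = digitCaps(caps1)                                # => [batch_size, 10, 16, 1]
'''

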
def routing(input, b_IJ):
    ''' The routing algorithm.
    Args:
        input: A Tensor with [batch_size, num_caps_l=1152, 1, length(u_i)=8, 1]
               shape, num_caps_l meaning the number of capsules in the layer l.
    Returns:
        A Tensor of shape [batch_size, 1, num_caps_l_plus_1, length(v_j)=16, 1]
        representing the vector output `v_j` of the capsules in the layer l+1.
    Notes:
        u_i represents the vector output of capsule i in the layer l, and
        v_j the vector output of capsule j in the layer l+1.
    '''

    # W: [1, num_caps_i, num_caps_j, len_u_i, len_v_j]
    W = tf.get_variable('Weight', shape=(1, 1152, 10, 8, 16), dtype=tf.float32,
                        initializer=tf.random_normal_initializer(stddev=cfg.stddev))

    # Eq.2, calc u_hat
    # do tiling for input and W before matmul
    # input => [batch_size, 1152, 10, 8, 1]
    # W => [batch_size, 1152, 10, 8, 16]
    input = tf.tile(input, [1, 1, 10, 1, 1])
    W = tf.tile(W, [cfg.batch_size, 1, 1, 1, 1])
    assert input.get_shape() == [cfg.batch_size, 1152, 10, 8, 1]

    # in the last 2 dims:
    # [8, 16].T x [8, 1] => [16, 1] => [batch_size, 1152, 10, 16, 1]
    u_hat = tf.matmul(W, input, transpose_a=True)
    assert u_hat.get_shape() == [cfg.batch_size, 1152, 10, 16, 1]
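
    # For reference, the numbered comments below follow Procedure 1
    # ("Routing algorithm") of Sabour et al., 2017; step 2, the b_IJ
    # initialization, is done by the caller in CapsLayer.__call__:
    #   1: procedure ROUTING(u_hat, r, l)
    #   2:   for all capsule i in layer l, capsule j in layer (l+1): b_ij <- 0
    #   3:   for r iterations do
    #   4:     for all capsule i in layer l: c_i <- softmax(b_i)
    #   5:     for all capsule j in layer (l+1): s_j <- sum_i c_ij * u_hat_j|i
    #   6:     for all capsule j in layer (l+1): v_j <- squash(s_j)
    #   7:     for all i and j: b_ij <- b_ij + u_hat_j|i . v_j
    #   return v_j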

    # line 3, for r iterations do
    for r_iter in range(cfg.iter_routing):
        with tf.variable_scope('iter_' + str(r_iter)):
            # line 4:
            # => [1, 1152, 10, 1, 1]
            c_IJ = tf.nn.softmax(b_IJ, dim=2)
            c_IJ = tf.tile(c_IJ, [cfg.batch_size, 1, 1, 1, 1])
            assert c_IJ.get_shape() == [cfg.batch_size, 1152, 10, 1, 1]

            # line 5:
            # weight u_hat with c_IJ, element-wise in the last two dims,
            # => [batch_size, 1152, 10, 16, 1]
            s_J = tf.multiply(c_IJ, u_hat)
            # then sum over the second dim, resulting in [batch_size, 1, 10, 16, 1]
            s_J = tf.reduce_sum(s_J, axis=1, keep_dims=True)
            assert s_J.get_shape() == [cfg.batch_size, 1, 10, 16, 1]

            # line 6:
            # squash using Eq.1
            v_J = squash(s_J)
            assert v_J.get_shape() == [cfg.batch_size, 1, 10, 16, 1]

            # line 7:
            # tile v_J from [batch_size, 1, 10, 16, 1] to [batch_size, 1152, 10, 16, 1],
            # then matmul in the last two dims: [16, 1].T x [16, 1] => [1, 1],
            # and reduce-sum over the batch_size dim, resulting in [1, 1152, 10, 1, 1]
            v_J_tiled = tf.tile(v_J, [1, 1152, 1, 1, 1])
            u_produce_v = tf.matmul(u_hat, v_J_tiled, transpose_a=True)
            assert u_produce_v.get_shape() == [cfg.batch_size, 1152, 10, 1, 1]
            b_IJ += tf.reduce_sum(u_produce_v, axis=0, keep_dims=True)

    return(v_J)


def squash(vector):
    '''Squashing function corresponding to Eq. 1.
    Args:
        vector: A 5-D tensor with shape [batch_size, 1, num_caps, vec_len, 1].
    Returns:
        A 5-D tensor with the same shape as the input vector, squashed along the
        4th and 5th dimensions.
    '''
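    # Eq. 1 of the paper, which this function implements:
    #   v_j = (||s_j||^2 / (1 + ||s_j||^2)) * (s_j / ||s_j||)
    # `epsilon` keeps the square root away from zero for (near-)zero-norm vectors.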
    vec_squared_norm = tf.reduce_sum(tf.square(vector), -2, keep_dims=True)
    scalar_factor = vec_squared_norm / (1 + vec_squared_norm) / tf.sqrt(vec_squared_norm + epsilon)
    vec_squashed = scalar_factor * vector  # element-wise
    return(vec_squashed)


# TODO: 1. Test the `fully_connected` and `conv2d` functions;
#       2. Update the docs for these two functions.
def fully_connected(inputs,
                    num_outputs,
                    vec_len,
                    with_routing=True,
                    weights_initializers=tf.contrib.layers.xavier_initializer(),
                    reuse=None,
                    variable_collections=None,
                    scope=None):
    '''A capsule fully connected layer. (Note: not tested yet)
    Args:
        inputs: A tensor of at least rank 3, i.e. `[batch_size, num_inputs, vec_len]`
            or `[batch_size, num_inputs, vec_len, 1]`.
        num_outputs: ...
    Returns:
        ...
    Raises:
        ...
    '''
    layer = CapsLayer(num_outputs=num_outputs,
                      vec_len=vec_len,
                      with_routing=with_routing,
                      layer_type='FC')
    return layer(inputs)


def conv2d(inputs,
           filters,
           vec_len,
           kernel_size,
           strides=(1, 1),
           with_routing=False,
           reuse=None):
    '''A capsule convolutional layer. (Note: not tested yet)
    Args:
        inputs: A tensor.
    Returns:
        ...
    Raises:
        ...
    '''
    layer = CapsLayer(num_outputs=filters,
                      vec_len=vec_len,
                      with_routing=with_routing,
                      layer_type='CONV')
    return(layer(inputs, kernel_size=kernel_size, stride=strides))
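

# Illustrative usage of the two wrappers above (kept in a string so it is not
# executed); `conv1` is assumed to be a [batch_size, 20, 20, 256] feature map:
'''
primaryCaps = conv2d(conv1, filters=32, vec_len=8, kernel_size=9, strides=2,
                     with_routing=False)            # => [batch_size, 1152, 8, 1]
digitCaps = fully_connected(primaryCaps, num_outputs=10, vec_len=16,
                            with_routing=True)      # => [batch_size, 10, 16, 1]
'''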