@@ -28,27 +28,31 @@ def cost(theta, visible_size, hidden_size,
     """
     hv = hidden_size * visible_size

-    W1 = theta[1:hv].reshape(hidden_size, visible_size)
-    W2 = theta[hv + 1:2 * hv].reshape(visible_size, hidden_size)
-    b1 = theta[2 * hv + 1:2 * hv + hidden_size]
-    b2 = theta[2 * hv + hidden_size + 1:]
+    W1 = theta[:hv].reshape(hidden_size, visible_size)
+    W2 = theta[hv:2 * hv].reshape(visible_size, hidden_size)
+    b1 = theta[2 * hv:2 * hv + hidden_size]
+    b2 = theta[2 * hv + hidden_size:]

     # Cost and gradient variables (your code needs to compute these values).
     # Here, we initialize them to zeros.
-    cost = 0
-
     W1grad = np.zeros(W1.shape)
     W2grad = np.zeros(W2.shape)
     b1grad = np.zeros(b1.shape)
     b2grad = np.zeros(b2.shape)

+    def T(a):
+        """Given a 1-d array, make it a column vector.
+        Returns a 2-d array of shape (N, 1).
+        """
+        return a.reshape(len(a), 1)
+
     num_data = data.shape[1]
     # do a feed-forward pass
     # a2: (hidden_size, num_data)
-    a2 = sigmoid(np.dot(W1, data) + b1.T)
+    a2 = sigmoid(np.dot(W1, data) + T(b1))
     # a3: (visible_size, num_data)
-    a3 = sigmoid(np.dot(W2, a2) + b2.T)
+    a3 = sigmoid(np.dot(W2, a2) + T(b2))
     assert a2.shape == (hidden_size, num_data)
     assert a3.shape == (visible_size, num_data)
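The switch from b1.T to T(b1) is the substantive fix in this hunk: transposing a 1-d NumPy array is a no-op, so the old code would either raise a shape error or, when the sizes happened to coincide, broadcast the bias along the wrong axis. A minimal sketch of the difference, with made-up shapes:

import numpy as np

b = np.zeros(3)                          # 1-d bias, shape (3,)
acts = np.ones((3, 5))                   # (hidden_size, num_data)
print(b.T.shape)                         # (3,) -- .T leaves a 1-d array unchanged
print((acts + b.reshape(-1, 1)).shape)   # (3, 5) -- a column vector broadcasts per data point
# acts + b raises a broadcasting ValueError: (3,) aligns with the last axis, not the rows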
@@ -60,20 +64,22 @@ def cost(theta, visible_size, hidden_size,
     # delta2: (hidden, num_data)
     delta2 = np.dot(W2.T, delta3) * (a2 * (1 - a2))

-    W1grad = np.dot(delta2, data.T)
-    W2grad = np.dot(delta3, a2.T)
-    b1grad = delta2
-    b2grad = delta3
+    W1grad[:, :] = np.dot(delta2, data.T) / float(num_data)
+    W2grad[:, :] = np.dot(delta3, a2.T) / float(num_data)
+    b1grad[:] = np.sum(delta2, axis=1) / float(num_data)
+    b2grad[:] = np.sum(delta3, axis=1) / float(num_data)

     grad = flatten_params(W1grad, W2grad, b1grad, b2grad)
     return cost, grad

-def initialize_parameters(hidden_size, visible_size):
+def initialize_params(hidden_size, visible_size):
     """Accepts the number of hidden states in the sparse encoder
     and the number of input states in the sparse encoder.
     Initialize parameters randomly based on layer sizes.
     Returns a new flat array of size
     2 * hidden_size * visible_size + hidden_size + visible_size.
     """
+    assert hidden_size < visible_size
+
     # we'll choose weights uniformly from the interval [-r, r]
     r = np.sqrt(6) / np.sqrt(hidden_size + visible_size + 1)
     W1 = np.random.rand(hidden_size, visible_size) * 2 * r - r
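The diff calls sigmoid and flatten_params without showing them; they are presumably defined elsewhere in the module. A minimal sketch of what they would look like, consistent with the theta slices unpacked at the top of cost:

import numpy as np

def sigmoid(x):
    # elementwise logistic function
    return 1.0 / (1.0 + np.exp(-x))

def flatten_params(W1, W2, b1, b2):
    # pack parameters in the same order that cost() slices them back out
    return np.concatenate([W1.ravel(), W2.ravel(), b1.ravel(), b2.ravel()])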
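Since the commit fixes off-by-one slices and adds 1/num_data averaging, a finite-difference gradient check is the natural way to validate it. A sketch using scipy.optimize.check_grad; the toy sizes are made up, and it assumes data is the only remaining argument of cost (the hunk header truncates the real signature):

import numpy as np
from scipy.optimize import check_grad

visible_size, hidden_size = 8, 3
data = np.random.rand(visible_size, 10)       # 10 random toy data points
theta0 = initialize_params(hidden_size, visible_size)

# cost() returns (cost, grad); check_grad wants them as separate callables
f = lambda t: cost(t, visible_size, hidden_size, data)[0]
g = lambda t: cost(t, visible_size, hidden_size, data)[1]
print(check_grad(f, g, theta0))               # should be tiny (around 1e-6 or less)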