diff --git a/crf.py b/crf.py
index 1563803..9fccb53 100644
--- a/crf.py
+++ b/crf.py
@@ -98,12 +98,12 @@ def neg_likelihood_and_deriv(self,x_vec_list,y_vec_list,theta,debug=False):
 			probabilities in one step, which means its faster, because it's done
 			in numpy
 			"""
-			log_alphas1 = log_alphas.reshape(time,state,1) 
+			log_alphas1 = log_alphas.reshape(time,state,1)
 			log_betas1  = log_betas.reshape(time,1,state)
 			log_Z       = misc.logsumexp(last)
 			log_probs   = log_alphas1 + log_M + log_betas1 - log_Z
 			log_probs   = log_probs.reshape(log_probs.shape+(1,))
-			#print log_Z
+
 			"""
 			Find the expected value of f_k over all transitions
 					 and emperical values
@@ -197,8 +197,14 @@ def _predict(self,x_vec):
 	vectorised_x_vecs,vectorised_y_vecs = crf.create_vector_list([x_vec],[y_vec])
 	l = lambda theta: crf.neg_likelihood_and_deriv(vectorised_x_vecs,vectorised_y_vecs,theta)
 	#crf.theta = optimize.fmin_bfgs(l, crf.theta, maxiter=100)
-	theta,_,_ = optimize.fmin_l_bfgs_b(l, crf.theta)
+	#theta,_,_ = optimize.fmin_l_bfgs_b(l, crf.theta)
+	theta = crf.theta
+	for _ in range(10000):
+		value, gradient = l(theta)
+		print value
+		theta = theta - 0.01*gradient
 	crf.theta = theta
+	print theta
 	print "Minimized...."
 	print crf.neg_likelihood_and_deriv(vectorised_x_vecs,vectorised_y_vecs,crf.theta)
 	print
diff --git a/example.py b/example.py
index 17ad5b6..6e1871d 100644
--- a/example.py
+++ b/example.py
@@ -23,8 +23,7 @@
 	lbls   = [START] + labels +  [END]
 	transition_functions = [
 			lambda yp,y,x_v,i,_yp=_yp,_y=_y: 1 if yp==_yp and y==_y else 0
-				for _yp in lbls[:-1]
-				for _y  in lbls[1:]]
+				for _yp in lbls[:-1] for _y  in lbls[1:]]
 	def set_membership(tag):
 		def fun(yp,y,x_v,i):
 			if i < len(x_v) and x_v[i].lower() in word_sets[tag]:
@@ -52,12 +51,13 @@ def print_value(theta):
 		print crf.neg_likelihood_and_deriv(vectorised_x_vecs,vectorised_y_vecs,theta)
 
 	#val = optimize.fmin_l_bfgs_b(l, crf.theta)
+	#print val
+	#theta,_,_  = val
+	theta = crf.theta
 	for _ in range(10000):
-		value, gradient = l(crf.theta)
+		value, gradient = l(theta)
 		print value
-		crf.theta = crf.theta - 0.1*gradient
-	print val
-	theta,_,_  = val
+		theta = theta - 0.1*gradient
 	crf.theta = theta
 	print crf.neg_likelihood_and_deriv(vectorised_x_vecs,vectorised_y_vecs,crf.theta)
 	print