Skip to content

Commit

Permalink
Pre-vectorisation of labels
Browse files Browse the repository at this point in the history
  • Loading branch information
shawntan committed Apr 22, 2013
1 parent 676bcb7 commit 79f07f1
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 19 deletions.
37 changes: 21 additions & 16 deletions crf.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,22 @@ def backward(self,M):
beta = betas[i] = log_dot_mv(M[i+1],beta)
beta = log_dot_mv(M[0],beta)
return (betas,beta)
def create_vector_list(self,x_vecs):
return [ self.all_features(x_vec) for x_vec in x_vecs ]
def neg_likelihood_and_deriv(self,x_vec_list,y_vecs,theta,debug=False):

def create_vector_list(self, x_vecs, y_vecs):
    """Pre-vectorise observation and label sequences.

    Each observation sequence in ``x_vecs`` is passed through
    ``self.all_features``.  Each label sequence in ``y_vecs`` is padded
    with the module-level ``START`` / ``END`` markers and translated to
    an integer array through ``self.label_id``.

    The caller's label sequences are left untouched: padding is applied
    to a copy.  Inserting the markers in place meant a second call on
    the same data padded the sequences twice and leaked ``START`` /
    ``END`` into whatever the caller did with its labels afterwards.

    Args:
        x_vecs: iterable of observation sequences.
        y_vecs: iterable of label sequences, parallel to ``x_vecs``;
            every label (and ``START`` / ``END``) must be a key of
            ``self.label_id``.

    Returns:
        Tuple ``(observations, labels)``: ``observations`` is the list of
        ``self.all_features`` results, ``labels`` is a list of 1-D integer
        arrays of length ``len(y_vec) + 2``.

    Raises:
        KeyError: if a label is missing from ``self.label_id``.
    """
    observations = [self.all_features(x_vec) for x_vec in x_vecs]

    labels = []
    for y_vec in y_vecs:
        # Pad a copy so the input sequence is never mutated.
        padded = [START] + list(y_vec) + [END]
        # Plain ``int`` is what the removed ``np.int`` alias resolved to;
        # ``copy=False`` is dropped because building an array from a list
        # always copies (and NumPy 2 raises if told not to).
        labels.append(
            np.array([self.label_id[y] for y in padded], dtype=int)
        )

    return (observations, labels)

def neg_likelihood_and_deriv(self,x_vec_list,y_vec_list,theta,debug=False):
likelihood = 0
derivative = np.zeros(len(self.theta))
for x_vec,y_vec in zip(x_vec_list,y_vecs):
for x_vec,y_vec in zip(x_vec_list,y_vec_list):
"""
all_features: len(x_vec) + 1 x Y x Y x K
M: len(x_vec) + 1 x Y x Y
Expand All @@ -76,9 +86,9 @@ def neg_likelihood_and_deriv(self,x_vec_list,y_vecs,theta,debug=False):
"""
all_features = x_vec
length = x_vec.shape[0]
y_vec = [START] + y_vec + [END]
yp_vec_ids = [ self.label_id[yp] for yp in y_vec[:-1] ]
y_vec_ids = [ self.label_id[y] for y in y_vec[1:] ]
#y_vec = [START] + y_vec + [END]
yp_vec_ids = y_vec[:-1]
y_vec_ids = y_vec[1:]
log_M = np.dot(all_features,theta)
log_alphas,last = self.forward(log_M)
log_betas, zero = self.backward(log_M)
Expand Down Expand Up @@ -164,10 +174,6 @@ def predict(self,x_vec):
def _predict(self,x_vec):
all_features = self.all_features(x_vec)





if __name__ == "__main__":
labels = ['A','B','C']
obsrvs = ['a','b','c','d','e','f']
Expand All @@ -185,13 +191,12 @@ def _predict(self,x_vec):
x_vec = ["a","b","c","d","e","f"]
y_vec = ["A","B","C","A","B","C"]

vectorised_x_vecs = crf.create_vector_list([x_vec])
l = lambda theta: crf.neg_likelihood_and_deriv(vectorised_x_vecs,[y_vec],theta)
vectorised_x_vecs,vectorised_y_vecs = crf.create_vector_list([x_vec],[y_vec])
l = lambda theta: crf.neg_likelihood_and_deriv(vectorised_x_vecs,vectorised_y_vecs,theta)
#crf.theta = optimize.fmin_bfgs(l, crf.theta, maxiter=100)
theta,_,_ = optimize.fmin_l_bfgs_b(l, crf.theta)
crf.theta = theta
print crf.neg_likelihood_and_deriv(vectorised_x_vecs,[y_vec],crf.theta)
print
print "Minimized...."
print crf.neg_likelihood_and_deriv(vectorised_x_vecs,vectorised_y_vecs,crf.theta)
print
print x_vec
print crf.predict(x_vec)
6 changes: 3 additions & 3 deletions example.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,13 @@
crf = CRF( labels = labels,
feature_functions = transition_functions + observation_functions )

vectorised_x_vecs = crf.create_vector_list(word_data)
l = lambda theta: crf.neg_likelihood_and_deriv(vectorised_x_vecs,label_data,theta)
vectorised_x_vecs,vectorised_y_vecs = crf.create_vector_list(word_data,label_data)
l = lambda theta: crf.neg_likelihood_and_deriv(vectorised_x_vecs,vectorised_y_vecs,theta)
#crf.theta = optimize.fmin_bfgs(l, crf.theta, maxiter=100)
print "Minimizing..."
theta,_,_ = optimize.fmin_l_bfgs_b(l, crf.theta)
crf.theta = theta
print crf.neg_likelihood_and_deriv(vectorised_x_vecs,label_data,crf.theta)
print crf.neg_likelihood_and_deriv(vectorised_x_vecs,vectorised_y_vecs,crf.theta)
print
print
x_vec = word_data[-1]
Expand Down

0 comments on commit 79f07f1

Please sign in to comment.