Binary file added .DS_Store
Binary file not shown.
1,961 changes: 273 additions & 1,688 deletions TICC_solver.py

Large diffs are not rendered by default.

9 changes: 9 additions & 0 deletions example.py
@@ -0,0 +1,9 @@
import TICC_solver as TICC
import numpy as np
import sys

fname = "example_data.txt"
(cluster_assignment, cluster_MRFs) = TICC.solve(window_size=1, number_of_clusters=8, lambda_parameter=11e-2, beta=600, maxIters=100, threshold=2e-5, write_out_file=False, input_file=fname, prefix_string="output_folder/", num_proc=1)

print cluster_assignment
np.savetxt('Results.txt', cluster_assignment, fmt='%d', delimiter=',')
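For a quick visual check of the segmentation, the returned assignment can be plotted against time. A minimal sketch, assuming matplotlib is available (the plotting code is not part of this PR):

import matplotlib.pyplot as plt

# cluster_assignment[t] is the label of time point t, so a step plot
# shows the segment boundaries directly
plt.plot(cluster_assignment, drawstyle='steps-post')
plt.xlabel('time index')
plt.ylabel('cluster label')
plt.savefig('assignment.png')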
19,607 changes: 19,607 additions & 0 deletions example_data.txt

Large diffs are not rendered by default.

1,129 changes: 0 additions & 1,129 deletions solveCrossTime.py

This file was deleted.

171 changes: 171 additions & 0 deletions src/TICC_helper.py
@@ -0,0 +1,171 @@
import numpy as np
from scipy import stats
import __builtin__ as bt
def getTrainTestSplit(m, num_blocks, num_stacked):
'''
- m: number of observations
- num_blocks: window_size + 1
- num_stacked: window_size
Returns:
- sorted list of training indices
'''
# Draw a random subset of valid window start indices for training
# (training_percent = 1 keeps every valid index)
training_percent = 1
# list of training indices
training_idx = np.random.choice(m-num_blocks+1, size=int((m-num_stacked)*training_percent),replace = False )
# Ensure that the first and the last few points are in
training_idx = list(training_idx)
if 0 not in training_idx:
training_idx.append(0)
if m - num_stacked not in training_idx:
training_idx.append(m-num_stacked)
training_idx = np.array(training_idx)
return sorted(training_idx)
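A quick sketch of the helper's endpoint guarantees (the chosen indices vary per run, but the first and last window starts are always present):

idx = getTrainTestSplit(m=100, num_blocks=2, num_stacked=1)
print idx[0]   # 0: the first window start is always included
print idx[-1]  # 99: m - num_stacked is always included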

def upperToFull(a, eps = 0):
ind = (a<eps)&(a>-eps)
a[ind] = 0
n = int((-1 + np.sqrt(1+ 8*a.shape[0]))/2)
A = np.zeros([n,n])
A[np.triu_indices(n)] = a
temp = A.diagonal()
A = np.asarray((A + A.T) - np.diag(temp))
return A
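As a concrete check of the index arithmetic: a length-6 vector holds the upper triangle of a 3x3 matrix, since n = (-1 + sqrt(1 + 8*6)) / 2 = 3. A small sketch with made-up values:

import numpy as np

a = np.array([1.0, 0.2, 0.3, 2.0, 0.4, 3.0])
print upperToFull(a)
# [[ 1.   0.2  0.3]
#  [ 0.2  2.   0.4]
#  [ 0.3  0.4  3. ]]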

def hex_to_rgb(value):
"""Return (red, green, blue) in [0, 1] for the color given as #rrggbb."""
value = value.lstrip('#')
lv = bt.len(value)
out = tuple(int(value[i:i + lv // 3], 16) for i in range(0, lv, lv // 3))
out = tuple([x/255.0 for x in out])
return out
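For example, with the normalization above:

print hex_to_rgb("#ff8000")  # (1.0, ~0.502, 0.0)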

def updateClusters(LLE_node_vals,switch_penalty = 1):
"""
Takes in LLE_node_vals matrix and computes the path that minimizes
the total cost over the path
Note the LLE's are negative of the true LLE's actually!!!!!

Note: switch penalty > 0
"""
(T,num_clusters) = LLE_node_vals.shape
future_cost_vals = np.zeros(LLE_node_vals.shape)

##compute future costs
for i in xrange(T-2,-1,-1):
j = i+1
indicator = np.zeros(num_clusters)
future_costs = future_cost_vals[j,:]
lle_vals = LLE_node_vals[j,:]
for cluster in xrange(num_clusters):
total_vals = future_costs + lle_vals + switch_penalty
total_vals[cluster] -= switch_penalty
future_cost_vals[i,cluster] = np.min(total_vals)

##compute the best path
path = np.zeros(T)

##the first location
curr_location = np.argmin(future_cost_vals[0,:] + LLE_node_vals[0,:])
path[0] = curr_location

##compute the path
for i in xrange(T-1):
j = i+1
future_costs = future_cost_vals[j,:]
lle_vals = LLE_node_vals[j,:]
total_vals = future_costs + lle_vals + switch_penalty
total_vals[int(path[i])] -= switch_penalty

path[i+1] = np.argmin(total_vals)

##return the computed path
return path
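A minimal usage sketch of the dynamic program (the negative-LLE values are made up): with no switch penalty the path follows the per-row argmin, while a large penalty suppresses the final switch:

import numpy as np

LLE_node_vals = np.array([[0.1, 2.0],
                          [0.2, 1.5],
                          [1.8, 0.1]])  # rows = time points, columns = clusters
print updateClusters(LLE_node_vals, switch_penalty=0.0)   # [ 0.  0.  1.]
print updateClusters(LLE_node_vals, switch_penalty=10.0)  # [ 0.  0.  0.]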

def find_matching(confusion_matrix):
"""
returns the perfect matching
"""
_,n = confusion_matrix.shape
path = []
for i in xrange(n):
max_val = -1e10
max_ind = -1
for j in xrange(n):
if j in path:
pass
else:
temp = confusion_matrix[i,j]
if temp > max_val:
max_val = temp
max_ind = j
path.append(max_ind)
return path
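For instance, on a made-up 2x2 confusion matrix the greedy pass assigns row 0 first:

import numpy as np

conf = np.array([[5, 1],
                 [2, 8]])
print find_matching(conf)  # [0, 1]: row 0 -> column 0, row 1 -> column 1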

def computeF1Score_delete(num_cluster,matching_algo,actual_clusters,threshold_algo,save_matrix = False):
"""
computes the F1 scores and returns a list of values
"""
F1_score = np.zeros(num_cluster)
for cluster in xrange(num_cluster):
matched_cluster = matching_algo[cluster]
true_matrix = actual_clusters[cluster]
estimated_matrix = threshold_algo[matched_cluster]
if save_matrix: np.savetxt("estimated_matrix_cluster=" + str(cluster)+".csv",estimated_matrix,delimiter = ",", fmt = "%1.4f")
TP = 0
TN = 0
FP = 0
FN = 0
size = true_matrix.shape[0]  # num_stacked * n, the stacked matrix dimension
for i in xrange(size):
for j in xrange(size):
if estimated_matrix[i,j] == 1 and true_matrix[i,j] != 0:
TP += 1.0
elif estimated_matrix[i,j] == 0 and true_matrix[i,j] == 0:
TN += 1.0
elif estimated_matrix[i,j] == 1 and true_matrix[i,j] == 0:
FP += 1.0
else:
FN += 1.0
precision = (TP)/(TP + FP)
print "cluster #", cluster
print "TP,TN,FP,FN---------->", (TP,TN,FP,FN)
recall = TP/(TP + FN)
f1 = (2*precision*recall)/(precision + recall)
F1_score[cluster] = f1
return F1_score

def compute_confusion_matrix(num_clusters,clustered_points_algo, sorted_indices_algo):
"""
computes a confusion matrix and returns it
"""
seg_len = 400
true_confusion_matrix = np.zeros([num_clusters,num_clusters])
for point in xrange(bt.len(clustered_points_algo)):
cluster = clustered_points_algo[point]
num = (int(sorted_indices_algo[point]/seg_len) %num_clusters)
true_confusion_matrix[int(num),int(cluster)] += 1
return true_confusion_matrix

def computeF1_macro(confusion_matrix,matching, num_clusters):
"""
computes the macro F1 score
confusion matrix : requres permutation
matching according to which matrix must be permuted
"""
##Permute the matrix columns
permuted_confusion_matrix = np.zeros([num_clusters,num_clusters])
for cluster in xrange(num_clusters):
matched_cluster = matching[cluster]
permuted_confusion_matrix[:,cluster] = confusion_matrix[:,matched_cluster]
##Compute the F1 score for every cluster
F1_score = 0
for cluster in xrange(num_clusters):
TP = permuted_confusion_matrix[cluster,cluster]
FP = np.sum(permuted_confusion_matrix[:,cluster]) - TP
FN = np.sum(permuted_confusion_matrix[cluster,:]) - TP
precision = TP/(TP + FP)
recall = TP/(TP + FN)
f1 = stats.hmean([precision,recall])
F1_score += f1
F1_score /= num_clusters
return F1_score
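A small end-to-end sketch combining find_matching and computeF1_macro (the counts are made up):

import numpy as np

conf = np.array([[8.0, 2.0],
                 [1.0, 9.0]])
matching = find_matching(conf)            # [0, 1]
print computeF1_macro(conf, matching, 2)  # ~0.85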
Empty file added src/__init__.py
Empty file.
127 changes: 127 additions & 0 deletions src/admm_solver.py
@@ -0,0 +1,127 @@
import numpy
import math
class ADMMSolver:
def __init__(self, lamb, num_stacked, size_blocks, rho, S, rho_update_func=None):
self.lamb = lamb
self.numBlocks = num_stacked
self.sizeBlocks = size_blocks
probSize = num_stacked*size_blocks
self.length = probSize*(probSize+1)/2
self.x = numpy.zeros(self.length)
self.z = numpy.zeros(self.length)
self.u = numpy.zeros(self.length)
self.rho = float(rho)
self.S = S
self.status = 'initialized'
self.rho_update_func = rho_update_func

def ij2symmetric(self, i,j,size):
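# Maps entry (i, j), j >= i, of a size x size matrix to its index in the
# row-major flattening of the upper triangle (the layout of x, z and u)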
return (size * (size + 1))/2 - (size-i)*((size - i + 1))/2 + j - i

def upper2Full(self, a):
n = int((-1 + numpy.sqrt(1+ 8*a.shape[0]))/2)
A = numpy.zeros([n,n])
A[numpy.triu_indices(n)] = a
temp = A.diagonal()
A = (A + A.T) - numpy.diag(temp)
return A

def Prox_logdet(self, S, A, eta):
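# Closed-form proximal step for the log det term: with eta*A - S = Q diag(d) Q^T,
# map each eigenvalue d_i to (d_i + sqrt(d_i^2 + 4*eta)) / (2*eta); the result
# is the positive-definite minimizer of -logdet(X) + <S, X> + (eta/2)*||X - A||_F^2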
d, q = numpy.linalg.eigh(eta*A-S)
q = numpy.matrix(q)
X_var = ( 1/(2*float(eta)) )*q*( numpy.diag(d + numpy.sqrt(numpy.square(d) + (4*eta)*numpy.ones(d.shape))) )*q.T
x_var = X_var[numpy.triu_indices(S.shape[1])] # extract upper triangular part as update variable
return numpy.matrix(x_var).T

def ADMM_x(self):
a = self.z-self.u
A = self.upper2Full(a)
eta = 1/self.rho
x_update = self.Prox_logdet(self.S, A, eta)
self.x = numpy.array(x_update).T.reshape(-1)

def ADMM_z(self, index_penalty = 1):
a = self.x + self.u
probSize = self.numBlocks*self.sizeBlocks
z_update = numpy.zeros(self.length)

# TODO: can we parallelize these?
for i in range(self.numBlocks):
elems = self.numBlocks if i==0 else (2*self.numBlocks - 2*i)/2 # i=0 is diagonal
for j in range(self.sizeBlocks):
for k in range(j, self.sizeBlocks):
locList = [((l+i)*self.sizeBlocks + j, l*self.sizeBlocks+k) for l in range(elems)]
lamSum = sum(self.lamb[loc1, loc2] for (loc1, loc2) in locList)
indices = [self.ij2symmetric(loc1, loc2, probSize) for (loc1, loc2) in locList]
pointSum = sum(a[index] for index in indices)
rhoPointSum = self.rho * pointSum

#Calculate soft threshold
ans = 0
#If answer is positive
if rhoPointSum > lamSum:
ans = max((rhoPointSum - lamSum)/(self.rho*elems),0)
elif rhoPointSum < -1*lamSum:
ans = min((rhoPointSum + lamSum)/(self.rho*elems),0)

for index in indices:
z_update[index] = ans
self.z = z_update

def ADMM_u(self):
u_update = self.u + self.x - self.z
self.u = u_update

# Returns True if convergence criteria have been satisfied
# eps_abs = eps_rel = 0.01
# r = x - z
# s = rho * (z - z_old)
# e_pri = sqrt(length) * e_abs + e_rel * max(||x||, ||z||)
# e_dual = sqrt(length) * e_abs + e_rel * ||rho * u||
# Should stop if (||r|| <= e_pri) and (||s|| <= e_dual)
# Returns (boolean shouldStop, primal residual value, primal threshold,
# dual residual value, dual threshold)
def CheckConvergence(self, z_old, e_abs, e_rel, verbose):
norm = numpy.linalg.norm
r = self.x - self.z
s = self.rho * (self.z - z_old)
# Primal and dual thresholds. Add .0001 to prevent the case of 0.
e_pri = math.sqrt(self.length) * e_abs + e_rel * max(norm(self.x), norm(self.z)) + .0001
e_dual = math.sqrt(self.length) * e_abs + e_rel * norm(self.rho * self.u) + .0001
# Primal and dual residuals
res_pri = norm(r)
res_dual = norm(s)
if verbose:
# Debugging information to print convergence criteria values
print ' r:', res_pri
print ' e_pri:', e_pri
print ' s:', res_dual
print ' e_dual:', e_dual
stop = (res_pri <= e_pri) and (res_dual <= e_dual)
return (stop, res_pri, e_pri, res_dual, e_dual)

# Runs ADMM iterations until convergence or maxIters is reached.
def __call__(self, maxIters, eps_abs, eps_rel, verbose):
num_iterations = 0
self.status = 'Incomplete: max iterations reached'
for i in range(maxIters):
z_old = numpy.copy(self.z)
self.ADMM_x()
self.ADMM_z()
self.ADMM_u()
if i != 0:
stop, res_pri, e_pri, res_dual, e_dual = self.CheckConvergence(z_old, eps_abs, eps_rel, verbose)
if stop:
self.status = 'Optimal'
break
new_rho = self.rho
if self.rho_update_func:
new_rho = self.rho_update_func(self.rho, res_pri, e_pri, res_dual, e_dual)
scale = self.rho / new_rho
self.rho = new_rho
self.u = scale*self.u

if verbose:
# Debugging information prints current iteration #
print 'Iteration %d' % i
return self.x
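A minimal standalone sketch of driving the solver with made-up inputs (the empirical covariance S and penalty matrix lamb below are placeholders):

import numpy

probSize = 2 * 3                                 # num_stacked * size_blocks
lamb = 0.11 * numpy.ones((probSize, probSize))   # elementwise l1 penalty weights
S = numpy.cov(numpy.random.randn(probSize, 50))  # empirical covariance

solver = ADMMSolver(lamb, 2, 3, 1.0, S)
x = solver(1000, 1e-6, 1e-6, False)  # maxIters, eps_abs, eps_rel, verbose
theta = solver.upper2Full(x)         # estimated sparse inverse covariance
print solver.status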