Skip to content

Commit

Permalink
Bug fixes discovered during testing
Browse files Browse the repository at this point in the history
  • Loading branch information
Old-Shatterhand committed Apr 4, 2024
1 parent 676674b commit 62f0afe
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 14 deletions.
9 changes: 3 additions & 6 deletions datasail/cluster/vectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,12 @@
"russel", "sokal", "tanimoto"
]

# unbounded: mcconnaughey
# unbounded: canberra, chebyshev, cityblock, euclidean, mcconnaughey, manhattan, minkowski, sqeuclidean
# produces inf or nan: correlation, cosine, jensenshannon, seuclidean, braycurtis
# boolean only: dice, kulczynski1, rogerstanimoto, russelrao, sokalmichener, sokalsneath, yule
# matching == hamming, manhattan == cityblock (unofficial)
DIST_OPTIONS = Literal[
"canberra", "chebyshev", "cityblock", "euclidean", "hamming", "jaccard", "mahalanobis", "manhattan", "matching",
"minkowski", "sqeuclidean"
"hamming", "jaccard", "mahalanobis", "matching"
]


Expand Down Expand Up @@ -181,9 +180,7 @@ def run(
f"the embeddings. The number of samples ({len(fps)}) is too small; the covariance matrix is singular. "
f"For observations with {len(fps[0])} dimensions, at least {len(fps[0]) + 1} observations are required."
)
dataset.cluster_distance = scipy.spatial.distance.cdist(
fps, fps, metric={"manhattan": "cityblock", "tanimoto": "jaccard"}.get(method, method)
)
dataset.cluster_distance = scipy.spatial.distance.cdist(fps, fps, metric=method)


# if __name__ == '__main__':
Expand Down
2 changes: 0 additions & 2 deletions datasail/solver/cluster_1d.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,6 @@ def solve_c1(
) for e2 in range(e1 + 1, len(clusters))] for e1 in range(len(clusters))] # 15

loss = cvxpy.sum([t for tmp_list in tmp for t in tmp_list])
if distances is not None:
loss = -loss
problem = solve(loss, constraints, max_sec, solver, log_file)

return None if problem is None else {
Expand Down
8 changes: 2 additions & 6 deletions tests/test_clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,9 +257,7 @@ def test_mmseqspp_protein():
@pytest.mark.parametrize("in_type", ["Original", "List", "Numpy"])
@pytest.mark.parametrize("method", [
"allbit", "asymmetric", "braunblanquet", "cosine", "dice", "kulczynski", "onbit", "rogotgoldberg", "russel",
"sokal",
"canberra", "chebyshev", "cityblock", "euclidean", "hamming", "jaccard",
"mahalanobis", "manhattan", "matching", "minkowski", "sqeuclidean", "tanimoto"
"sokal", "tanimoto", "hamming", "jaccard", "mahalanobis", "matching"
])
def test_vector(md_calculator, algo, in_type, method) :
data = molecule_data()
Expand All @@ -284,9 +282,7 @@ def test_vector(md_calculator, algo, in_type, method) :

@pytest.mark.parametrize("method", [
"allbit", "asymmetric", "braunblanquet", "cosine", "dice", "kulczynski", "onbit", "rogotgoldberg", "russel",
"sokal",
"canberra", "chebyshev", "cityblock", "euclidean", "hamming", "jaccard", "mahalanobis", "manhattan", "matching",
"minkowski", "sqeuclidean", "tanimoto"
"sokal", "hamming", "jaccard", "mahalanobis", "matching"
])
def test_vector_edge(method):
dataset = DataSet(
Expand Down

0 comments on commit 62f0afe

Please sign in to comment.