Skip to content

Commit

Permalink
Adjusting output to use cell/damaged labels instead of 1/0
Browse files Browse the repository at this point in the history
  • Loading branch information
AlicenJoyHenning committed Oct 18, 2024
1 parent 7cd251b commit 5b4f0a0
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 6 deletions.
2 changes: 1 addition & 1 deletion example_data/Kolodziejczyk.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
,Actb,Gadph,Metabolic process,#Detected Genes,Mapping rate
Identifier,Actb,Gadph,Metabolic process,#Detected Genes,Mapping rate
ola_mES_2i_1_1.counts,0.006805259,0.00015504,0.023556419,11947,0.775385676
ola_mES_2i_1_10.counts,0.008647994,0.00023053,0.030453829,11894,0.790886637
ola_mES_2i_1_11.counts,0.009517874,0.000172775,0.02402108,12321,0.774220237
Expand Down
2 changes: 1 addition & 1 deletion example_data/labeled_Kolodziejczyk.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
,Actb,Gadph,Metabolic process,#Detected Genes,Mapping rate,Quality
Identifier,Actb,Gadph,Metabolic process,#Detected Genes,Mapping rate,Quality
ola_mES_2i_1_1.counts,0.006805259,0.00015504,0.023556419,11947,0.775385676,1
ola_mES_2i_1_10.counts,0.008647994,0.00023053,0.030453829,11894,0.790886637,1
ola_mES_2i_1_11.counts,0.009517874,0.000172775,0.02402108,12321,0.774220237,1
Expand Down
14 changes: 10 additions & 4 deletions runEnsembleKQC.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import itertools
import time
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.ensemble import IsolationForest
from sklearn.metrics.pairwise import cosine_similarity
Expand Down Expand Up @@ -72,6 +73,10 @@ def run_enumeration_round_wrapper(args):
cell_num = len(mat[0])
print("{} features, {} cells".format(feature_num, cell_num))

# Create a new DataFrame with the 'Identifier' column
identifiers = pd.read_csv(file_name, usecols=['Identifier'])
result_df = identifiers.copy()

mat = min_max_scale(mat)
tmat = np.array(list(zip(*mat)))
similarity = cosine_similarity(tmat)
Expand Down Expand Up @@ -103,7 +108,7 @@ def run_enumeration_round_wrapper(args):
print(f"Total enumeration rounds: {total_rounds}")

# Limit the number of enumeration rounds to 5
enumeration_list = enumeration_list[:10]
enumeration_list = enumeration_list[:5]

def progress_wrapper(args):
result = run_enumeration_round_wrapper(args)
Expand All @@ -121,10 +126,11 @@ def progress_wrapper(args):
precision, recall, F1 = calc_pre_recall_F1(result, label)
print('precision {:2f} recall {:2f} F1Score {:2f}'.format(precision, recall, F1))

# Add the results as a new 'Quality' column, labelling 1 as 'cell' and any other value as 'damaged'
result_df['Quality'] = ['cell' if res == 1 else 'damaged' for res in result]

# Store the result to the output path
if result_path:
with open(result_path, 'w') as f:
f.write('Quality\n')
f.writelines('\n'.join(map(str, result)))
result_df.to_csv(result_path, index=False, header=False)
end = time.time()
print('Done. Total time: {:2f}s. Results have been stored in {}'.format(end - start, result_path))

0 comments on commit 5b4f0a0

Please sign in to comment.