-
Notifications
You must be signed in to change notification settings - Fork 2
/
joint_clustering.py
76 lines (59 loc) · 3 KB
/
joint_clustering.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import os
import shutil
import subprocess
import sys
import pandas as pd
from config.config import OUTPUT_DIR, local, resolution
from reading import get_project_info
from utils import safe_mkdir
# function that creates all the relevant directories
def prepare_joint_dir(project, tissue, res):
    """Create the joint-clustering output directory and seed it with input files.

    Copies ``!cells.csv``, ``!clusters.csv`` and ``!markers.csv`` from the
    ``<res>-none-0`` directory (the unfiltered clustering) of the given
    project/tissue into a sibling ``<res>-joint_clustering`` directory.

    Args:
        project: project name, used as a path component under ``OUTPUT_DIR``.
        tissue: tissue name, used as a path component and in the task name.
        res: clustering resolution; only its string form is used in paths.

    Returns:
        A ``(task_directory, task_name, results_dir)`` tuple: the name of the
        task directory, the label to put on plots, and the full output path
        (with a trailing ``/``).

    Raises:
        FileNotFoundError: if any of the three files is missing from the
            source directory.
    """
    tissue_dir = OUTPUT_DIR + project + "/" + tissue + "/"
    # directory with unfiltered clustering
    source_dir = tissue_dir + str(res) + "-none-0/"
    task_directory = "{}-joint_clustering".format(res)  # name of the directory for this task
    task_name = tissue + "-" + task_directory  # task name to put on plots
    results_dir = tissue_dir + task_directory + "/"  # directory for saving output

    # create all the directories, if they don't exist
    print("Creating Joint Directories")
    safe_mkdir(results_dir)

    filenames = ("!cells.csv", "!clusters.csv", "!markers.csv")

    # Check every file that is about to be copied (not just "!cells.csv"),
    # and do it with a real exception so the check survives `python -O`.
    missing = [name for name in filenames if not os.path.isfile(source_dir + name)]
    if missing:
        raise FileNotFoundError(
            "Missing file(s) in {}: {}".format(source_dir, ", ".join(missing))
        )

    # copy files
    for name in filenames:
        shutil.copyfile(source_dir + name, results_dir + name)
    return task_directory, task_name, results_dir
# assign colors for joint clustering plot based on which methods retained which cells
def assign_colors(adata, project, tissue, res):
    """Label each cell by which filtering methods retained it.

    Reads the ``!cells.csv`` files from the ``<res>-mad-2`` and
    ``<res>-cutoff-10`` directories of the given project/tissue and writes a
    ``"color"`` column into ``adata`` with one of ``"Neither"``,
    ``"MAD2 only"``, ``"Cutoff only"`` or ``"All"``.

    ``adata`` is modified in place and also returned. Rows are selected by
    label with ``.loc``, so ``adata`` is expected to be indexed by the same
    values found in the ``barcodekey`` columns of the two files.
    """
    tissue_prefix = OUTPUT_DIR + project + "/" + tissue + "/" + str(res)
    mad_cells = pd.read_csv(tissue_prefix + "-mad-2/!cells.csv")
    cutoff_cells = pd.read_csv(tissue_prefix + "-cutoff-10/!cells.csv")

    # Start from the default label; each later assignment overrides the
    # previous one, so cells kept by both methods end up as "All".
    adata["color"] = "Neither"

    mad_keys = mad_cells["barcodekey"]
    adata.loc[mad_keys, "color"] = "MAD2 only"

    cutoff_keys = cutoff_cells["barcodekey"]
    adata.loc[cutoff_keys, "color"] = "Cutoff only"

    kept_by_both = list(set(cutoff_cells["barcodekey"]) & set(mad_cells["barcodekey"]))
    adata.loc[kept_by_both, "color"] = "All"
    return adata
def joint_main(project, task_id, tissue=None):
    """Run the joint-clustering task for one tissue of a project.

    Prepares the joint output directory, adds the ``"color"`` column to the
    copied ``!cells.csv`` (see ``assign_colors``), writes it back, and then
    runs ``plotting.R`` on the results, printing its output.

    Args:
        project: project name.
        task_id: task index passed to ``get_project_info`` to look up the
            tissue when ``tissue`` is not given.
        tissue: tissue name; when ``None`` it is taken from the first element
            of ``get_project_info(project, task_id=task_id)``.

    Raises:
        subprocess.CalledProcessError: if the ``Rscript`` call exits non-zero.
    """
    if tissue is None:
        tissue = get_project_info(project, task_id=task_id)[0]
    print("joint clustering task.id:{} - tissue:{}, res:{}, project:{}".format(task_id, tissue, resolution, project))

    # The task directory name is not needed here, only the plot label and path.
    _, task_name, results_dir = prepare_joint_dir(project, tissue, resolution)

    cells_path = results_dir + "!cells.csv"
    adata = pd.read_csv(cells_path)
    # Index by barcode so assign_colors can select rows by label.
    adata.set_index(adata["barcodekey"], inplace=True)
    assign_colors(adata, project, tissue, resolution)

    print("Writing results")
    with open(cells_path, "w") as fout:
        fout.write(adata.to_csv())

    # Pass the arguments as a list without a shell: tissue/project names that
    # contain spaces or shell metacharacters are then handed to Rscript
    # verbatim instead of being re-parsed (or executed) by the shell.
    plot_cmd = ["Rscript", "plotting.R", task_name, results_dir, "joint"]
    print(subprocess.check_output(plot_cmd).decode('UTF-8'))
if __name__ == '__main__':
    if not local:
        # cluster run: project and 1-based task id come from the command line
        proj = sys.argv[1]
        t_id = int(sys.argv[2]) - 1
        joint_main(proj, t_id)
    else:
        # interactive run for debugging outside of the cluster
        proj = input("Project: ").strip()
        t_id = int(input("Task ID (-1 to input specific tissue): ").strip())
        if t_id != -1:
            joint_main(proj, t_id)
        else:
            # -1 means the tissue is typed in directly instead of looked up
            tiss = input("Tissue: ").strip()
            joint_main(proj, 0, tissue=tiss)