Skip to content

Commit

Permalink
figure ready versions
Browse files Browse the repository at this point in the history
  • Loading branch information
bluenote-1577 committed Oct 18, 2023
1 parent c9abd95 commit 9e7f05f
Show file tree
Hide file tree
Showing 6 changed files with 219 additions and 228 deletions.
252 changes: 126 additions & 126 deletions results_oct15/chng-fungi.tsv

Large diffs are not rendered by default.

65 changes: 41 additions & 24 deletions scripts/chng_between_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@

cmap = sns.color_palette("muted")

boxplot_covs = False
other_ind = 3

np.random.seed(0)
def rand_jitter(arr):
Expand Down Expand Up @@ -51,7 +53,6 @@ class result:
#metadata = '/home/jshaw/projects/sylph_test/all_case_control_table_nofail.txt'
#metadata = '/home/jshaw/projects/sylph_test/spt_case.txt'

boxplot_covs = True

prev_file = None
for line in open(metadata,'r'):
Expand All @@ -64,11 +65,14 @@ class result:
read_file_to_pair_dict[file] = prev_file
prev_file = None
read_file_to_status[file] = case_control
print(len(read_file_to_status))

prita_files = [
#"/home/jshaw/projects/sylph_test/results/c100-fungi+aureus.tsv",
#"results/c100-fungi-jul2.txt",
"results_oct15/chng-fungi.tsv",
#"results_oct15/chng-fungi_c100.tsv",
#"x"
]

for file in prita_files:
Expand Down Expand Up @@ -146,16 +150,16 @@ class result:
case_res_n.append(res.naive_ani)

else:
case_res.append(res.mean_cov)
case_res_n.append(res.mean_cov)
case_res.append(res.eff_cov)
case_res_n.append(res.eff_cov)
else:
if not boxplot_covs:
control_res.append(res.final_ani)
control_res_n.append(res.naive_ani)

else:
control_res.append(res.mean_cov)
control_res_n.append(res.mean_cov)
control_res.append(res.eff_cov)
control_res_n.append(res.eff_cov)

res_used_pairs.add(tuple(pair))
if 'globo' in res.contig_name:
Expand All @@ -170,21 +174,23 @@ class result:
case_globo_n.append(res.naive_ani)

else:
case_globo.append(res.mean_cov)
case_globo_n.append(res.mean_cov)
case_globo.append(res.eff_cov)
case_globo_n.append(res.eff_cov)

else:
if not boxplot_covs:
control_globo.append(res.final_ani)
control_globo_n.append(res.naive_ani)
else:
control_globo.append(res.mean_cov)
control_globo_n.append(res.mean_cov)
control_globo.append(res.eff_cov)
control_globo_n.append(res.eff_cov)

globo_used_pairs.add(tuple(pair))

print(case_res)
print(len(globo_used_pairs))
#print(len(globo_used_pairs))
print(list(pair_to_res.values()))
print(pair_to_res)

sc = np.array(list(pair_to_res.values()))
sc2 = np.array(list(pair_to_res_naive.values()))
Expand All @@ -194,9 +200,9 @@ class result:

s = 7
ax[0][0].scatter(sc[:,0], sc[:,1], alpha = 1.00, s = s, color = cmap[0], label = 'sylph adjusted')
ax[0][1].scatter(sc2[:,0], sc2[:,1], alpha = 1.00, s = s, color = cmap[3], label = 'naive containment')
ax[0][1].scatter(sc2[:,0], sc2[:,1], alpha = 1.00, s = s, color = cmap[other_ind], label = 'Naive containment')
ax[1][0].scatter(sc_g[:,0], sc_g[:,1], alpha = 1.00, s = s, color = cmap[0], label = 'sylph adjusted')
ax[1][1].scatter(sc2_g[:,0], sc2_g[:,1], alpha = 1.00, s = s, color = cmap[3], label = 'naive containment')
ax[1][1].scatter(sc2_g[:,0], sc2_g[:,1], alpha = 1.00, s = s, color = cmap[other_ind], label = 'Naive containment')
ax[0][0].plot([82,100],[82,100],'--',c='black')
ax[1][0].plot([82,100],[82,100],'--',c='black')
ax[0][1].plot([82,100],[82,100],'--',c='black')
Expand Down Expand Up @@ -234,7 +240,7 @@ class result:
#b.legend(frameon=False, loc='lower right')
#fig.tight_layout()
fig.tight_layout(rect=(0,0,1,0.95))
plt.savefig("figures/chng_left_right_concordance.pdf")
plt.savefig("figures/chng_left_right_concordance.svg")
plt.show()


Expand Down Expand Up @@ -291,14 +297,25 @@ def add_stat_annotation(ax, bp, pval):
# Create the figure and axes
fig, ax = plt.subplots(2,2, figsize=(8*cm, 8*cm))

case_res = [x[0]/2 + x[1]/2 for (a,x) in pair_to_res.items() if read_file_to_status[a[0]] == 'Case']
case_globo = [x[0]/2 + x[1]/2 for (a,x) in pair_to_res_globo.items() if read_file_to_status[a[0]] == 'Case']
control_res = [x[0]/2 + x[1]/2 for (a,x) in pair_to_res.items() if read_file_to_status[a[0]] == 'Control']
control_globo = [x[0]/2 + x[1]/2 for (a,x) in pair_to_res_globo.items() if read_file_to_status[a[0]] == 'Control']

case_res_n = [x[0]/2 + x[1]/2 for (a,x) in pair_to_res_naive.items() if read_file_to_status[a[0]] == 'Case']
case_globo_n = [x[0]/2 + x[1]/2 for (a,x) in pair_to_res_naive_globo.items() if read_file_to_status[a[0]] == 'Case']
control_res_n = [x[0]/2 + x[1]/2 for (a,x) in pair_to_res_naive.items() if read_file_to_status[a[0]] == 'Control']
control_globo_n = [x[0]/2 + x[1]/2 for (a,x) in pair_to_res_naive_globo.items() if read_file_to_status[a[0]] == 'Control']


# Add the boxplots to the axes
bp1 = ax[0][0].boxplot([case_res, control_res], patch_artist=True, boxprops=dict(facecolor=cmap[0]), labels = ['case', 'control'])
bp2 = ax[0][1].boxplot([case_res_n, control_res_n], patch_artist=True, boxprops=dict(facecolor=cmap[3]),labels = ['case', 'control'])
bp3 = ax[1][0].boxplot([case_globo, control_globo], patch_artist=True, boxprops=dict(facecolor=cmap[0]), labels = ['case','control'])
bp4 = ax[1][1].boxplot([case_globo_n, control_globo_n], patch_artist=True, boxprops=dict(facecolor=cmap[3]), labels = ['case', 'control'])
bp1 = ax[0][0].boxplot([case_res, control_res], patch_artist=True, boxprops=dict(facecolor=cmap[0]), labels = ['Case', 'Control'])
bp2 = ax[0][1].boxplot([case_res_n, control_res_n], patch_artist=True, boxprops=dict(facecolor=cmap[other_ind]),labels = ['Case', 'Control'])
bp3 = ax[1][0].boxplot([case_globo, control_globo], patch_artist=True, boxprops=dict(facecolor=cmap[0]), labels = ['Case','Control'])
bp4 = ax[1][1].boxplot([case_globo_n, control_globo_n], patch_artist=True, boxprops=dict(facecolor=cmap[other_ind]), labels = ['Case', 'Control'])
boxplots = [bp1, bp2, bp3, bp4]

print(f"n = {len(case_res) + len(case_globo)}")
print(f"n = {len(case_res) + len(control_globo)}")

lines_labels = [ax.get_legend_handles_labels() for ax in fig.axes[0:2]]
lines, labels = [sum(lol, []) for lol in zip(*lines_labels)]
Expand All @@ -317,10 +334,10 @@ def add_stat_annotation(ax, bp, pval):
#for k in range(2):
#ax[l][k].set_ylim([85,103])

ax[0][0].text(.05, 1.15, rf"M. restricta ", ha='left', va='top', transform=ax[0][0].transAxes, fontsize = 8)
ax[0][1].text(.05, 1.15, 'M. restricta', ha='left', va='top', transform=ax[0][1].transAxes, fontsize = 8)
ax[1][1].text(.05, 1.15, 'M. globosa', ha='left', va='top', transform=ax[1][1].transAxes, fontsize = 8)
ax[1][0].text(.05, 1.15, 'M. globosa', ha='left', va='top', transform=ax[1][0].transAxes, fontsize = 8)
ax[0][0].text(.05, 1.15, rf"M. restricta ", ha='left', va='top', transform=ax[0][0].transAxes, fontsize = 7)
ax[0][1].text(.05, 1.15, 'M. restricta', ha='left', va='top', transform=ax[0][1].transAxes, fontsize = 7)
ax[1][1].text(.05, 1.15, 'M. globosa', ha='left', va='top', transform=ax[1][1].transAxes, fontsize = 7)
ax[1][0].text(.05, 1.15, 'M. globosa', ha='left', va='top', transform=ax[1][0].transAxes, fontsize = 7)



Expand All @@ -347,7 +364,7 @@ def add_stat_annotation(ax, bp, pval):
for b in a:
b.spines[['right', 'top']].set_visible(False)

fig.legend([bp1["boxes"][0], bp2["boxes"][0]], ['sylph adjusted', 'naive containment'], loc='upper center', frameon=False, bbox_transform = plt.gcf().transFigure, ncol = 2)
fig.legend([bp1["boxes"][0], bp2["boxes"][0]], ['sylph adjusted', 'Naive containment'], loc='upper center', frameon=False, bbox_transform = plt.gcf().transFigure, ncol = 2)
fig.tight_layout(rect=(0,0,1,0.95))
plt.savefig("figures/chng_adjusted_ani_pvalues.pdf")
plt.savefig("figures/chng_adjusted_ani_pvalues.svg")
plt.show()
37 changes: 14 additions & 23 deletions scripts/diagonal_ani_nn.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class result:
cm = 1/2.54 # centimeters in inches\n",
##Change this to get a bigger figure. \n",
cmap = sns.color_palette("muted")
plt.rcParams.update({'font.size': 7})
plt.rcParams.update({'font.size': 7.0})
plt.rcParams.update({'figure.autolayout': True})
plt.rcParams.update({'font.family':'arial'})

Expand All @@ -50,15 +50,6 @@ class result:
]


sylph_files = [
"gtdb-on-reads/gtdb-on-ill-c100.tsv",
"gtdb-on-reads/gtdb-on-ill-c1000.tsv",
"gtdb-on-reads/gtdb-on-nano-c100.tsv",
"gtdb-on-reads/gtdb-on-nano-c1000.tsv",
"gtdb-on-reads/gtdb-on-pac-c100.tsv",
"gtdb-on-reads/gtdb-on-pac-c1000.tsv",
]

truth_files = [
"gtdb-on-reads/true-gtdb-on-on-mock-c100.tsv",
#"gtdb-on-reads/true-gtdb-on-on-mock.tsv",
Expand Down Expand Up @@ -146,7 +137,7 @@ class result:
true_results[-1].append(res)


fig, ax = plt.subplots(1, 3, figsize = (16* cm , 6 * cm), sharey = True, sharex = True)
fig, ax = plt.subplots(1, 3, figsize = (16* cm , 7 * cm), sharey = True, sharex = True)
s = 8

for (i,name) in enumerate(['Illumina', 'Nanopore-old', 'PacBio']):
Expand Down Expand Up @@ -213,18 +204,18 @@ class result:
good_lr = stats.linregress(x,y)
naive_lr = stats.linregress(x,z)

ax[i].set_xlabel("True containment ANI")
if j == 1:
ax[i].scatter(x,y,s = s, color = cmap[2], alpha = 0.5, label = rf"c = 1000, $R^2$ = {round(good_lr.rvalue**2,3)}")
ax[i].set_xlabel("True 31-mer ANI")
else:
if i == 0:
ax[i].set_title("Illumina")
ax[i].set_title("Illumina", fontsize = plt.rcParams['font.size'])
elif i == 1:
ax[i].set_title("Nanopore-old")
ax[i].set_title("Nanopore-old", fontsize = plt.rcParams['font.size'])
elif i == 2:
ax[i].set_title("PacBio")
ax[i].set_title("PacBio", fontsize = plt.rcParams['font.size'])

ax[i].scatter(x,y,s = s, color = cmap[0], alpha = 0.5, label = rf"c = 100, $R^2$ = {round(good_lr.rvalue**2,3)}")
ax[i].scatter(x,y,s = s, color = cmap[0], alpha = 0.5, label = rf"sylph, $R^2$ = {round(good_lr.rvalue**2,3)}")
ax[i].scatter(x,z, s= s, color = cmap[3], alpha = 0.5, label = rf"Naive, $R^2$ = {round(naive_lr.rvalue**2,3)}")
ax[i].plot([90,100],[90,100],'--', c = 'black')
print('covered: ' + str(covered/total) + 'total: ' + str(total))
Expand All @@ -234,18 +225,18 @@ class result:


else:
ax[i].set_xlabel("Estimated lambda")
if j == 1:
ax[i].scatter(x_lam,y_diff,s = s, color = cmap[2], alpha = 0.5, label = 'c = 1000')
ax[i].set_xlabel("Estimated lambda")
else:
if i == 0:
ax[i].set_title("Illumina")
ax[i].set_title("Illumina", fontsize = plt.rcParams['font.size'])
elif i == 1:
ax[i].set_title("Nanopore-old")
ax[i].set_title("Nanopore-old", fontsize = plt.rcParams['font.size'])
elif i == 2:
ax[i].set_title("PacBio")
ax[i].set_title("PacBio", fontsize = plt.rcParams['font.size'])

ax[i].scatter(x_lam,y_diff,s = s, color = cmap[0], alpha = 0.5, label = 'c = 100')
ax[i].scatter(x_lam,y_diff,s = s, color = cmap[0], alpha = 0.5, label = 'sylph')
ax[i].scatter(x_lam,z_diff, s= s, color = cmap[3], alpha = 0.5, label = 'Naive')
ax[i].set_xscale('log')
ax[i].axhline(0, c = 'black', ls = '--')
Expand All @@ -264,9 +255,9 @@ class result:

#ax[j][i].set_ylim([85,100])
if plt_diag:
plt.savefig("figures/mock_diag.pdf")
plt.savefig("figures/mock_diag.svg")
else:
plt.savefig("figures/mock_deviation.pdf")
plt.savefig("figures/mock_deviation.svg")
plt.show()

#plt.scatter(x_lam, y_diff)
Expand Down
6 changes: 3 additions & 3 deletions scripts/manhat.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import numpy as np
cmap = plt.get_cmap('tab20')
plt.set_cmap(cmap)
q = 0.05

def fdr(p_vals, alpha ):
not_used, pvals = ss.multitest.fdrcorrection(p_vals, alpha = alpha)
Expand Down Expand Up @@ -104,7 +105,6 @@ def fdr(p_vals, alpha ):
s = sorted(pvals, reverse=True)
count = 0
val = 0
q = 0.05
for p in s:
if (len(pvals) - count) / len(pvals) * q > 10**p:
val = p
Expand Down Expand Up @@ -160,11 +160,11 @@ def fdr(p_vals, alpha ):

fig, ax = plt.subplots(figsize = (6.5* cm , 6.5 * cm))
plt.ylabel("-log10(q-val)")
plt.xlabel("Agathobacter rectalis")
plt.xlabel("Agathobacter rectalis MAGs sorted by similarity")
mag_c = mag_to_c[agatho_rep]
#print(mag_c)
it_tab20 = [plt.cm.tab20(i) for i in range(20)]
plt.scatter(range(len(agatho_qvals)), -np.array(agatho_qvals), c = [it_tab20[mag_c+7] for x in agatho_qvals], s = size, cmap = cmap)
plt.scatter(range(len(agatho_qvals)), -np.array(agatho_qvals), c = [it_tab20[mag_c] for x in agatho_qvals], s = size, cmap = cmap)
plt.xticks([])
ax.spines[['right', 'top']].set_visible(False)
plt.axhline(-np.log10(q))
Expand Down
22 changes: 12 additions & 10 deletions scripts/mock_community_plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,8 @@
import seaborn as sns
from dataclasses import dataclass
from natsort import natsorted
cmap = sns.color_palette("muted")

np.random.seed(0)
np.random.seed(1)
def rand_jitter(arr):
stdev = .10
return arr + np.random.randn(len(arr)) * stdev
Expand All @@ -33,7 +32,7 @@ class result:
cm = 1/2.54 # centimeters in inches\n",
##Change this to get a bigger figure. \n",
cmap = sns.color_palette("deep")
plt.rcParams.update({'font.size': 6.5})
plt.rcParams.update({'font.size': 7.0})
plt.rcParams.update({'figure.autolayout': True})
plt.rcParams.update({'font.family':'arial'})
results = []
Expand Down Expand Up @@ -127,7 +126,7 @@ class result:


for (i,x) in enumerate(['Illumina', 'Nanopore-old', 'PacBio']):
ax[i].set_title(x)
ax[i].set_title(x, fontsize = plt.rcParams['font.size'])
positions = []
positions.append(i*num_methods - 1)
positions.append(i*num_methods + 0 - offset)
Expand Down Expand Up @@ -180,18 +179,21 @@ class result:
#ax[i].legend(frameon = False,title="# ANI > 99, 95")

labels = []
labels.append("sylph query\n\n" + dot_label[0])
#labels.append("sylph\n-c 1000\n\n" + dot_label[1])
labels.append("mash screen\n\n" + dot_label[1])
labels.append("sourmash\n\n" + dot_label[2])
#labels.append("sylph query\n\n" + dot_label[0])
#labels.append("mash screen\n\n" + dot_label[1])
#labels.append("sourmash\n\n" + dot_label[2])
labels.append("sylph query")
labels.append("mash screen")
labels.append("sourmash")

bp = ax[i].boxplot(boxes, showfliers=False, positions = positions, widths = width, labels=labels)
for median in bp['medians']:
median.set_color('black')
print(median.get_ydata())
ax[i].tick_params(axis='x', labelrotation=0)

if i == 0:
ax[i].set_ylabel('Query ANI')
ax[i].set_ylabel('Estimated ANI')



Expand All @@ -208,7 +210,7 @@ class result:
#plt.scatter(rand_jitter([positions[4*i+3] for x in range(len(box_sour))]), box_sour, s = s)

#ax.boxplot(boxes, showfliers=False, positions = positions, widths = width, vert=False)
plt.savefig("figures/mock_community_box.pdf")
plt.savefig("figures/mock_community_box.svg")
plt.show()


Loading

0 comments on commit 9e7f05f

Please sign in to comment.