Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 18 additions & 17 deletions DashML/Basecall/Basecall_Bias.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from DashML.locks import PLOT_LOCK

class Modification_Bias:

Expand Down Expand Up @@ -199,21 +200,21 @@ def plot_df(self, df, curr_seq):
df = df.loc[df['Number of Mismatches'] > df['Number of Mismatches'].mean()]
df['Mismatches'] = df['Mismatches'] + df['Unmodified Structure']
#print(df)

fig = df.plot(x='Position', y = 'Number of Mismatches', kind="bar", rot=45, title= curr_seq + " Mismatches by Structure", figsize=(12,8))
# fig = df.plot(kind="bar", rot=90, title="Modification Rates by Position", figsize=(12,8), stacked=True)
#### save plot #####
i = 0
score = df['Mismatches'].to_numpy()
for p in fig.patches:
fig.annotate(score[i], xy=(p.get_x(), p.get_height()))
i += 1

fig = plt.gcf()

if not os.path.exists(self.save_path):
os.makedirs(self.save_path)

figname = os.path.join(self.save_path + '/'+ curr_seq + '_' + 'Position_Structure_Modification_Rate' + '.png')
fig.savefig(figname)
with PLOT_LOCK:
fig = df.plot(x='Position', y = 'Number of Mismatches', kind="bar", rot=45, title= curr_seq + " Mismatches by Structure", figsize=(12,8))
# fig = df.plot(kind="bar", rot=90, title="Modification Rates by Position", figsize=(12,8), stacked=True)
#### save plot #####
i = 0
score = df['Mismatches'].to_numpy()
for p in fig.patches:
fig.annotate(score[i], xy=(p.get_x(), p.get_height()))
i += 1

fig = plt.gcf()

if not os.path.exists(self.save_path):
os.makedirs(self.save_path)

figname = os.path.join(self.save_path + '/'+ curr_seq + '_' + 'Position_Structure_Modification_Rate' + '.png')
fig.savefig(figname)
#plt.show)
146 changes: 75 additions & 71 deletions DashML/Basecall/Basecall_Plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from DashML.locks import PLOT_LOCK

def plot_modification(mods, dir_name="Default", save_path="./", mod_type="Modifications", seq_name="Sequence"):
### deprecated ####
Expand Down Expand Up @@ -48,23 +49,24 @@ def plot_modification(mods, dir_name="Default", save_path="./", mod_type="Modifi
if not os.path.exists(dir_name):
os.makedirs(dir_name)

curr_pos = 0
for i in range(num_plots):
mod_pos = np.array(positions[curr_pos:curr_pos+positions_size])
mod_num = np.array(mods[curr_pos:curr_pos+positions_size])
df = pd.DataFrame({'Reference Nucleotide Position': mod_pos,
'Number of Modifications': mod_num})
ax = df.plot.bar(x='Reference Nucleotide Position', y='Number of Modifications', rot=90, figsize=(figlen, 10),
color=bar_color, xlabel='Reference Nucleotide Position', ylabel='Number of ' + mod_type,
title=seq_name +' ' + mod_type, legend=False)
# pps = ax.bar(X, np.array(mods[curr_pos:curr_pos+positions_size]),
# color=bar_color, align="center", edgecolor="black", width=.4)
#ax.bar_label(ax, label_type='edge')
curr_pos = curr_pos + positions_size
fig = plt.gcf()
#plt.showblock=False)
figname = os.path.join(dir_name + '/' + seq_name + '_' + mod_type + '_' + str(i) + '.png')
fig.savefig(figname)
with PLOT_LOCK:
curr_pos = 0
for i in range(num_plots):
mod_pos = np.array(positions[curr_pos:curr_pos+positions_size])
mod_num = np.array(mods[curr_pos:curr_pos+positions_size])
df = pd.DataFrame({'Reference Nucleotide Position': mod_pos,
'Number of Modifications': mod_num})
ax = df.plot.bar(x='Reference Nucleotide Position', y='Number of Modifications', rot=90, figsize=(figlen, 10),
color=bar_color, xlabel='Reference Nucleotide Position', ylabel='Number of ' + mod_type,
title=seq_name +' ' + mod_type, legend=False)
# pps = ax.bar(X, np.array(mods[curr_pos:curr_pos+positions_size]),
# color=bar_color, align="center", edgecolor="black", width=.4)
#ax.bar_label(ax, label_type='edge')
curr_pos = curr_pos + positions_size
fig = plt.gcf()
#plt.showblock=False)
figname = os.path.join(dir_name + '/' + seq_name + '_' + mod_type + '_' + str(i) + '.png')
fig.savefig(figname)


def plot_modification_summary(mods, dir_name="Default", save_path="./", \
Expand All @@ -90,28 +92,28 @@ def plot_modification_summary(mods, dir_name="Default", save_path="./", \
dir_name = save_path + dir_name + '_Modification_Plots'
if not os.path.exists(dir_name):
os.makedirs(dir_name)

curr_pos = 0
for i in range(num_plots):
mod_pos = np.array(positions[curr_pos:curr_pos + positions_size])
mod_del = np.array(dels[curr_pos:curr_pos + positions_size])
mod_ins = np.array(ins[curr_pos:curr_pos + positions_size])
mod_mis = np.array(mismatch[curr_pos:curr_pos + positions_size])

df = pd.DataFrame({'Reference Nucleotide Position': mod_pos,
'Deletions': mod_del, 'Insertions': mod_ins,
'Mismatches': mod_mis})
ax = df.plot.bar(x='Reference Nucleotide Position', rot=90, figsize=(figlen, 10),
xlabel='Reference Nucleotide Position', ylabel='Number of ' + mod_type,
title=seq_name + ' ' + mod_type, legend=True, ylim=(0,.01))
# pps = ax.bar(X, np.array(mods[curr_pos:curr_pos+positions_size]),
# color=bar_color, align="center", edgecolor="black", width=.4)
# ax.bar_label(ax, label_type='edge')
curr_pos = curr_pos + positions_size
fig = plt.gcf()
#plt.showblock=False)
figname = os.path.join(dir_name + '/' + seq_name + '_' + mod_type + '_' + str(i) + '.png')
fig.savefig(figname)
with PLOT_LOCK:
for i in range(num_plots):
mod_pos = np.array(positions[curr_pos:curr_pos + positions_size])
mod_del = np.array(dels[curr_pos:curr_pos + positions_size])
mod_ins = np.array(ins[curr_pos:curr_pos + positions_size])
mod_mis = np.array(mismatch[curr_pos:curr_pos + positions_size])

df = pd.DataFrame({'Reference Nucleotide Position': mod_pos,
'Deletions': mod_del, 'Insertions': mod_ins,
'Mismatches': mod_mis})
ax = df.plot.bar(x='Reference Nucleotide Position', rot=90, figsize=(figlen, 10),
xlabel='Reference Nucleotide Position', ylabel='Number of ' + mod_type,
title=seq_name + ' ' + mod_type, legend=True, ylim=(0,.01))
# pps = ax.bar(X, np.array(mods[curr_pos:curr_pos+positions_size]),
# color=bar_color, align="center", edgecolor="black", width=.4)
# ax.bar_label(ax, label_type='edge')
curr_pos = curr_pos + positions_size
fig = plt.gcf()
#plt.showblock=False)
figname = os.path.join(dir_name + '/' + seq_name + '_' + mod_type + '_' + str(i) + '.png')
fig.savefig(figname)


# #### plot modifications #####
Expand Down Expand Up @@ -189,38 +191,40 @@ def plot_mismatch(mismatches, seq_name="Sequence", dir_name="Default", save_path
#plt.showblock=False)

def plot_average_mod_rate(df, dir_name="Default", save_path="./"):
    """Save a bar chart of the overall modification rates as a PNG.

    The "Condition" column is dropped, every remaining value is multiplied
    by 100, and the result is drawn with ``DataFrame.plot(kind="bar")``.
    The image is written to
    ``<save_path><dir_name>_Modification_Plots/Average_Modification_Rate.png``.

    Args:
        df: DataFrame holding a "Condition" column plus the columns to plot.
            Presumably the values are fractional rates that the ``* 100``
            turns into percentages -- TODO confirm against the caller.
        dir_name: Prefix of the output directory name.
        save_path: Path prefix concatenated directly in front of
            ``dir_name``; no separator is inserted between the two.

    Raises:
        KeyError: If ``df`` has no "Condition" column.
    """
    # Data preparation and directory creation do not touch pyplot, so they
    # stay outside the lock to keep the critical section short.
    rates = df.drop("Condition", axis=1) * 100
    out_dir = save_path + dir_name + '_Modification_Plots'
    # exist_ok avoids the check-then-create race between concurrent callers.
    os.makedirs(out_dir, exist_ok=True)
    figname = os.path.join(out_dir, 'Average_Modification_Rate.png')

    # pyplot keeps process-wide state (rcParams, the current figure), so all
    # drawing and saving is serialized through PLOT_LOCK.
    with PLOT_LOCK:
        # NOTE: this changes the font size globally and is left in place on
        # purpose, exactly as before, in case later plots rely on it.
        plt.rcParams.update({'font.size': 20})
        ax = rates.plot(kind="bar", rot=30, title="Overall Modification Rates", figsize=(12, 12))
        # Take the figure from the Axes we just drew rather than relying on
        # whatever pyplot currently considers the "current" figure.
        fig = ax.get_figure()
        try:
            fig.savefig(figname)
        finally:
            # Without this, every call leaves a 12x12 figure registered with
            # pyplot for the lifetime of the process.
            plt.close(fig)

def plot_average_mod_by_pos_rate(df, dir_name="Default", save_path="./"):
    """Save per-column line plots of modification rate by position as a PNG.

    Every value in ``df`` is multiplied by 100 and each column is drawn as
    its own purple line subplot with the y-axis fixed to 0-100. The image is
    written to
    ``<save_path><dir_name>_Modification_Plots/Position_Modification_Rate.png``.

    Args:
        df: DataFrame whose columns are plotted one subplot each.
            Presumably the values are fractional rates indexed by position
            -- TODO confirm against the caller.
        dir_name: Prefix of the output directory name.
        save_path: Path prefix concatenated directly in front of
            ``dir_name``; no separator is inserted between the two.
    """
    # Data preparation and directory creation do not touch pyplot, so they
    # stay outside the lock to keep the critical section short.
    rates = df * 100
    out_dir = save_path + dir_name + '_Modification_Plots'
    # exist_ok avoids the check-then-create race between concurrent callers.
    os.makedirs(out_dir, exist_ok=True)
    figname = os.path.join(out_dir, 'Position_Modification_Rate.png')

    # pyplot keeps process-wide state (rcParams, the current figure), so all
    # drawing and saving is serialized through PLOT_LOCK.
    with PLOT_LOCK:
        # NOTE: this changes the font size globally and is left in place on
        # purpose, exactly as before, in case later plots rely on it.
        plt.rcParams.update({'font.size': 20})
        # With subplots=True the return value is a collection of Axes, not a
        # figure, so it is not bound to a name here.
        rates.plot(kind="line", rot=45, title="Modification Rates by Position", subplots=True,
                   figsize=(12, 8), ylim=(0, 100), color='purple')
        # Safe under the lock: the figure just created is the current one.
        fig = plt.gcf()
        try:
            fig.savefig(figname)
        finally:
            # Without this, every call leaves a figure registered with pyplot
            # for the lifetime of the process.
            plt.close(fig)
4 changes: 3 additions & 1 deletion DashML/GUI/DT.py
Original file line number Diff line number Diff line change
Expand Up @@ -1336,11 +1336,13 @@ def handle_start_worker(self, params):
source = params.get('source')
if source == 'basecall':
worker = BasecallWorker(
source,
params['lid'], params['contig'], params['basecall_path'],
params['modification'], params['modification2']
)
elif source == 'signal':
worker = SignalWorker(
source,
params['lid'], params['contig'], params['signal_path'],
params['modification'], params['modification2']
)
Expand Down Expand Up @@ -1375,7 +1377,7 @@ def on_worker_finished(self, source, gpath1, gpath2):
self.load_basecall_section.basecall_graph2 = new_graph2
else:
layout = self.load_signal_section.signal_graphs_layout
old1, old2 = self.load_signal_section.signal_graph1, self.load_basecall_section.signal_graph2
old1, old2 = self.load_signal_section.signal_graph1, self.load_signal_section.signal_graph2
new_graph1 = self.load_signal_section.create_sample_graph(
f"{source.capitalize()}: Average Signal Rates by Position", gpath1)
new_graph2 = self.load_signal_section.create_sample_graph(
Expand Down
48 changes: 25 additions & 23 deletions DashML/Landscape/Cluster/Centroid_ConservedRegions.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import varnaapi
import DashML.Database_fx.Insert_DB as dbins
import DashML.Database_fx.Select_DB as dbsel
from DashML.locks import PLOT_LOCK

#### Must Load Varna Jar locally #####
varna_path = files("DashML.Varna") / "VARNAv3-93.jar"
Expand Down Expand Up @@ -143,29 +144,30 @@ def save_bpseq(lid, seq, seqlen, structure, metric):
return

def get_vplot(lid, seq, seq_len, sequence, ss, cons_ss, cons_bp, metric):
# Draw a VARNA secondary-structure image for one sequence and save it to
# save_path + "<lid>_<seq>_<metric>_conserved.png"; the drawing parameters are
# dumped alongside it as "<lid>_<seq>_<metric>_conserved.yml".
# Positions in cons_ss are highlighted in '#f16849' and positions in cons_bp
# in '#c5def2'. save_path is not a parameter; it is read from the enclosing
# scope (its definition is not visible in this span).
# NOTE(review): this span is a rendered diff with indentation stripped. The 23
# lines that follow are the pre-change body; the same statements are repeated
# further down under `with PLOT_LOCK:` as the post-change body. Only one of the
# two belongs in the real function.
v = varnaapi.Structure(structure=ss, sequence=sequence)
v.update(resolution=10, zoom=1, algorithm='radiate', flat=True)
save_bpseq(lid, seq, seqlen=seq_len, structure=v, metric=metric)
out_fig = save_path + str(lid) + "_" + seq + "_" + metric + "_conserved.png"
v.dump_param(save_path + str(lid) + "_" + seq + "_" + metric + "_conserved.yml")
#annotating high reactivity regions.
for r in cons_ss:
if r < seq_len:
r = r + 1
v.add_highlight_region(r, r, fill='#f16849', outline='#f16849')
#conserved inaccessible regions
for r in cons_bp:
if r < seq_len:
r = r + 1
v.add_highlight_region(r, r, fill='#c5def2', outline='#c5def2')
#v.add_colormap(values=np.arange(1, 10), vmin=30, vmax=40, style='bw')
#values is an array where each position indicates color 0-n
#overall style is applied
# annotating interactions
cmap = np.ones(seq_len)
v.add_colormap(values=[3], style='energy')
#v.add_aux_BP(1, 10, color='red')
v.savefig(out_fig)
# Post-change body: the same steps, now executed while holding PLOT_LOCK.
with PLOT_LOCK:
v = varnaapi.Structure(structure=ss, sequence=sequence)
v.update(resolution=10, zoom=1, algorithm='radiate', flat=True)
# The structure object is also handed to save_bpseq before any highlights
# are added.
save_bpseq(lid, seq, seqlen=seq_len, structure=v, metric=metric)
out_fig = save_path + str(lid) + "_" + seq + "_" + metric + "_conserved.png"
v.dump_param(save_path + str(lid) + "_" + seq + "_" + metric + "_conserved.yml")
#annotating high reactivity regions.
# r is bumped by one when r < seq_len -- presumably converting 0-based
# positions to VARNA's 1-based numbering; TODO confirm.
# NOTE(review): with indentation lost it cannot be told from here whether the
# highlight call sits inside the `if` or runs for every r.
for r in cons_ss:
if r < seq_len:
r = r + 1
v.add_highlight_region(r, r, fill='#f16849', outline='#f16849')
#conserved inaccessible regions
for r in cons_bp:
if r < seq_len:
r = r + 1
v.add_highlight_region(r, r, fill='#c5def2', outline='#c5def2')
#v.add_colormap(values=np.arange(1, 10), vmin=30, vmax=40, style='bw')
#values is an array where each position indicates color 0-n
#overall style is applied
# annotating interactions
# NOTE(review): cmap is assigned but never used in the visible code.
cmap = np.ones(seq_len)
v.add_colormap(values=[3], style='energy')
#v.add_aux_BP(1, 10, color='red')
v.savefig(out_fig)
#v.show()


Expand Down
36 changes: 19 additions & 17 deletions DashML/Landscape/Cluster/Centroid_Putative.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from importlib.resources import files
import DashML.Database_fx.Insert_DB as dbins
import DashML.Database_fx.Select_DB as dbsel
from DashML.locks import PLOT_LOCK

######### Draws Varna Images of Predicted Secondary Structures #####
# TODO: Different from Native Images
Expand Down Expand Up @@ -74,23 +75,24 @@ def draw_structure(df, method='hamming'):
print(out)
clusters = df['cluster'].unique()
for cluster in clusters:
print(cluster)
ss = str(df.loc[df['cluster'] == cluster, 'secondary'].unique()[0])
v = varnaapi.Structure(structure=ss, sequence=sequence)
v.update(resolution=10, zoom=1)
out_fig = out + seq + "_" + str(cluster) + ".png"
save_bpseq(seq, seqlen, cluster, v, out)

# annotating high reactivity regions.
# v.add_highlight_region(11, 21)
# v.add_colormap(values=np.arange(1, 10), vmin=30, vmax=40, style='bw')
# values is an array where each position indicates color 0-n
# overall style is applied
# annotating interactions
# v.add_colormap(values=[2,5,5,5,5, 0, 0, 0, 0, 3,3 ,3],style='energy')
# v.add_aux_BP(1, 10, color='red')
v.savefig(out_fig)
v.show()
with PLOT_LOCK:
print(cluster)
ss = str(df.loc[df['cluster'] == cluster, 'secondary'].unique()[0])
v = varnaapi.Structure(structure=ss, sequence=sequence)
v.update(resolution=10, zoom=1)
out_fig = out + seq + "_" + str(cluster) + ".png"
save_bpseq(seq, seqlen, cluster, v, out)

# annotating high reactivity regions.
# v.add_highlight_region(11, 21)
# v.add_colormap(values=np.arange(1, 10), vmin=30, vmax=40, style='bw')
# values is an array where each position indicates color 0-n
# overall style is applied
# annotating interactions
# v.add_colormap(values=[2,5,5,5,5, 0, 0, 0, 0, 3,3 ,3],style='energy')
# v.add_aux_BP(1, 10, color='red')
v.savefig(out_fig)
v.show()
# sys.exit(0)

# dataframes of centroids
Expand Down
Loading
Loading