Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,3 +61,9 @@ plot_mutspec192(spectra192)

1. [IQ-Tree2](http://www.iqtree.org/) - efficient software for phylogenomic inference
2. [Genetic codes](https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?chapter=tgencodes#SG1)

## How to cite?

```text
Bogdan Efimenko, Konstantin Popadin, Konstantin Gunbin, NeMu: a comprehensive pipeline for accurate reconstruction of neutral mutation spectra from evolutionary data, Nucleic Acids Research, Volume 52, Issue W1, 5 July 2024, Pages W108–W115, https://doi.org/10.1093/nar/gkae438
```
2 changes: 1 addition & 1 deletion pymutspec/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from pymutspec import annotation, draw, io, constants, utils

# Define the pymutspec version
_version = "0.0.9"
__version__ = "0.0.11"
3 changes: 2 additions & 1 deletion pymutspec/annotation/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
from .spectra import (
calculate_mutspec, collapse_mutspec, calc_edgewise_spectra,
complete_sbs192_columns, jackknife_spectra_sampling, collapse_sbs192,
get_cossim, get_eucdist,
get_cossim, get_eucdist, filter_outlier_branches, sample_spectrum,
get_iqr_bounds,
)
from .mut import CodonAnnotation, mutations_summary

Expand Down
46 changes: 46 additions & 0 deletions pymutspec/annotation/spectra.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,52 @@ def calculate_mutspec(
return mutspec


def sample_spectrum(obs_df: pd.DataFrame, exp_freqs,
use_proba=True, use_context=False,
frac=0.5, nreplics=100):
"""
Sample half of branches and calculate spectrum
"""
samples = []
edges = obs_df.AltNode.unique()
n_to_sample = int(len(edges) * frac)
for _ in range(nreplics):
cur_edges = np.random.choice(edges, n_to_sample, replace=False)
obs_smpl = obs_df[obs_df['AltNode'].isin(cur_edges)]
one_spectrum = calculate_mutspec(
obs_smpl, exp_freqs, use_context=use_context, use_proba=use_proba)
samples.append(one_spectrum)

sampled = pd.concat(samples)

quartiles = sampled.groupby('Mut')['MutSpec'].quantile([0.05, 0.5, 0.95]).unstack().rename(
columns={0.05: "MutSpec_q05", 0.5: "MutSpec_median", 0.95: "MutSpec_q95"}).reset_index()
return quartiles


def get_iqr_bounds(series: pd.Series):
"Function calculates Interquartile range (IQR) and used for outliers filtrtation"
q1 = series.quantile(0.25)
q3 = series.quantile(0.75)
iqr = q3 - q1
lower_bound = q1 - 1.5 * iqr
upper_bound = q3 + 1.5 * iqr
return lower_bound, upper_bound


def filter_outlier_branches(obs_df: pd.DataFrame, use_proba=True):
if use_proba:
edge_nobs = obs_df.groupby('AltNode')['ProbaMut'].sum()
else:
edge_nobs = obs_df.groupby('AltNode')['Mut'].count()

_, upper_bound = get_iqr_bounds(edge_nobs)
edge_nobs_flt = edge_nobs[edge_nobs < upper_bound]
selected_branches = edge_nobs_flt.index
obs_df_flt = obs_df[obs_df['AltNode'].isin(selected_branches)]
return obs_df_flt


def collapse_mutspec(ms192: pd.DataFrame):
assert ms192.shape[0] == 192
for c in ["Mut", "ObsFr", "ExpFr"]:
Expand Down
82 changes: 68 additions & 14 deletions pymutspec/draw/spectra.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@

from typing import Iterable

import matplotlib.pyplot as plt
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns

from pymutspec.constants import possible_sbs192, possible_sbs96
Expand Down Expand Up @@ -72,20 +73,43 @@ def plot_mutspec12(
ticksize=8,
titlesize=14,
ylabelsize=12,
show=True,
show=True,
ax=None,
**kwargs,
):
# TODO add checks of mutspec12
# TODO add description to all plot* functions

if ax is None:
fig = plt.figure(figsize=figsize)
ax = fig.gca()
elif isinstance(ax, matplotlib.axes._subplots.AxesSubplot):
pass
else:
raise ValueError('ax must be None or matplotlib.axes._subplots.AxesSubplot')

if style == "bar":
plot_func = sns.barplot
_cols = set(mutspec.columns)
if 'MutSpec_median' in _cols and 'MutSpec_q05' in _cols and 'MutSpec_q95' in _cols:
ax = sns.barplot(data=mutspec, x="Mut", y='MutSpec',
order=sbs12_ordered, ax=ax, **kwargs)
mutspec_ordered = mutspec.set_index('Mut').loc[sbs12_ordered]
mutspec_ordered['MutSpec_q05'] = mutspec_ordered['MutSpec_median'] - mutspec_ordered['MutSpec_q05']
mutspec_ordered['MutSpec_q95'] -= mutspec_ordered['MutSpec_median']
ax.errorbar(mutspec_ordered.index, mutspec_ordered['MutSpec_median'],
yerr=mutspec_ordered[['MutSpec_q05', 'MutSpec_q95']].values.T,
fmt=".", color="gray", elinewidth=0.7, capsize=4)
else:
ax = sns.barplot(data=mutspec, x="Mut", y=spectra_col,
order=sbs12_ordered, ax=ax, **kwargs)

elif style == "box":
plot_func = sns.boxplot
ax = sns.boxplot(x="Mut", y=spectra_col, data=mutspec,
order=sbs12_ordered, ax=ax, **kwargs)

else:
raise NotImplementedError

fig = plt.figure(figsize=figsize)
ax = plot_func(x="Mut", y=spectra_col, data=mutspec, order=sbs12_ordered, ax=fig.gca(), **kwargs)

ax.grid(axis="y", alpha=.7, linewidth=0.5)

# map colors to bars
Expand All @@ -99,7 +123,7 @@ def plot_mutspec12(
plt.xticks(fontsize=ticksize, fontname=fontname)

if savepath is not None:
plt.savefig(savepath)
plt.savefig(savepath, bbox_inches="tight")
if show:
plt.show()
else:
Expand All @@ -121,7 +145,8 @@ def plot_mutspec192(
ticksize=6,
titlesize=16,
ylabelsize=16,
show=True,
show=True,
ax=None,
**kwargs,
):
"""
Expand All @@ -142,7 +167,7 @@ def plot_mutspec192(
savepath = kwargs["filepath"]
print("savepath =", savepath)
kwargs.pop("filepath")

# TODO add checks of mutspec192
ms192 = mutspec192.copy()
if labels_style == "long":
Expand All @@ -154,15 +179,44 @@ def plot_mutspec192(
x_col = "Mut"
else:
raise ValueError("Available labels_style are: 'cosmic' and 'long'")
fig = plt.figure(figsize=figsize)

if ax is None:
fig = plt.figure(figsize=figsize)
ax = fig.gca()
elif isinstance(ax, matplotlib.axes._subplots.AxesSubplot):
pass
else:
raise ValueError('ax must be None or matplotlib.axes._subplots.AxesSubplot')

if style == "bar":
ax = sns.barplot(
x=x_col, y=spectra_col, data=ms192, order=order, errwidth=1, ax=fig.gca(), **kwargs,
)
_cols = set(ms192.columns)
if 'MutSpec_median' in _cols and 'MutSpec_q05' in _cols and 'MutSpec_q95' in _cols:
ax = sns.barplot(data=ms192, x='Mut', y='MutSpec', order=order, ax=ax, **kwargs)
mutspec_ordered = ms192.set_index('Mut').loc[sbs_order]
mutspec_ordered['MutSpec_q05'] = mutspec_ordered['MutSpec_median'] - mutspec_ordered['MutSpec_q05']
mutspec_ordered['MutSpec_q95'] -= mutspec_ordered['MutSpec_median']

ymed, yerr_min, yerr_max = [], [], []
for mt in order:
if mt == '':
ymed.append(0.)
yerr_min.append(0.)
yerr_max.append(0.)
else:
ymed.append(mutspec_ordered.loc[mt, 'MutSpec_median'])
yerr_min.append(mutspec_ordered.loc[mt, 'MutSpec_q05'])
yerr_max.append(mutspec_ordered.loc[mt, 'MutSpec_q95'])
yerr = [yerr_min, yerr_max]
x = list(range(len(order)))
ax.errorbar(x, ymed, yerr=yerr, fmt=".", color="gray", elinewidth=0.7, capsize=2)
else:
ax = sns.barplot(data=ms192, x=x_col, y=spectra_col, order=order, errwidth=1, ax=ax, **kwargs)
elif style == "box":
ax = sns.boxplot(
x=x_col, y=spectra_col, data=ms192, order=order, ax=fig.gca(), **kwargs,
)
# plt.savefig(savepath, dpi=300, bbox_inches="tight")
# return
ax.grid(axis="y", alpha=.7, linewidth=0.5)
ax.set_title(title, fontsize=titlesize, fontname=fontname)
ax.set_ylabel(ylabel if ylabel else "", fontsize=ylabelsize, fontname=fontname)
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "PyMutSpec"
version = "0.0.10"
version = "0.0.11"
authors = [
{ name="kpotoh", email="axepon@mail.ru" },
]
Expand Down
Loading