Skip to content

Commit a3a97aa

Browse files
authored
MRG: add filter_presence (#46)
* add --no-x-labels and --no-y-labels to clustermap * add more * bump to v0.4.3
1 parent 00a6533 commit a3a97aa

File tree

2 files changed

+76
-7
lines changed

2 files changed

+76
-7
lines changed

pyproject.toml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,11 @@ name = "sourmash_plugin_betterplot"
33
description = "sourmash plugin for improved plotting/viz and cluster examination."
44
readme = "README.md"
55
requires-python = ">=3.10"
6-
version = "0.4.2"
6+
version = "0.4.3"
77

88
dependencies = ["sourmash>=4.8.8,<5", "sourmash_utils>=0.2",
99
"matplotlib", "numpy", "scipy", "scikit-learn",
10-
"seaborn", "upsetplot", "matplotlib_venn"]
10+
"seaborn", "upsetplot", "matplotlib_venn", "pandas"]
1111

1212
[metadata]
1313
license = { text = "BSD 3-Clause License" }
@@ -24,3 +24,4 @@ cluster_to_categories_command = "sourmash_plugin_betterplot:Command_ClusterToCat
2424
tsne_command = "sourmash_plugin_betterplot:Command_TSNE"
2525
tsne2_command = "sourmash_plugin_betterplot:Command_TSNE2"
2626
venn = "sourmash_plugin_betterplot:Command_Venn"
27+
presence_filter = "sourmash_plugin_betterplot:Command_PresenceFilter"

src/sourmash_plugin_betterplot.py

Lines changed: 73 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
from matplotlib.lines import Line2D
2424
import seaborn as sns
2525
import upsetplot
26+
import pandas as pd
2627

2728
import sourmash
2829
from sourmash import sourmash_args
@@ -846,6 +847,14 @@ def __init__(self, subparser):
846847
"--no-labels", action="store_true",
847848
help="disable X & Y axis labels"
848849
)
850+
subparser.add_argument(
851+
"--no-x-labels", action="store_true",
852+
help="disable X axis labels"
853+
)
854+
subparser.add_argument(
855+
"--no-y-labels", action="store_true",
856+
help="disable Y axis labels"
857+
)
849858

850859
def main(self, args):
851860
super().main(args)
@@ -901,12 +910,15 @@ def main(self, args):
901910
if args.boolean: # turn off colorbar if boolean.
902911
kw_args['cbar_pos'] = None
903912

913+
yticklabels=sample_d_to_idents(query_d_items)
914+
xticklabels=sample_d_to_idents(against_d_items)
904915
if args.no_labels:
905-
xticklabels=[]
906-
yticklabels=[]
907-
else:
908-
yticklabels=sample_d_to_idents(query_d_items)
909-
xticklabels=sample_d_to_idents(against_d_items)
916+
xticklabels = []
917+
yticklabels = []
918+
elif args.no_x_labels:
919+
xticklabels = []
920+
elif args.no_y_labels:
921+
yticklabels = []
910922

911923
# turn into dissimilarity matrix
912924
# plot!
@@ -1471,3 +1483,59 @@ def main(self, args):
14711483
if args.output:
14721484
notify(f"saving to '{args.output}'")
14731485
pylab.savefig(args.output)
1486+
1487+
1488+
class Command_PresenceFilter(CommandLinePlugin):
    """Plot detection or cANI against average abundance for gather matches.

    Reads a CSV of 'gather' results, keeps only the rows whose
    ``unique_intersect_bp`` is at least ``min_num_hashes * scaled``, and
    saves a scatter plot of the remaining matches with a log-scaled Y axis.
    """
    command = 'presence_filter'
    description = """\
Provide a filtered view of 'gather' output, plotting detection or ANI
against average abund for significant matches.
"""

    usage = """
   sourmash scripts presence_filter gather.csv -o presence.png
"""
    epilog = epilog
    formatter_class = argparse.RawTextHelpFormatter

    def __init__(self, subparser):
        """Register the command-line arguments for ``presence_filter``."""
        super().__init__(subparser)
        # add argparse arguments here.
        subparser.add_argument('gather_csv')
        subparser.add_argument('-o', '--output', required=True,
                               help="save image to this file")
        # type=int is required: without it a value supplied on the command
        # line arrives as a str and the threshold arithmetic in main() breaks.
        subparser.add_argument('-N', '--min-num-hashes', type=int,
                               default=3, help='threshold (default: 3)')
        # --detection and --ani share one destination; detection is the
        # default and --ani switches it off.
        subparser.add_argument('--detection', action="store_true",
                               default=True,
                               help="plot fraction of genome detected on the "
                               "X axis (default)")
        subparser.add_argument('--ani', dest='detection',
                               action="store_false",
                               help="plot cANI of match on the X axis instead")

    def main(self, args):
        """Filter the gather CSV and save the resulting scatter plot.

        Raises:
            ValueError: if the 'scaled' column of the CSV does not contain
                exactly one distinct value (this includes an empty CSV).
        """
        # consistent with the other commands in this file, which invoke the
        # base-class main() before doing their own work.
        super().main(args)

        df = pd.read_csv(args.gather_csv)
        notify(f"loaded {len(df)} rows from '{args.gather_csv}'")

        # the threshold is expressed in bp, so it is only meaningful when
        # every row shares a single 'scaled' value.
        scaled_values = set(df['scaled'])
        if len(scaled_values) != 1:
            raise ValueError(
                f"expected exactly one 'scaled' value in "
                f"'{args.gather_csv}'; found {len(scaled_values)}"
            )
        (scaled,) = scaled_values

        threshold = args.min_num_hashes * scaled
        df = df[df['unique_intersect_bp'] >= threshold]
        notify(f"filtered down to {len(df)} rows with unique_intersect_bp >= {threshold}")

        # pick the X axis data and its label together so they cannot drift.
        if args.detection:
            x_values = df.f_match_orig
            x_label = 'fraction of genome detected'
        else:
            x_values = df.match_containment_ani
            x_label = 'cANI of match'

        plt.plot(x_values, df.average_abund, '.')
        ax = plt.gca()
        ax.set_xlabel(x_label)
        ax.set_ylabel('number of copies')
        ax.set_yscale('log')

        notify(f"saving figure to '{args.output}'")
        plt.savefig(args.output)

0 commit comments

Comments
 (0)