9
9
import argparse
10
10
import os
11
11
import csv
12
- from collections import defaultdict
12
+ from collections import defaultdict , Counter
13
13
from itertools import chain , combinations
14
14
import pickle
15
15
@@ -1018,6 +1018,15 @@ def powerset(iterable, *, start=2):
1018
1018
1019
1019
notify (f"Loaded { len (siglist )} signatures & downsampled to scaled={ scaled } " )
1020
1020
1021
+ names_check = [ ss .name for ss in siglist ]
1022
+ if len (set (names_check )) != len (names_check ):
1023
+ notify ("ERROR: duplicate names or sketches; please fix!!" )
1024
+ cnt = Counter (names_check )
1025
+ for k , v in cnt .most_common ():
1026
+ if v > 1 :
1027
+ print (f"\t * { k } shows up { v } times" )
1028
+ sys .exit (- 1 )
1029
+
1021
1030
# @CTB: check scaled, ksize, etc.
1022
1031
1023
1032
if not siglist :
@@ -1041,6 +1050,7 @@ def powerset(iterable, *, start=2):
1041
1050
truncate_name = lambda x : x [:truncate_at - 3 ] + '...' if len (x ) >= truncate_at else x
1042
1051
get_name = lambda x : [ truncate_name (ss .name ) for ss in x ]
1043
1052
names = [ get_name (combo ) for combo in pset ]
1053
+
1044
1054
notify (f"powerset of distinct combinations: { len (pset )} " )
1045
1055
1046
1056
# CTB: maybe turn the intersection code below into a class?
@@ -1511,6 +1521,12 @@ def __init__(self, subparser):
1511
1521
default = True )
1512
1522
subparser .add_argument ('--ani' , dest = 'detection' ,
1513
1523
action = "store_false" )
1524
+ subparser .add_argument ('--green-color' ,
1525
+ help = "color genomes with matching names green" )
1526
+ subparser .add_argument ('--red-color' ,
1527
+ help = "color genomes with matching names red" )
1528
+ subparser .add_argument ('--blue-color' ,
1529
+ help = "color genomes with matching names blue" )
1514
1530
1515
1531
def main (self , args ):
1516
1532
df = pd .read_csv (args .gather_csv )
@@ -1525,9 +1541,35 @@ def main(self, args):
1525
1541
notify (f"filtered down to { len (df )} rows with unique_intersect_bp >= { threshold } " )
1526
1542
1527
1543
if args .detection :
1528
- plt .plot (df .f_match_orig , df .average_abund , '.' )
1544
+ plt .plot (df .f_match_orig , df .average_abund , 'k .' )
1529
1545
else :
1530
- plt .plot (df .match_containment_ani , df .average_abund , '.' )
1546
+ plt .plot (df .match_containment_ani , df .average_abund , 'k.' )
1547
+
1548
+ dfs = []
1549
+ colors = []
1550
+ if args .green_color :
1551
+ df2 = df [df ['match_name' ].str .contains (args .green_color )]
1552
+ notify (f"{ len (df2 )} matches to { args .green_color } => green circles" )
1553
+ dfs .append (df2 )
1554
+ colors .append ('go' )
1555
+ if args .red_color :
1556
+ df2 = df [df ['match_name' ].str .contains (args .red_color )]
1557
+ notify (f"{ len (df2 )} matches to { args .red_color } => red crosses" )
1558
+
1559
+ dfs .append (df2 )
1560
+ colors .append ('r+' )
1561
+ if args .blue_color :
1562
+ df2 = df [df ['match_name' ].str .contains (args .blue_color )]
1563
+ notify (f"{ len (df2 )} matches to { args .blue_color } => blue triangles" )
1564
+ dfs .append (df2 )
1565
+ colors .append ('bv' )
1566
+
1567
+ for (df2 , color ) in zip (dfs , colors ):
1568
+ if args .detection :
1569
+ plt .plot (df2 .f_match_orig , df2 .average_abund , color )
1570
+ else :
1571
+ plt .plot (df2 .match_containment_ani , df2 .average_abund , color )
1572
+
1531
1573
ax = plt .gca ()
1532
1574
ax .set_ylabel ('number of copies' )
1533
1575
ax .set_yscale ('log' )
0 commit comments