|
23 | 23 | from matplotlib.lines import Line2D
|
24 | 24 | import seaborn as sns
|
25 | 25 | import upsetplot
|
| 26 | +import pandas as pd |
26 | 27 |
|
27 | 28 | import sourmash
|
28 | 29 | from sourmash import sourmash_args
|
@@ -846,6 +847,14 @@ def __init__(self, subparser):
|
846 | 847 | "--no-labels", action="store_true",
|
847 | 848 | help="disable X & Y axis labels"
|
848 | 849 | )
|
| 850 | + subparser.add_argument( |
| 851 | + "--no-x-labels", action="store_true", |
| 852 | + help="disable X axis labels" |
| 853 | + ) |
| 854 | + subparser.add_argument( |
| 855 | + "--no-y-labels", action="store_true", |
| 856 | + help="disable Y axis labels" |
| 857 | + ) |
849 | 858 |
|
850 | 859 | def main(self, args):
|
851 | 860 | super().main(args)
|
@@ -901,12 +910,15 @@ def main(self, args):
|
901 | 910 | if args.boolean: # turn off colorbar if boolean.
|
902 | 911 | kw_args['cbar_pos'] = None
|
903 | 912 |
|
| 913 | + yticklabels=sample_d_to_idents(query_d_items) |
| 914 | + xticklabels=sample_d_to_idents(against_d_items) |
904 | 915 | if args.no_labels:
|
905 |
| - xticklabels=[] |
906 |
| - yticklabels=[] |
907 |
| - else: |
908 |
| - yticklabels=sample_d_to_idents(query_d_items) |
909 |
| - xticklabels=sample_d_to_idents(against_d_items) |
| 916 | + xticklabels = [] |
| 917 | + yticklabels = [] |
| 918 | + elif args.no_x_labels: |
| 919 | + xticklabels = [] |
| 920 | + elif args.no_y_labels: |
| 921 | + yticklabels = [] |
910 | 922 |
|
911 | 923 | # turn into dissimilarity matrix
|
912 | 924 | # plot!
|
@@ -1471,3 +1483,59 @@ def main(self, args):
|
1471 | 1483 | if args.output:
|
1472 | 1484 | notify(f"saving to '{args.output}'")
|
1473 | 1485 | pylab.savefig(args.output)
|
| 1486 | + |
| 1487 | + |
class Command_PresenceFilter(CommandLinePlugin):
    """Scatter-plot a filtered 'gather' CSV.

    Rows are kept when their ``unique_intersect_bp`` is at least
    ``min_num_hashes * scaled``. The surviving rows are plotted with
    ``average_abund`` on a log-scaled Y axis against either
    ``f_match_orig`` (the default, ``--detection``) or
    ``match_containment_ani`` (``--ani``) on the X axis.
    """

    command = 'presence_filter'
    description = """\
Provide a filtered view of 'gather' output, plotting detection or ANI
against average abund for significant matches.
"""

    usage = """
   sourmash scripts presence_filter gather.csv -o presence.png
"""
    epilog = epilog
    formatter_class = argparse.RawTextHelpFormatter

    def __init__(self, subparser):
        """Register the command-line arguments for this command."""
        super().__init__(subparser)
        subparser.add_argument('gather_csv')
        # required=True already forces a value, so no default is needed.
        subparser.add_argument('-o', '--output',
                               help="save image to this file",
                               required=True)
        # type=int is essential: without it a value supplied on the command
        # line is a str, and multiplying it by 'scaled' in main() would
        # repeat the string instead of computing a numeric threshold.
        subparser.add_argument('-N', '--min-num-hashes', type=int,
                               default=3, help='threshold (default: 3)')
        # --detection and --ani share one destination; --ani flips it off.
        subparser.add_argument('--detection', action="store_true",
                               default=True,
                               help="plot fraction of genome detected "
                               "on the X axis (default)")
        subparser.add_argument('--ani', dest='detection',
                               action="store_false",
                               help="plot containment ANI of the match "
                               "on the X axis instead of detection")

    def main(self, args):
        """Load the gather CSV, filter it, and save the scatter plot.

        Raises:
            ValueError: if the 'scaled' column of the CSV does not hold
                exactly one distinct value (this includes an empty CSV).
        """
        # Same first step as the other commands in this file.
        super().main(args)

        df = pd.read_csv(args.gather_csv)
        notify(f"loaded {len(df)} rows from '{args.gather_csv}'")

        # The threshold below is only meaningful when every row shares a
        # single 'scaled' value. Raise explicitly rather than assert, since
        # asserts are stripped when Python runs with -O.
        scaled_values = df['scaled'].unique()
        if len(scaled_values) != 1:
            raise ValueError(
                f"expected exactly one 'scaled' value in "
                f"'{args.gather_csv}', found {len(scaled_values)}"
            )
        scaled = scaled_values[0]

        threshold = args.min_num_hashes * scaled
        df = df[df['unique_intersect_bp'] >= threshold]
        notify(f"filtered down to {len(df)} rows with unique_intersect_bp >= {threshold}")

        # Choose the X-axis column and its label together so they cannot
        # drift apart.
        if args.detection:
            x_values = df.f_match_orig
            x_label = 'fraction of genome detected'
        else:
            x_values = df.match_containment_ani
            x_label = 'cANI of match'

        plt.plot(x_values, df.average_abund, '.')
        ax = plt.gca()
        ax.set_ylabel('number of copies')
        ax.set_yscale('log')
        ax.set_xlabel(x_label)

        notify(f"saving figure to '{args.output}'")
        plt.savefig(args.output)
0 commit comments