From 9ecd25b906179a7161d5a7a7cd66eba0f9616acd Mon Sep 17 00:00:00 2001 From: Ian Boyes Date: Thu, 14 Dec 2023 09:28:36 -0800 Subject: [PATCH] fix: support unpaired reads in `fastqc` and add testing --- tests/__snapshots__/test_fastqc.ambr | 769 ++++++++++++++++++++++++++- tests/test_fastqc.py | 25 +- virtool_workflow/analysis/fastqc.py | 29 + 3 files changed, 821 insertions(+), 2 deletions(-) diff --git a/tests/__snapshots__/test_fastqc.ambr b/tests/__snapshots__/test_fastqc.ambr index febf938b3..1a9fb8d93 100644 --- a/tests/__snapshots__/test_fastqc.ambr +++ b/tests/__snapshots__/test_fastqc.ambr @@ -258,7 +258,7 @@ 0, ]) # --- -# name: test_fastqc +# name: test_fastqc_paired dict({ 'bases': list([ list([ @@ -1025,3 +1025,770 @@ ]), }) # --- +# name: test_fastqc_unpaired + dict({ + 'bases': list([ + list([ + 26.942, + 32.0, + 27.0, + 32.0, + 12.0, + 32.0, + ]), + list([ + 30.098, + 32.0, + 32.0, + 32.0, + 27.0, + 32.0, + ]), + list([ + 30.309, + 32.0, + 32.0, + 32.0, + 27.0, + 32.0, + ]), + list([ + 34.152, + 37.0, + 32.0, + 37.0, + 32.0, + 37.0, + ]), + list([ + 35.723, + 37.0, + 37.0, + 37.0, + 32.0, + 37.0, + ]), + list([ + 38.186, + 41.0, + 37.0, + 41.0, + 32.0, + 41.0, + ]), + list([ + 38.197, + 41.0, + 37.0, + 41.0, + 32.0, + 41.0, + ]), + list([ + 39.469, + 41.0, + 41.0, + 41.0, + 37.0, + 41.0, + ]), + list([ + 38.655, + 41.0, + 41.0, + 41.0, + 32.0, + 41.0, + ]), + list([ + 37.962, + 41.0, + 37.0, + 41.0, + 27.0, + 41.0, + ]), + list([ + 38.929, + 41.0, + 41.0, + 41.0, + 37.0, + 41.0, + ]), + list([ + 39.0, + 41.0, + 41.0, + 41.0, + 37.0, + 41.0, + ]), + list([ + 38.543, + 41.0, + 41.0, + 41.0, + 32.0, + 41.0, + ]), + list([ + 35.378, + 41.0, + 37.0, + 41.0, + 12.0, + 41.0, + ]), + list([ + 38.417, + 41.0, + 37.0, + 41.0, + 32.0, + 41.0, + ]), + list([ + 37.73, + 41.0, + 37.0, + 41.0, + 27.0, + 41.0, + ]), + list([ + 38.969, + 41.0, + 41.0, + 41.0, + 37.0, + 41.0, + ]), + list([ + 38.934, + 41.0, + 41.0, + 41.0, + 37.0, + 41.0, + ]), + list([ + 35.786, + 41.0, + 37.0, + 41.0, + 12.0, + 41.0, + ]), + list([ + 38.034, + 41.0, + 37.0, + 41.0, + 27.0, + 41.0, + ]), + list([ + 37.416, + 41.0, + 37.0, + 41.0, + 27.0, + 41.0, + ]), + list([ + 39.125, + 41.0, + 41.0, + 41.0, + 37.0, + 41.0, + ]), + list([ + 37.288, + 41.0, + 37.0, + 41.0, + 27.0, + 41.0, + ]), + list([ + 35.267, + 41.0, + 32.0, + 41.0, + 12.0, + 41.0, + ]), + list([ + 38.068, + 41.0, + 37.0, + 41.0, + 27.0, + 41.0, + ]), + list([ + 38.703, + 41.0, + 37.0, + 41.0, + 32.0, + 41.0, + ]), + list([ + 38.704, + 41.0, + 41.0, + 41.0, + 32.0, + 41.0, + ]), + list([ + 38.593, + 41.0, + 37.0, + 41.0, + 32.0, + 41.0, + ]), + list([ + 38.731, + 41.0, + 41.0, + 41.0, + 32.0, + 41.0, + ]), + list([ + 37.982, + 41.0, + 37.0, + 41.0, + 32.0, + 41.0, + ]), + list([ + 37.146, + 41.0, + 37.0, + 41.0, + 27.0, + 41.0, + ]), + list([ + 37.569, + 41.0, + 37.0, + 41.0, + 27.0, + 41.0, + ]), + list([ + 33.155, + 37.0, + 27.0, + 41.0, + 12.0, + 41.0, + ]), + list([ + 37.408, + 41.0, + 37.0, + 41.0, + 27.0, + 41.0, + ]), + list([ + 35.83, + 41.0, + 37.0, + 41.0, + 22.0, + 41.0, + ]), + list([ + 38.273, + 41.0, + 37.0, + 41.0, + 32.0, + 41.0, + ]), + list([ + 37.493, + 41.0, + 37.0, + 41.0, + 27.0, + 41.0, + ]), + list([ + 35.842, + 41.0, + 37.0, + 41.0, + 22.0, + 41.0, + ]), + list([ + 38.232, + 41.0, + 37.0, + 41.0, + 32.0, + 41.0, + ]), + list([ + 37.889, + 41.0, + 37.0, + 41.0, + 27.0, + 41.0, + ]), + list([ + 37.833, + 41.0, + 37.0, + 41.0, + 27.0, + 41.0, + ]), + list([ + 37.337, + 41.0, + 37.0, + 41.0, + 27.0, + 41.0, + ]), + list([ + 32.029, + 37.0, + 27.0, + 41.0, + 12.0, + 41.0, + ]), + list([ + 36.283, + 41.0, + 37.0, + 41.0, + 27.0, + 41.0, + ]), + list([ + 37.939, + 41.0, + 37.0, + 41.0, + 32.0, + 41.0, + ]), + list([ + 36.777, + 41.0, + 37.0, + 41.0, + 22.0, + 41.0, + ]), + list([ + 37.161, + 41.0, + 37.0, + 41.0, + 27.0, + 41.0, + ]), + list([ + 37.858, + 41.0, + 37.0, + 41.0, + 32.0, + 41.0, + ]), + list([ + 33.493, + 41.0, + 27.0, + 41.0, + 12.0, + 41.0, + ]), + list([ + 37.108, + 41.0, + 37.0, + 41.0, + 27.0, + 41.0, + ]), + ]), + 'composition': list([ + list([ + 93.3, + 1.4, + 2.1, + 3.2, + ]), + list([ + 25.3, + 33.6, + 32.6, + 8.5, + ]), + list([ + 19.9, + 35.6, + 29.1, + 15.4, + ]), + list([ + 18.6, + 24.8, + 40.9, + 15.7, + ]), + list([ + 18.3, + 25.3, + 29.7, + 26.8, + ]), + list([ + 21.7, + 29.9, + 29.9, + 18.6, + ]), + list([ + 18.2, + 28.9, + 35.7, + 17.1, + ]), + list([ + 18.7, + 28.5, + 30.5, + 22.4, + ]), + list([ + 20.2, + 28.6, + 33.1, + 18.1, + ]), + list([ + 19.8, + 28.2, + 28.8, + 23.2, + ]), + list([ + 17.6, + 31.4, + 30.5, + 20.5, + ]), + list([ + 16.5, + 28.4, + 29.6, + 25.5, + ]), + list([ + 16.1, + 27.0, + 31.7, + 25.1, + ]), + list([ + 22.4, + 26.6, + 26.2, + 24.9, + ]), + list([ + 20.3, + 27.8, + 28.9, + 23.0, + ]), + list([ + 20.2, + 33.9, + 23.6, + 22.3, + ]), + list([ + 22.5, + 27.6, + 24.8, + 25.1, + ]), + list([ + 19.0, + 32.4, + 23.9, + 24.7, + ]), + list([ + 21.7, + 31.3, + 25.8, + 21.2, + ]), + list([ + 19.9, + 25.8, + 28.5, + 25.8, + ]), + list([ + 16.7, + 27.0, + 31.4, + 25.0, + ]), + list([ + 21.3, + 25.6, + 26.9, + 26.3, + ]), + list([ + 24.3, + 29.4, + 28.9, + 17.4, + ]), + list([ + 24.4, + 32.1, + 28.6, + 14.8, + ]), + list([ + 22.7, + 19.7, + 43.9, + 13.7, + ]), + list([ + 53.6, + 23.7, + 10.9, + 11.8, + ]), + list([ + 14.0, + 52.3, + 21.1, + 12.6, + ]), + list([ + 10.1, + 8.8, + 57.9, + 23.2, + ]), + list([ + 20.4, + 6.3, + 13.9, + 59.4, + ]), + list([ + 61.9, + 7.5, + 18.7, + 11.8, + ]), + list([ + 16.0, + 7.7, + 56.8, + 19.5, + ]), + list([ + 21.5, + 12.5, + 7.8, + 58.2, + ]), + list([ + 64.9, + 11.6, + 7.7, + 15.8, + ]), + list([ + 54.3, + 18.7, + 17.3, + 9.7, + ]), + list([ + 17.8, + 50.9, + 10.7, + 20.6, + ]), + list([ + 10.2, + 8.5, + 30.4, + 50.9, + ]), + list([ + 21.2, + 16.2, + 54.9, + 7.7, + ]), + list([ + 59.0, + 11.6, + 20.9, + 8.6, + ]), + list([ + 10.8, + 29.9, + 52.4, + 6.9, + ]), + list([ + 19.6, + 59.7, + 14.1, + 6.6, + ]), + list([ + 51.0, + 23.4, + 9.1, + 16.4, + ]), + list([ + 7.8, + 63.4, + 17.2, + 11.6, + ]), + list([ + 7.7, + 51.6, + 15.9, + 24.9, + ]), + list([ + 12.0, + 8.0, + 26.4, + 53.6, + ]), + list([ + 16.0, + 7.6, + 55.5, + 20.9, + ]), + list([ + 9.6, + 15.2, + 25.0, + 50.2, + ]), + list([ + 23.4, + 14.7, + 52.7, + 9.2, + ]), + list([ + 55.4, + 20.5, + 10.6, + 13.5, + ]), + list([ + 9.3, + 60.5, + 16.1, + 14.1, + ]), + list([ + 9.5, + 51.1, + 14.3, + 25.0, + ]), + ]), + 'count': 25000, + 'encoding': 'Sanger / Illumina 1.9', + 'gc': 46.0, + 'length': list([ + 50, + 50, + ]), + 'sequences': list([ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 2, + 0, + 2, + 5, + 24, + 33, + 74, + 88, + 111, + 162, + 168, + 187, + 227, + 300, + 306, + 348, + 400, + 454, + 532, + 596, + 726, + 856, + 1077, + 1598, + 2086, + 3099, + 6346, + 5193, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ]), + }) +# --- diff --git a/tests/test_fastqc.py b/tests/test_fastqc.py index 83b97ecd1..8c42d314a 100644 --- a/tests/test_fastqc.py +++ b/tests/test_fastqc.py @@ -462,7 +462,7 @@ async def test_composite(self, snapshot: SnapshotAssertion): assert composite_parser.data == snapshot -async def test_fastqc( +async def test_fastqc_paired( example_path: Path, run_subprocess: RunSubprocess, snapshot: SnapshotAssertion, @@ -491,3 +491,26 @@ async def test_fastqc( ) assert out == snapshot + + +async def test_fastqc_unpaired( + example_path: Path, + run_subprocess: RunSubprocess, + snapshot: SnapshotAssertion, + work_path: Path, +): + shutil.copyfile( + example_path / "sample/reads_1.fq.gz", + work_path / "reads_1.fq.gz", + ) + + output_path = work_path / "fastqc" + + func = await fastqc(run_subprocess) + + out = await func( + (work_path / "reads_1.fq.gz",), + output_path, + ) + + assert out == snapshot diff --git a/virtool_workflow/analysis/fastqc.py b/virtool_workflow/analysis/fastqc.py index 75544b256..85fcbe8e9 100644 --- a/virtool_workflow/analysis/fastqc.py +++ b/virtool_workflow/analysis/fastqc.py @@ -361,6 +361,35 @@ def _parse_fastqc(fastqc_path: Path, output_path: Path) -> dict: ) ) + if len(sides) == 1: + left = sides[0] + + return { + "bases": [ + [ + round(n, 3) + for n in [ + point.mean, + point.median, + point.lower_quartile, + point.upper_quartile, + point.tenth_percentile, + point.ninetieth_percentile, + ] + ] + for point in left.base_quality.data + ], + "composition": [ + [round(n, 1) for n in [point.g, point.a, point.t, point.c]] + for point in left.nucleotide_composition.data + ], + "count": left.basic_statistics.count, + "encoding": left.basic_statistics.encoding, + "gc": left.basic_statistics.gc, + "length": left.basic_statistics.length, + "sequences": left.sequence_quality.data, + } + left, right = sides basic = left.basic_statistics.composite(right.basic_statistics)