From 905ac9967f40f7e88e08b3e1a354168b0a06cc7a Mon Sep 17 00:00:00 2001 From: Roman Swetlicki <64587275+Just-Roma@users.noreply.github.com> Date: Mon, 27 Mar 2023 10:32:34 +0200 Subject: [PATCH] Update coverage_metrics.py Restructure the coverage data passed to the 'write_data_file' method so that it produces the same .json and .txt files as on master. --- multiqc/modules/dragen/coverage_metrics.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/multiqc/modules/dragen/coverage_metrics.py b/multiqc/modules/dragen/coverage_metrics.py index 5a3d251bf5..a0f70837fd 100644 --- a/multiqc/modules/dragen/coverage_metrics.py +++ b/multiqc/modules/dragen/coverage_metrics.py @@ -599,7 +599,8 @@ def add_coverage_metrics(self): # Write data to file. out_data = make_data_for_txt_report(cov_data) - self.write_data_file(out_data, "dragen_cov_metrics") + for phenotype in out_data: + self.write_data_file(out_data[phenotype], phenotype) # Extract coverage bed/target bed/wgs from _overall_mean_cov.csv files. # And prepare -specific texts. @@ -639,11 +640,22 @@ def check_duplicate_samples(sample_names): def make_data_for_txt_report(coverage_data): """Prepare data for the text report.""" - data = {} + + data = defaultdict(dict) for sample in coverage_data: for phenotype in coverage_data[sample]: - ID = sample + phenotype - data[ID] = coverage_data[sample][phenotype]["metrics_and_values"] + # Replace any sequence of spaces/hyphens/dots/underscores by single underscore. + new_phenotype = re.sub("(\s+|-+|\.+|_+)+", "_", phenotype) + # Append 'coverage_section' suffix as in the previous code version. 
+ if new_phenotype == "wgs": + new_phenotype += "_cov_metrics" + elif "qc_coverage_region" in new_phenotype: + new_phenotype = new_phenotype.replace("qc_coverage_region", "qc_region") + "_coverage_metrics" + else: + new_phenotype += "_coverage_metrics" + new_phenotype = "dragen_" + new_phenotype + + data[new_phenotype][sample] = coverage_data[sample][phenotype]["metrics_and_values"] return data