From 560347a47326fc3f73f800faf1098c8e25f0a9bd Mon Sep 17 00:00:00 2001 From: Yasset Perez-Riverol Date: Mon, 21 Oct 2024 14:08:55 +0200 Subject: [PATCH] peptide and features finished. --- quantmsio/commands/diann_command.py | 11 +++------- quantmsio/core/diann.py | 33 ++++++++++++----------------- 2 files changed, 17 insertions(+), 27 deletions(-) diff --git a/quantmsio/commands/diann_command.py b/quantmsio/commands/diann_command.py index a3847e2..cff64b6 100644 --- a/quantmsio/commands/diann_command.py +++ b/quantmsio/commands/diann_command.py @@ -71,12 +71,7 @@ def diann_convert_to_parquet( duckdb_threads: The number of threads for the DuckDB engine (e.g 4) file_num: The number of files being processed at the same time """ - if ( - report_path is None - or mzml_info_folder is None - or output_folder is None - or sdrf_path is None - ): + if report_path is None or mzml_info_folder is None or output_folder is None or sdrf_path is None: raise click.UsageError("Please provide all the required parameters") if not os.path.exists(output_folder): @@ -97,7 +92,7 @@ def diann_convert_to_parquet( dia_nn.write_feature_to_file( qvalue_threshold=qvalue_threshold, mzml_info_folder=mzml_info_folder, - output_path = feature_output_path, - file_num = file_num, + output_path=feature_output_path, + file_num=file_num, protein_file=protein_file, ) diff --git a/quantmsio/core/diann.py b/quantmsio/core/diann.py index 513a0bf..abb39d6 100644 --- a/quantmsio/core/diann.py +++ b/quantmsio/core/diann.py @@ -19,6 +19,7 @@ MODIFICATION_PATTERN = re.compile(r"\((.*?)\)") + def find_modification(peptide): """ Identify the modification site based on the peptide containing modifications. @@ -143,13 +144,7 @@ def get_peptide_map_from_database(self): logging.info("Time to load peptide map {} seconds".format(et)) return best_ref_map - def main_report_df( - self, - qvalue_threshold: float, - mzml_info_folder: str, - file_num: int, - protein_str: str = None - ): + def main_report_df(self, qvalue_threshold: float, mzml_info_folder: str, file_num: int, protein_str: str = None): def intergrate_msg(n): nonlocal report nonlocal mzml_info_folder @@ -209,7 +204,7 @@ def intergrate_msg(n): report["Modified.Sequence"] = report["Modified.Sequence"].map(modifications_map) # pep report["scan_reference_file_name"] = report["Precursor.Id"].map(best_ref_map) - #report["scan"] = None + # report["scan"] = None report.rename(columns=DIANN_MAP, inplace=True) # add extra msg report = self.add_additional_msg(report) @@ -235,21 +230,24 @@ def add_additional_msg(self, report: pd.DataFrame) -> pd.DataFrame: ) report["scan"] = report["scan"].apply(generate_scan_number) report.loc[:, "gg_names"] = report["gg_names"].str.split(",") - report.loc[:, "additional_intensities"] = report["Precursor.Normalised"].apply(lambda v: [{"name": "normalized intensity", "value": np.float32(v)}]) - report.loc[:, "additional_scores"] = report[["Q.Value","PG.Q.Value"]].apply(lambda row: [{"name": "qvalue", "value": row["Q.Value"]}, {"name": "pg_qvalue", "value": row["PG.Q.Value"]}],axis=1) + report.loc[:, "additional_intensities"] = report["Precursor.Normalised"].apply( + lambda v: [{"name": "normalized intensity", "value": np.float32(v)}] + ) + report.loc[:, "additional_scores"] = report[["Q.Value", "PG.Q.Value"]].apply( + lambda row: [ + {"name": "qvalue", "value": row["Q.Value"]}, + {"name": "pg_qvalue", "value": row["PG.Q.Value"]}, + ], + axis=1, + ) report.loc[:, "modification_details"] = None report.loc[:, "cv_params"] = None report.loc[:, "gg_accessions"] = None report.loc[:, "best_id_score"] = None return report - def generate_feature( - self, - qvalue_threshold: float, - mzml_info_folder: str, - file_num: int = 50, - protein_str: str = None + self, qvalue_threshold: float, mzml_info_folder: str, file_num: int = 50, protein_str: str = None ): for report in self.main_report_df(qvalue_threshold, mzml_info_folder, file_num, protein_str): s = time.time() @@ -260,7 +258,6 @@ def generate_feature( logging.info("Time to generate psm and feature file {} seconds".format(et)) yield feature - def write_feature_to_file( self, qvalue_threshold: float, @@ -308,5 +305,3 @@ def merge_sdrf_to_feature(self, report): inplace=True, ) return report - -