Skip to content

Commit

Permalink
peptide and features finished.
Browse files (browse the repository at this point in the history)
  • Loading branch information
ypriverol committed Oct 21, 2024
1 parent c865bff commit 560347a
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 27 deletions.
11 changes: 3 additions & 8 deletions quantmsio/commands/diann_command.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -71,12 +71,7 @@ def diann_convert_to_parquet(
duckdb_threads: The number of threads for the DuckDB engine (e.g 4)
file_num: The number of files being processed at the same time
"""
if (
report_path is None
or mzml_info_folder is None
or output_folder is None
or sdrf_path is None
):
if report_path is None or mzml_info_folder is None or output_folder is None or sdrf_path is None:
raise click.UsageError("Please provide all the required parameters")

if not os.path.exists(output_folder):
Expand All @@ -97,7 +92,7 @@ def diann_convert_to_parquet(
dia_nn.write_feature_to_file(
qvalue_threshold=qvalue_threshold,
mzml_info_folder=mzml_info_folder,
output_path = feature_output_path,
file_num = file_num,
output_path=feature_output_path,
file_num=file_num,
protein_file=protein_file,
)
33 changes: 14 additions & 19 deletions quantmsio/core/diann.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -19,6 +19,7 @@

MODIFICATION_PATTERN = re.compile(r"\((.*?)\)")


def find_modification(peptide):
"""
Identify the modification site based on the peptide containing modifications.
Expand Down Expand Up @@ -143,13 +144,7 @@ def get_peptide_map_from_database(self):
logging.info("Time to load peptide map {} seconds".format(et))
return best_ref_map

def main_report_df(
self,
qvalue_threshold: float,
mzml_info_folder: str,
file_num: int,
protein_str: str = None
):
def main_report_df(self, qvalue_threshold: float, mzml_info_folder: str, file_num: int, protein_str: str = None):
def intergrate_msg(n):
nonlocal report
nonlocal mzml_info_folder
Expand Down Expand Up @@ -209,7 +204,7 @@ def intergrate_msg(n):
report["Modified.Sequence"] = report["Modified.Sequence"].map(modifications_map)
# pep
report["scan_reference_file_name"] = report["Precursor.Id"].map(best_ref_map)
#report["scan"] = None
# report["scan"] = None
report.rename(columns=DIANN_MAP, inplace=True)
# add extra msg
report = self.add_additional_msg(report)
Expand All @@ -235,21 +230,24 @@ def add_additional_msg(self, report: pd.DataFrame) -> pd.DataFrame:
)
report["scan"] = report["scan"].apply(generate_scan_number)
report.loc[:, "gg_names"] = report["gg_names"].str.split(",")
report.loc[:, "additional_intensities"] = report["Precursor.Normalised"].apply(lambda v: [{"name": "normalized intensity", "value": np.float32(v)}])
report.loc[:, "additional_scores"] = report[["Q.Value","PG.Q.Value"]].apply(lambda row: [{"name": "qvalue", "value": row["Q.Value"]}, {"name": "pg_qvalue", "value": row["PG.Q.Value"]}],axis=1)
report.loc[:, "additional_intensities"] = report["Precursor.Normalised"].apply(
lambda v: [{"name": "normalized intensity", "value": np.float32(v)}]
)
report.loc[:, "additional_scores"] = report[["Q.Value", "PG.Q.Value"]].apply(
lambda row: [
{"name": "qvalue", "value": row["Q.Value"]},
{"name": "pg_qvalue", "value": row["PG.Q.Value"]},
],
axis=1,
)
report.loc[:, "modification_details"] = None
report.loc[:, "cv_params"] = None
report.loc[:, "gg_accessions"] = None
report.loc[:, "best_id_score"] = None
return report


def generate_feature(
self,
qvalue_threshold: float,
mzml_info_folder: str,
file_num: int = 50,
protein_str: str = None
self, qvalue_threshold: float, mzml_info_folder: str, file_num: int = 50, protein_str: str = None
):
for report in self.main_report_df(qvalue_threshold, mzml_info_folder, file_num, protein_str):
s = time.time()
Expand All @@ -260,7 +258,6 @@ def generate_feature(
logging.info("Time to generate psm and feature file {} seconds".format(et))
yield feature


def write_feature_to_file(
self,
qvalue_threshold: float,
Expand Down Expand Up @@ -308,5 +305,3 @@ def merge_sdrf_to_feature(self, report):
inplace=True,
)
return report


0 comments on commit 560347a

Please sign in to comment.