Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 15 additions & 28 deletions pyprophet/export/export_report.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import sqlite3
import pandas as pd


from .._config import ExportIOConfig
from ..report import post_scoring_report
from ..io.dispatcher import ReaderDispatcher
from ..io.util import get_parquet_column_names
from ..io.util import check_sqlite_table
from ..report import plot_scores
Expand Down Expand Up
@@ -130,35 +133,19 @@ def export_scored_report(
The format of the scoring report, either 'osw' or 'parquet'. Default is 'osw'.
"""

cols_infile = get_parquet_column_names(infile)

select_cols = [
"RUN_ID",
"PROTEIN_ID",
"PEPTIDE_ID",
"PRECURSOR_ID",
"PRECURSOR_DECOY",
"FEATURE_MS2_AREA_INTENSITY",
"SCORE_MS2_SCORE",
"SCORE_MS2_PEAK_GROUP_RANK",
"SCORE_MS2_Q_VALUE",
"SCORE_PEPTIDE_GLOBAL_SCORE",
"SCORE_PEPTIDE_GLOBAL_Q_VALUE",
"SCORE_PEPTIDE_EXPERIMENT_WIDE_SCORE",
"SCORE_PEPTIDE_EXPERIMENT_WIDE_Q_VALUE",
"SCORE_PEPTIDE_RUN_SPECIFIC_SCORE",
"SCORE_PEPTIDE_RUN_SPECIFIC_Q_VALUE",
"SCORE_PROTEIN_GLOBAL_SCORE",
"SCORE_PROTEIN_GLOBAL_Q_VALUE",
"SCORE_PROTEIN_EXPERIMENT_WIDE_SCORE",
"SCORE_PROTEIN_EXPERIMENT_WIDE_Q_VALUE",
"SCORE_IPF_QVALUE",
]
config = ExportIOConfig(
infile=infile,
outfile=outfile,
subsample_ratio=1.0, # not used for export/report paths
level="export",
context="export_scored_report",
# no need to set export_format for this utility
)

# Filter select cols based on available columns in the input file
select_cols = [col for col in select_cols if col in cols_infile]
# Get the right reader for the detected file type & context.
reader = ReaderDispatcher.get_reader(config)

# Load the input data
df = pd.read_parquet(infile, columns=select_cols)
# Read once (works for OSW or Parquet via their respective readers).
df = reader.read()

post_scoring_report(df, outfile)
Loading