Skip to content

Commit

Permalink
Revert corems type, add start of lipid workflow
Browse files Browse the repository at this point in the history
  • Loading branch information
kheal committed Nov 15, 2024
1 parent 48c43fe commit f0c6116
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 1 deletion.
65 changes: 65 additions & 0 deletions metaMS/lcms_lipidomics_workflow.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
from dataclasses import dataclass
import toml
from pathlib import Path
import datetime
from multiprocessing import Pool

from corems.mass_spectra.input.mzml import MZMLSpectraParser
from corems.mass_spectra.input.rawFileReader import ImportMassSpectraThermoMSFileReader

@dataclass
class LipidomicsWorkflowParameters:
Expand Down Expand Up @@ -33,6 +37,38 @@ class LipidomicsWorkflowParameters:
scan_translator_path: str = None
cores: int = 1

def instantiate_lcms_obj(file_in):
    """Instantiate a corems LCMS object from a binary file.

    Only ms1 spectra are pulled in, into a dataframe (without storing them as
    MassSpectrum objects, to save memory).

    Parameters
    ----------
    file_in : str or Path
        Path to binary file. The file type is detected from the extension(s)
        of the file name, case-insensitively.

    Returns
    -------
    myLCMSobj : corems LCMS object
        LCMS object with ms1 spectra in dataframe

    Raises
    ------
    NotImplementedError
        If file_in is a .raw file; no parser is wired in for that type yet.
    ValueError
        If file_in is neither a .mzML nor a .raw file.
    """
    # Look only at the file name's extensions (not at directory names), and
    # ignore case so that e.g. "sample.RAW" and "sample.mzml" are recognized.
    suffixes = {suffix.lower() for suffix in Path(file_in).suffixes}

    # Instantiate parser based on binary file type
    if ".mzml" in suffixes:
        parser = MZMLSpectraParser(file_in)
    elif ".raw" in suffixes:
        # ImportMassSpectraThermoMSFileReader(file_in) is the intended parser
        # here but is not enabled yet; fail with a clear message instead of
        # an UnboundLocalError on the parser variable further down.
        raise NotImplementedError(
            f"Parsing of .raw files is not enabled yet: {file_in}"
        )
    else:
        raise ValueError(
            f"Unsupported file type (expected .mzML or .raw): {file_in}"
        )

    # Instantiate lc-ms data object using parser and pull in ms1 spectra into
    # dataframe (without storing as MassSpectrum objects to save memory)
    myLCMSobj = parser.get_lcms_obj(spectra="ms1")

    return myLCMSobj

def run_lipid_sp_ms1(file_in, out_path, params_toml, scan_translator):
    """Begin the per-file signal processing / ms1 step of the lipid workflow.

    This is currently a stub: it notes the start time, loads the input file
    through instantiate_lcms_obj, and then falls off the end, so it returns
    None.

    Parameters
    ----------
    file_in : str or Path
        Path to the binary file, passed straight to instantiate_lcms_obj
    out_path : str
        Not used yet by this function
    params_toml : str
        Not used yet by this function
    scan_translator : str
        Not used yet by this function

    Returns
    -------
    None
    """
    # Start time and loaded object are not consumed yet; they are kept for the
    # processing steps that the TODO below will add.
    started_at = datetime.datetime.now()
    lcms_obj = instantiate_lcms_obj(file_in)
    # TODO KRH: Add signal processing and ms1 molecular search here

def run_lcms_lipidomics_workflow(
lipidomics_workflow_paramaters_file=None,
Expand Down Expand Up @@ -65,4 +101,33 @@ def run_lcms_lipidomics_workflow(
files_list = list(file_dir.glob("*.raw"))
out_paths_list = [out_dir / f.stem for f in files_list]

# Run signal processing, get associated ms1, add associated ms2, do ms1 molecular search, and export temp results
cores = lipid_workflow_params.cores
params_toml = lipid_workflow_params.corems_toml_path
scan_translator = lipid_workflow_params.scan_translator_path
if cores == 1 or len(files_list) == 1:
mz_dicts = []
for file_in, file_out in list(zip(files_list, out_paths_list)):
mz_dict = run_lipid_sp_ms1(
file_in=str(file_in),
out_path=str(file_out),
params_toml=params_toml,
scan_translator=scan_translator,
)
mz_dicts.append(mz_dict)
elif cores > 1:
pool = Pool(cores)
args = [
(
str(file_in),
str(file_out),
params_toml,
scan_translator,
)
for file_in, file_out in list(zip(files_list, out_paths_list))
]
mz_dicts = pool.starmap(run_lipid_sp_ms1, args)
pool.close()
pool.join()

# TODO KRH: Add full lipidomics workflow here
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
corems>=3.0.0
corems>=2.0.0
Click>=7.1.1
requests
nmdc-schema>=7.0.0
Expand Down

0 comments on commit f0c6116

Please sign in to comment.