Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions pyprophet/data_handling.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,13 @@
def profile(fun):
    """Return ``fun`` unchanged, so ``@profile`` can decorate a function as a no-op."""
    return fun

def format_bytes(size):
    """
    Format a byte count as a human-readable string with two decimals.

    The value is divided by 1024 until its magnitude drops below 1024 or the
    largest supported unit (TB) is reached; anything at or beyond 1024 TB is
    still reported in TB.

    Parameters
    ----------
    size : int or float
        Number of bytes. Negative values (e.g. a memory delta) are scaled by
        magnitude and keep their sign.

    Returns
    -------
    str
        The scaled value followed by a space and one of
        ``B``, ``KB``, ``MB``, ``GB`` or ``TB``.
    """
    for unit in ("B", "KB", "MB", "GB"):
        # Compare on magnitude: a plain ``size < 1024.0`` is true for every
        # negative number, which would leave e.g. -2048 reported as bytes.
        if abs(size) < 1024.0:
            return f"{size:.2f} {unit}"
        size /= 1024.0
    return f"{size:.2f} TB"

# selection of scores with low cross-correlation for metabolomics scoring
def use_metabolomics_scores():
return [
Expand Down Expand Up @@ -112,8 +119,12 @@ def create_index_if_not_exists(con, index_name, table_name, column_name):


def is_parquet_file(file_path):
'''
Check if the file is a valid Parquet file.
'''
import pyarrow.parquet as pq
from pyarrow.lib import ArrowInvalid, ArrowIOError

# First check extension
if not os.path.splitext(file_path)[1].lower() in ('.parquet', '.pq'):
return False
Expand All @@ -124,6 +135,29 @@ def is_parquet_file(file_path):
return True
except (ArrowInvalid, ArrowIOError, OSError):
return False

def is_valid_split_parquet_dir(path):
    """
    Tell whether ``path`` is a directory holding the split-parquet file pair.

    The directory qualifies only if it contains both
    ``precursors_features.parquet`` and ``transition_features.parquet`` as
    regular files, and each one passes ``is_parquet_file``.

    Returns ``True`` when every check succeeds, ``False`` otherwise
    (including when ``path`` is not a directory).
    """
    if not os.path.isdir(path):
        return False

    # Built lazily so checking stops at the first file that fails, in the
    # same order as listed here.
    candidates = (
        os.path.join(path, basename)
        for basename in (
            "precursors_features.parquet",
            "transition_features.parquet",
        )
    )
    return all(
        os.path.isfile(candidate) and is_parquet_file(candidate)
        for candidate in candidates
    )


def get_parquet_column_names(file_path):
"""
Expand Down
Loading