Skip to content

Extra files storage #83

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: new_bed_compilance
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions bbconf/bbagent.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from bbconf.modules.bedfiles import BedAgentBedFile
from bbconf.modules.bedsets import BedAgentBedSet
from bbconf.modules.objects import BBObjects
from bbconf.modules.extras import BBExtras

from .const import PKG_NAME

Expand All @@ -45,6 +46,7 @@ def __init__(
self._bed = BedAgentBedFile(self.config, self)
self._bedset = BedAgentBedSet(self.config)
self._objects = BBObjects(self.config)
self._extras = BBExtras(self.config)

@property
def bed(self) -> BedAgentBedFile:
Expand All @@ -58,6 +60,13 @@ def bedset(self) -> BedAgentBedSet:
def objects(self) -> BBObjects:
return self._objects

@property
def extras(self) -> BBExtras:
    """Return the agent's extra-files module (the `BBExtras` instance)."""
    extras_module = self._extras
    return extras_module

def __repr__(self) -> str:
repr = f"BedBaseAgent(config={self.config})"
repr += f"\n{self.bed}"
Expand Down
16 changes: 10 additions & 6 deletions bbconf/config_parser/bedbaseconfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import os
import warnings
from pathlib import Path
from typing import List, Literal, Union
from typing import List, Literal, Union, Dict

import boto3
import qdrant_client
Expand All @@ -22,7 +22,9 @@
S3_BEDSET_PATH_FOLDER,
S3_FILE_PATH_FOLDER,
S3_PLOTS_PATH_FOLDER,
S3_EXTRA_FILE_PATH_FOLDER,
TEXT_EMBEDDING_DIMENSION,
DB_EXTRAS_TYPE,
)
from bbconf.config_parser.models import ConfigFile
from bbconf.const import PKG_NAME, ZARR_TOKENIZED_FOLDER
Expand All @@ -33,7 +35,7 @@
BedbaseS3ConnectionError,
)
from bbconf.helpers import get_absolute_path, get_bedbase_cfg
from bbconf.models.base_models import FileModel
from bbconf.models.base_models import FileModel, FileModelDict
from bbconf.models.bed_models import BedFiles, BedPlots
from bbconf.models.bedset_models import BedSetPlots
from bbconf.models.drs_models import AccessMethod, AccessURL
Expand Down Expand Up @@ -374,17 +376,17 @@ def upload_s3(self, file_path: str, s3_path: Union[Path, str]) -> None:
def upload_files_s3(
self,
identifier: str,
files: Union[BedFiles, BedPlots, BedSetPlots],
files: Union[BedFiles, BedPlots, BedSetPlots, FileModelDict],
base_path: str,
type: Literal["files", "plots", "bedsets"] = "files",
) -> Union[BedFiles, BedPlots, BedSetPlots]:
type: Literal["files", "plots", "bedsets", "extra"] = "files",
) -> Union[BedFiles, BedPlots, BedSetPlots, FileModelDict]:
"""
Upload files to s3.

:param identifier: bed file identifier
:param files: dictionary with files to upload
:param base_path: local path to the output files
:param type: type of files to upload [files, plots, bedsets]
:param type: type of files to upload [files, plots, bedsets, extra]
:return: None
"""

Expand All @@ -394,6 +396,8 @@ def upload_files_s3(
s3_output_base_folder = S3_PLOTS_PATH_FOLDER
elif type == "bedsets":
s3_output_base_folder = S3_BEDSET_PATH_FOLDER
elif type == DB_EXTRAS_TYPE:
s3_output_base_folder = S3_EXTRA_FILE_PATH_FOLDER
else:
raise BedBaseConfError(
f"Invalid type: {type}. Should be 'files', 'plots', or 'bedsets'"
Expand Down
3 changes: 3 additions & 0 deletions bbconf/config_parser/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,8 @@
# Base folders on S3. `BedBaseConfig.upload_files_s3` selects one of them as
# the output base folder depending on its `type` argument.
S3_FILE_PATH_FOLDER = "files"
S3_PLOTS_PATH_FOLDER = "stats"
S3_BEDSET_PATH_FOLDER = "bedsets"
# Selected when the upload type equals DB_EXTRAS_TYPE.
S3_EXTRA_FILE_PATH_FOLDER = "extra_files"

# Value of the `type` column of `Files` rows that describe extra files; the
# same value is passed as the `type` argument of `upload_files_s3`.
DB_EXTRAS_TYPE = "extra"

# NOTE(review): presumably the vector size of the text embedding model - confirm.
TEXT_EMBEDDING_DIMENSION = 384
12 changes: 11 additions & 1 deletion bbconf/db_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ class Files(Base):
)
title: Mapped[Optional[str]]
type: Mapped[str] = mapped_column(
default="file", comment="Type of the object, e.g. file, plot, ..."
default="file", comment="Type of the object, e.g. file, plot, extra, ..."
)
path: Mapped[str]
path_thumbnail: Mapped[str] = mapped_column(
Expand All @@ -285,6 +285,16 @@ class Files(Base):
bedfile: Mapped["Bed"] = relationship("Bed", back_populates="files")
bedset: Mapped["BedSets"] = relationship("BedSets", back_populates="files")

creation_date: Mapped[datetime.datetime] = mapped_column(
default=deliver_update_date,
)
last_update_date: Mapped[datetime.datetime] = mapped_column(
default=deliver_update_date, onupdate=deliver_update_date
)
genome_alias: Mapped[str] = mapped_column(
nullable=True, comment="Genome of the file (If any). Used for extra files."
)

__table_args__ = (
UniqueConstraint(
"name", "bedfile_id"
Expand Down
19 changes: 17 additions & 2 deletions bbconf/models/base_models.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from typing import List, Optional, Union, Dict
from typing import List, Optional, Union, Dict, Iterator, Tuple
import datetime

from pydantic import BaseModel, ConfigDict, Field
from pydantic import BaseModel, ConfigDict, Field, RootModel

from .drs_models import AccessMethod

Expand All @@ -16,10 +16,25 @@ class FileModel(BaseModel):
size: Optional[int] = None
object_id: Optional[str] = None
access_methods: List[AccessMethod] = None
last_update_date: Optional[Union[datetime.datetime, None]] = None
submission_date: Optional[Union[datetime.datetime, None]] = None
genome_alias: Optional[Union[str, None]] = None

model_config = ConfigDict(populate_by_name=True, extra="ignore")


class FileModelDict(RootModel[Dict[str, FileModel]]):
    """
    Dictionary-like container that maps a file key to its FileModel.

    The mapping itself lives in `root`. Entries can be read by key
    (`files["key"]`) or as attributes (`files.key`), and iterating the
    object yields `(key, FileModel)` pairs.
    """

    def __iter__(self) -> Iterator[Tuple[str, FileModel]]:
        """
        Iterate over `(key, FileModel)` pairs of the underlying mapping.
        """
        return iter(self.root.items())

    def __getitem__(self, item: str) -> FileModel:
        """
        Return the file stored under `item`.

        :param item: key of the file in the mapping
        :raises KeyError: if there is no file stored under `item`
        """
        return self.root[item]

    def __getattr__(self, item: str) -> FileModel:
        """
        Attribute-style access to the stored files (`files.key`).

        :param item: key of the file in the mapping
        :raises AttributeError: if there is no file stored under `item`
        """
        # __getattr__ is only called after normal attribute lookup failed, so it
        # must signal a miss with AttributeError (not KeyError); otherwise
        # hasattr(), getattr(obj, name, default) and copy/pickle probing break.
        # `root` is fetched through object.__getattribute__ so that an instance
        # whose `root` is not set yet cannot recurse back into this method.
        try:
            return object.__getattribute__(self, "root")[item]
        except (AttributeError, KeyError):
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {item!r}"
            ) from None


class StatsReturn(BaseModel):
bedfiles_number: int = 0
bedsets_number: int = 0
Expand Down
13 changes: 13 additions & 0 deletions bbconf/models/extras_models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import datetime
from typing import List, Optional, Union

from pydantic import BaseModel

from bbconf.models.base_models import FileModel


class ExtraFilesResults(BaseModel):
    """
    One page of results returned by an extra-files search.
    """

    # Defaults mirror BBExtras.search_files (limit=10, offset=0); they were
    # previously swapped (limit=0, offset=10).
    limit: int = 10  # page size that was requested
    offset: int = 0  # number of records that were skipped
    total: int = 0  # number of records matching the query, ignoring paging
    results: List[FileModel]  # files on the requested page
186 changes: 186 additions & 0 deletions bbconf/modules/extras.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
import logging
from typing import Optional, Union
from pathlib import Path
import os

from bbconf.config_parser.bedbaseconfig import BedBaseConfig
from sqlalchemy import select, and_, func, or_
from sqlalchemy.orm import Session
from bbconf.const import PKG_NAME
from bbconf.models.base_models import FileModel, FileModelDict
from bbconf.db_utils import Files
from bbconf.models.extras_models import ExtraFilesResults
from bbconf.config_parser.const import DB_EXTRAS_TYPE

_LOGGER = logging.getLogger(PKG_NAME)


class BBExtras:
    """
    Class that holds methods for extra files: files that are not related to
    bed files, but are important for analysis. They are uploaded to S3 and
    registered as `Files` records with type `DB_EXTRAS_TYPE`.
    """

    def __init__(self, config: BedBaseConfig):
        """
        :param config: config object
        """
        self.config = config

    def _to_file_model(self, record: Files) -> FileModel:
        """
        Convert a `Files` database record into a FileModel with access methods.

        :param record: database record of the extra file
        :return: FileModel with the file metadata
        """
        return FileModel(
            name=record.name,
            title=record.title,
            path=record.path,
            description=record.description,
            size=record.size,
            genome_alias=record.genome_alias,
            access_methods=self.config.construct_access_method_list(record.path),
        )

    def add_extra_file(
        self,
        name: str,
        title: str,
        path: Union[str, Path],
        description: Optional[str] = "",
        genome: Optional[str] = None,
    ) -> None:
        """
        Add files that are not related to bed files, but are important for analysis.

        :param name: Identifier of the file, e.g. open_signal_matrix. It is stored
            as the record name and is the key used by `get` and `delete`.
        :param title: Title of the file. e.g. open_signal_matrix_hg38
        :param path: Local path to the file.
        :param description: Description of the file that will be uploaded
        :param genome: Genome alias of the file (if any), e.g. hg38
        :raises FileNotFoundError: if `path` does not point to an existing file
        """

        path = Path(path)

        if not path.is_file():
            raise FileNotFoundError(f"Provided file doesn't exist: '{str(path)}'")

        # The mapping is passed positionally (as the root value) instead of being
        # unpacked into keyword arguments, so a file named "root" cannot collide
        # with the `root` parameter of the model constructor.
        files_annotation = FileModelDict(
            {
                name: FileModel(
                    name=name,
                    title=title,
                    path=str(path),
                    description=description,
                    genome_alias=genome,
                )
            }
        )

        # Upload first: if it fails, nothing is written to the database.
        # NOTE(review): no check is made here for an existing extra file with
        # the same name - confirm whether duplicates should be rejected.
        files_annotation: FileModelDict = self.config.upload_files_s3(
            name, files=files_annotation, base_path="", type=DB_EXTRAS_TYPE
        )

        with Session(self.config.db_engine.engine) as session:
            for _, file in files_annotation:
                session.add(
                    Files(
                        name=file.name,
                        type=DB_EXTRAS_TYPE,
                        title=file.title,
                        path=file.path,
                        description=file.description,
                        size=file.size,
                        genome_alias=file.genome_alias,
                    )
                )

            session.commit()

    def search_files(
        self, query: Optional[str] = None, limit: int = 10, offset: int = 0
    ) -> ExtraFilesResults:
        """
        Get extra files from the database.

        :param query: Text to search for; matched case-insensitively as a substring
            of the file name or description. e.g. 'open_signal_matrix_hg38', or
            just 'open'. If empty, all extra files are returned.
        :param limit: Maximum number of results to return (page size). [Default: 10].
        :param offset: Number of results to skip. [Default: 0].
        :return: ExtraFilesResults object with the results.
        """

        where_statement = Files.type == DB_EXTRAS_TYPE
        if query:
            where_statement = and_(
                or_(
                    Files.name.ilike(f"%{query}%"),
                    Files.description.ilike(f"%{query}%"),
                ),
                where_statement,
            )

        with Session(self.config.db_engine.engine) as session:
            # LIMIT/OFFSET without ORDER BY returns rows in an unspecified order,
            # so pages could overlap or skip records; order by id to keep the
            # paging stable.
            records = session.scalars(
                select(Files)
                .where(where_statement)
                .order_by(Files.id)
                .limit(limit)
                .offset(offset)
            )

            total_results = session.scalar(
                select(func.count(Files.id)).where(where_statement)
            )

            return ExtraFilesResults(
                limit=limit,
                offset=offset,
                total=total_results,
                results=[self._to_file_model(record) for record in records],
            )

    def get(self, name: str) -> FileModel:
        """
        Get extra file from the database.

        :param name: Name of the file to get. e.g. 'open_signal_matrix_hg38'
        :return: FileModel object with the file metadata.
        :raises FileNotFoundError: if there is no extra file with this name
        """

        with Session(self.config.db_engine.engine) as session:
            where_statement = and_(Files.name == name, Files.type == DB_EXTRAS_TYPE)

            # NOTE(review): one_or_none() raises if several records share this
            # name, whereas `delete` takes the first match - confirm intent.
            result = session.scalars(select(Files).where(where_statement)).one_or_none()

            if result is None:
                raise FileNotFoundError(f"File {name} not found")

            return self._to_file_model(result)

    def delete(self, name: str) -> None:
        """
        Delete extra file from S3 and from the database.

        :param name: Name of the file to delete. e.g. 'open_signal_matrix_hg38'
        :raises FileNotFoundError: if there is no extra file with this name
        """

        with Session(self.config.db_engine.engine) as session:
            where_statement = and_(Files.name == name, Files.type == DB_EXTRAS_TYPE)

            result = session.scalar(select(Files).where(where_statement))

            if result is None:
                raise FileNotFoundError(f"File {name} not found")

            # NOTE(review): the S3 object is removed before the database change
            # is committed; if the commit fails the record will point to a
            # missing object.
            self.config.delete_s3(result.path)

            session.delete(result)

            session.commit()
23 changes: 22 additions & 1 deletion manual_testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,25 @@ def compreh_stats():
results


def add_extra_files():
    """
    Manual check of the extra-files module: upload one test file through the
    agent and then run a search over the stored extra files.
    """
    from bbconf import BedBaseAgent

    bb_agent = BedBaseAgent(config="/home/bnt4me/virginia/repos/bedhost/config.yaml")
    extras = bb_agent.extras

    test_file = dict(
        name="test_test",
        title="some_test_file",
        path="/home/bnt4me/Downloads/region_commonality.svg",
        description="This is test for extra file",
        genome="hg38",
    )
    extras.add_extra_file(**test_file)

    # Result is kept only for inspection in a debugger.
    search_hits = extras.search_files(query="t")

    # To exercise the remaining methods as well, run:
    #   extras.get(name="test_test")
    #   extras.delete(name="test_test")


if __name__ == "__main__":
# zarr_s3()
# add_s3()
Expand All @@ -228,4 +247,6 @@ def compreh_stats():
# neighbour_beds()
# sql_search()
# config_t()
compreh_stats()
# compreh_stats()

add_extra_files()
Loading