Skip to content

Commit

Permalink
Implement RO Crate export of Galaxy histories and invocations.
Browse files Browse the repository at this point in the history
  • Loading branch information
jmchilton committed Sep 13, 2022
1 parent c1008f5 commit 3dbe289
Show file tree
Hide file tree
Showing 7 changed files with 232 additions and 1 deletion.
2 changes: 2 additions & 0 deletions lib/galaxy/dependencies/pinned-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ amqp==5.1.1 ; python_version >= "3.7" and python_version < "3.11"
anyio==3.6.1 ; python_version >= "3.7" and python_version < "3.11"
apispec==5.2.2 ; python_version >= "3.7" and python_version < "3.11"
appdirs==1.4.4 ; python_version >= "3.7" and python_version < "3.11"
arcp==0.2.1
argcomplete==2.0.0 ; python_version >= "3.7" and python_version < "3.11"
async-timeout==4.0.2 ; python_version >= "3.7" and python_version < "3.11"
asynctest==0.13.0 ; python_version >= "3.7" and python_version < "3.8"
Expand Down Expand Up @@ -139,6 +140,7 @@ requests-oauthlib==1.3.1 ; python_version >= "3.7" and python_version < "3.11"
requests-toolbelt==0.9.1 ; python_version >= "3.7" and python_version < "3.11"
requests==2.28.1 ; python_version >= "3.7" and python_version < "3.11"
rich==12.5.1 ; python_version >= "3.7" and python_version < "3.11"
rocrate==0.6.1
routes==2.5.1 ; python_version >= "3.7" and python_version < "3.11"
rsa==4.9 ; python_version >= "3.7" and python_version < "3.11"
ruamel-yaml-clib==0.2.6 ; platform_python_implementation == "CPython" and python_version < "3.10" and python_version >= "3.7"
Expand Down
135 changes: 134 additions & 1 deletion lib/galaxy/model/store/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@

from bdbag import bdbag_api as bdb
from boltons.iterutils import remap
from rocrate.model.computationalworkflow import (
ComputationalWorkflow,
WorkflowDescription,
)
from rocrate.rocrate import ROCrate
from sqlalchemy.orm import joinedload
from sqlalchemy.sql import expression
from typing_extensions import Protocol
Expand Down Expand Up @@ -2116,6 +2121,130 @@ def __exit__(self, exc_type, exc_val, exc_tb):
return isinstance(exc_val, TypeError)


class WriteCrates:
def _generate_markdown_readme(self):
markdown_parts: List[str] = []
if self._is_single_invocation_export():
invocation = self.included_invocations[0]
name = invocation.workflow.name
create_time = invocation.create_time
markdown_parts.append("# Galaxy Workflow Invocation Export")
markdown_parts.append("")
markdown_parts.append(f"This crate describes the invocation of workflow {name} executed at {create_time}.")
else:
markdown_parts.append("# Galaxy Dataset Export")

return "\n".join(markdown_parts)

def _is_single_invocation_export(self):
return len(self.included_invocations) == 1

def _init_crate(self):
ro_crate = ROCrate()

markdown_path = os.path.join(self.export_directory, "README.md")
with open(markdown_path, "w") as f:
f.write(self._generate_markdown_readme())

properties = {
"name": "README.md",
"encodingFormat": "text/markdown",
"about": {"@id": "./"},
}
ro_crate.add_file(
markdown_path,
dest_path="README.md",
properties=properties,
)

for dataset, _ in self.included_datasets.values():
if dataset.dataset.id in self.dataset_id_to_path:
file_name, _ = self.dataset_id_to_path[dataset.dataset.id]
name = dataset.name
encoding_format = dataset.datatype.get_mime()
properties = {
"name": name,
"encodingFormat": encoding_format,
}
ro_crate.add_file(
file_name,
dest_path=file_name,
properties=properties,
)

workflows_directory = self.workflows_directory
for filename in os.listdir(workflows_directory):
workflow_cls = ComputationalWorkflow if not filename.endswith(".cwl") else WorkflowDescription
lang = "galaxy" if not filename.endswith(".cwl") else "cwl"
dest_path = os.path.join("workflows", filename)
ro_crate.add_workflow(
source=os.path.join(workflows_directory, filename),
dest_path=dest_path,
main=False,
cls=workflow_cls,
lang=lang,
)

found_workflow_licenses = set()
for workflow_invocation in self.included_invocations:
workflow = workflow_invocation.workflow
license = workflow.license
if license:
found_workflow_licenses.add(license)
if len(found_workflow_licenses) == 1:
ro_crate.license = next(iter(found_workflow_licenses))

# TODO: license per workflow
# TODO: API options to license workflow outputs seprately
# TODO: Export report as PDF and stick it in here
return ro_crate


class ROCrateModelExportStore(DirectoryModelExportStore, WriteCrates):
def __init__(self, crate_directory, **kwds):
self.crate_directory = crate_directory
super().__init__(crate_directory, export_files="symlink", **kwds)

def _finalize(self):
super()._finalize()
ro_crate = self._init_crate()
ro_crate.write(self.crate_directory)


class ROCrateArchiveModelExportStore(DirectoryModelExportStore, WriteCrates):
def __init__(self, uri, **kwds):
temp_output_dir = tempfile.mkdtemp()
self.temp_output_dir = temp_output_dir
if "://" in str(uri):
self.out_file = os.path.join(temp_output_dir, "out")
self.file_source_uri = uri
export_directory = os.path.join(temp_output_dir, "export")
else:
self.out_file = uri
self.file_source_uri = None
export_directory = temp_output_dir
super().__init__(export_directory, **kwds)

def _finalize(self):
super()._finalize()
ro_crate = self._init_crate()
ro_crate.write(self.export_directory)
out_file_name = str(self.out_file)
if out_file_name.endswith(".zip"):
out_file = out_file_name[: -len(".zip")]
else:
out_file = out_file_name
rval = shutil.make_archive(out_file, "zip", self.export_directory)
if not self.file_source_uri:
shutil.move(rval, self.out_file)
else:
file_source_path = self.file_sources.get_file_source_path(self.file_source_uri)
file_source = file_source_path.file_source
assert os.path.exists(self.out_file)
file_source.write_from(file_source_path.path, self.out_file)
shutil.rmtree(self.temp_output_dir)


class TarModelExportStore(DirectoryModelExportStore):
def __init__(self, uri, gzip=True, **kwds):
self.gzip = gzip
Expand Down Expand Up @@ -2182,7 +2311,9 @@ def _finalize(self):


def get_export_store_factory(app, download_format: str, export_files=None) -> Callable[[str], ModelExportStore]:
export_store_class: Union[Type[TarModelExportStore], Type[BagArchiveModelExportStore]]
export_store_class: Union[
Type[TarModelExportStore], Type[BagArchiveModelExportStore], Type[ROCrateArchiveModelExportStore]
]
export_store_class_kwds = {
"app": app,
"export_files": export_files,
Expand All @@ -2194,6 +2325,8 @@ def get_export_store_factory(app, download_format: str, export_files=None) -> Ca
elif download_format in ["tar"]:
export_store_class = TarModelExportStore
export_store_class_kwds["gzip"] = False
elif download_format == "rocrate.zip":
export_store_class = ROCrateArchiveModelExportStore
elif download_format.startswith("bag."):
bag_archiver = download_format[len("bag.") :]
if bag_archiver not in ["zip", "tar", "tgz"]:
Expand Down
1 change: 1 addition & 0 deletions lib/galaxy/schema/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -1301,6 +1301,7 @@ class ModelStoreFormat(str, Enum):
BAG_DOT_ZIP = "bag.zip"
BAG_DOT_TAR = "bag.tar"
BAG_DOT_TGZ = "bag.tgz"
ROCRATE_ZIP = "rocrate.zip"


class StoreContentSource(Model):
Expand Down
1 change: 1 addition & 0 deletions packages/data/setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ install_requires =
pylibmagic
python-magic
pysam
rocrate
social-auth-core[openidconnect]==4.0.3
SQLAlchemy>=1.4.25,<2
tifffile<=2020.9.3 # Last version compatible with python 3.6
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ python-multipart = "*" # required to support form parsing in FastAPI/Starlette
PyYAML = "*"
refgenconf = ">=0.12.0"
requests = "*"
rocrate = "*"
Routes = "*"
schema-salad = "!=8.3.20220721194857" # https://github.com/common-workflow-language/schema_salad/issues/575
social-auth-core = {version = "==4.0.3", extras = ["openidconnect"]}
Expand Down
22 changes: 22 additions & 0 deletions test/integration/test_workflow_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
Dict,
)

from galaxy.util.compression_utils import CompressedFile
from galaxy_test.api.test_workflows import RunsWorkflowFixtures
from galaxy_test.base import api_asserts
from galaxy_test.base.api import UsesCeleryTasks
Expand Down Expand Up @@ -53,6 +54,27 @@ def test_export_import_invocation_with_input_as_output_uris(self):
def test_export_import_invocation_with_input_as_output_sts(self):
self._test_export_import_invocation_with_input_as_output(False)

def test_export_ro_crate_basic(self):
ro_crate_path = self._export_ro_crate(False)
assert CompressedFile(ro_crate_path).file_type == "zip"

def _export_ro_crate(self, to_uri):
with self.dataset_populator.test_history() as history_id:
summary = self._run_workflow_with_runtime_data_column_parameter(history_id)
invocation_id = summary.invocation_id
extension = "rocrate.zip"
if to_uri:
uri = f"gxfiles://posix_test/invocation.{extension}"
self.workflow_populator.download_invocation_to_uri(invocation_id, uri, extension=extension)
root = self.root_dir
invocation_path = os.path.join(root, f"invocation.{extension}")
assert os.path.exists(invocation_path)
uri = invocation_path
else:
temp_tar = self.workflow_populator.download_invocation_to_store(invocation_id, extension=extension)
uri = temp_tar
return uri

def _test_export_import_invocation_collection_input(self, use_uris, model_store_format="tgz"):
with self.dataset_populator.test_history() as history_id:
summary = self._run_workflow_with_output_collections(history_id)
Expand Down
71 changes: 71 additions & 0 deletions test/unit/data/model/test_model_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
)

import pytest
from rocrate.rocrate import ROCrate
from sqlalchemy.orm.scoping import scoped_session

from galaxy import model
Expand Down Expand Up @@ -334,6 +335,75 @@ def test_import_export_invocation():
import_model_store.perform_import(history=h2)


def validate_crate_metadata(as_dict):
assert as_dict["@context"] == "https://w3id.org/ro/crate/1.1/context"


def validate_has_pl_galaxy(ro_crate: ROCrate):
found = False
for e in ro_crate.get_entities():
if e.id == "https://w3id.org/workflowhub/workflow-ro-crate#galaxy":
found = True
assert e.url == "https://galaxyproject.org/"
assert found


def validate_has_mit_license(ro_crate: ROCrate):
found_license = False
for e in ro_crate.get_entities():
if e.id == "./":
assert e["license"] == "MIT"
found_license = True
assert found_license


def validate_has_readme(ro_crate: ROCrate):
found_readme = False
for e in ro_crate.get_entities():
if e.id == "README.md":
assert e.type == "File"
assert e["encodingFormat"] == "text/markdown"
# assert e["about"] == "./"
found_readme = True
assert found_readme


def validate_crate_directory(crate_directory):
metadata_json_path = crate_directory / "ro-crate-metadata.json"
with metadata_json_path.open() as f:
metadata_json = json.load(f)
validate_crate_metadata(metadata_json)
crate = ROCrate(crate_directory)
validate_has_pl_galaxy(crate)
validate_has_mit_license(crate)
validate_has_readme(crate)
# print(json.dumps(metadata_json, indent=4))


def test_export_invocation_to_ro_crate(tmp_path):
app = _mock_app()
workflow_invocation = _setup_invocation(app)

crate_directory = tmp_path / "crate"
with store.ROCrateModelExportStore(crate_directory, app=app) as export_store:
export_store.export_workflow_invocation(workflow_invocation)
validate_crate_directory(crate_directory)


def test_export_invocation_to_ro_crate_archive(tmp_path):
app = _mock_app()
workflow_invocation = _setup_invocation(app)

crate_zip = tmp_path / "crate.zip"
with store.ROCrateArchiveModelExportStore(crate_zip, app=app) as export_store:
export_store.export_workflow_invocation(workflow_invocation)
compressed_file = CompressedFile(crate_zip)
assert compressed_file.file_type == "zip"
compressed_file.extract(tmp_path)
crate_directory = tmp_path / "crate"
validate_crate_directory(crate_directory)


def test_finalize_job_state():
"""Verify jobs are given finalized states on import."""
app, h, temp_directory, import_history = _setup_simple_export({"for_edit": False})
Expand Down Expand Up @@ -665,6 +735,7 @@ def _setup_invocation(app):
workflow_step_1.type = "data_input"
sa_session.add(workflow_step_1)
workflow_1 = _workflow_from_steps(u, [workflow_step_1])
workflow_1.license = "MIT"
sa_session.add(workflow_1)
workflow_invocation = _invocation_for_workflow(u, workflow_1)
invocation_step = model.WorkflowInvocationStep()
Expand Down

0 comments on commit 3dbe289

Please sign in to comment.