Skip to content

Commit

Permalink
implement display function for all datatypes
Browse files Browse the repository at this point in the history
display the first 100K of a binary file or a file whose type is unknown to Galaxy
as ASCII text instead of downloading this file.
  • Loading branch information
SergeyYakubov committed Jan 30, 2023
1 parent 8e16a82 commit cfe6ef0
Show file tree
Hide file tree
Showing 2 changed files with 115 additions and 48 deletions.
146 changes: 98 additions & 48 deletions lib/galaxy/datatypes/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,36 @@ def __init__(cls, name, bases, dict_):
metadata.Statement.process(cls)


def _is_binary_file(data):
    """Tell whether ``data`` should be handled as a binary/unknown dataset.

    Returns True when the dataset's datatype is an instance of
    ``binary.Binary`` or when it is exactly the base ``Data`` class
    (subclasses of ``Data`` other than ``Binary`` do not count).
    """
    # Imported lazily, as elsewhere in this module.
    from galaxy.datatypes import binary

    datatype = data.datatype
    if isinstance(datatype, binary.Binary):
        return True
    # Exact type comparison on purpose: only the bare base class qualifies.
    return type(datatype) is Data


def _get_max_peak_size(data):
    """Return how many bytes of ``data`` may be shown in a preview.

    * ``text.Html`` datatypes: 10,000,000 bytes.
    * ``binary.Binary`` datatypes: 100,000 bytes.
    * anything else: ``DEFAULT_MAX_PEEK_SIZE``.

    NOTE(review): datasets whose datatype is exactly ``Data`` are treated as
    binary by ``_is_binary_file`` but receive the default limit here, not the
    100,000-byte one -- confirm this is intended.
    """
    from galaxy.datatypes import (
        binary,
        text,
    )

    datatype = data.datatype
    if isinstance(datatype, text.Html):
        return 10000000  # 10 MB for html
    if isinstance(datatype, binary.Binary):
        return 100000  # 100 KB for binary
    return DEFAULT_MAX_PEEK_SIZE


def _get_file_size(data):
    """Return the size in bytes of the file backing ``data``.

    The size recorded on ``data.dataset.file_size`` is used when it is
    non-zero. Otherwise the dataset's object store is asked for the size, and
    if there is no object store the file at ``data.file_name`` is stat'ed.
    """
    dataset = data.dataset
    recorded_size = int(dataset.file_size or 0)
    if recorded_size != 0:
        return recorded_size

    # Nothing recorded (None/0): fall back to the storage layer.
    store = dataset.object_store
    if store:
        return store.size(dataset)
    return os.stat(data.file_name).st_size


@p_dataproviders.decorators.has_dataproviders
class Data(metaclass=DataMeta):
"""
Expand Down Expand Up @@ -427,6 +457,57 @@ def to_archive(self, dataset: DatasetProtocol, name: str = "") -> Iterable:
file_paths.append(dataset.file_name)
return zip(file_paths, rel_paths)

def _serve_file_download(self, headers, data, trans, to_ext, file_size, **kwd):
    """Serve ``data`` as a download.

    Datasets whose extension is a composite extension (or ``html`` /
    ``data_manager_json``) are delegated to ``_archive_composite_dataset``
    using the ``do_action`` keyword (default ``"zip"``). Every other dataset
    is returned as an open binary file handle together with ``headers``,
    which are updated in place with length, type and disposition.

    The returned file handle is left open for the caller to consume.
    """
    archivable = trans.app.datatypes_registry.get_composite_extensions()
    # "html": for archiving composite datatypes;
    # "data_manager_json": for downloading bundles if bundled.
    for extra in ("html", "data_manager_json"):
        archivable.append(extra)

    if data.extension in archivable:
        action = kwd.get("do_action", "zip")
        return self._archive_composite_dataset(trans, data, headers, do_action=action)

    headers["Content-Length"] = str(file_size)
    download_name = self._download_filename(
        data,
        to_ext,
        hdca=kwd.get("hdca"),
        element_identifier=kwd.get("element_identifier"),
        filename_pattern=kwd.get("filename_pattern"),
    )
    # force octet-stream so Safari doesn't append mime extensions to filename
    headers["content-type"] = "application/octet-stream"
    headers["Content-Disposition"] = f'attachment; filename="{download_name}"'
    return open(data.file_name, "rb"), headers

def _serve_binary_file_contents_as_text(self, trans, data, headers, file_size, max_peek_size):
    """Render the leading bytes of a binary/unknown dataset in an HTML page.

    :param trans: transaction object providing ``fill_template_mako``.
    :param data: dataset instance; its ``file_name`` is read.
    :param headers: response headers dict, updated in place
        (``content-type`` is set to ``text/html``).
    :param file_size: size of the dataset file in bytes.
    :param max_peek_size: maximum number of bytes to read for the preview.
    :returns: ``(rendered_template, headers)``.
    """
    headers["content-type"] = "text/html"
    # Read the preview inside a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(data.file_name, "rb") as fh:
        file_contents = fh.read(max_peek_size)
    return (
        trans.fill_template_mako(
            "/dataset/binary_file.mako",
            data=data,
            file_contents=file_contents,
            file_size=util.nice_size(file_size),
            # Tells the template whether only part of the file is shown.
            truncated=file_size > max_peek_size,
        ),
        headers,
    )

def _serve_file_contents(self, trans, data, headers, preview, file_size, max_peek_size):
    """Serve a dataset for in-browser display.

    The full content is streamed through ``_yield_user_file_content`` when
    ``preview`` is false, when the datatype is an image, or when the file is
    smaller than ``max_peek_size``. Otherwise only the first
    ``max_peek_size`` bytes are rendered through the large-file template.

    :param trans: transaction object providing ``fill_template_mako``.
    :param data: dataset instance to display.
    :param headers: response headers dict, updated in place.
    :param preview: preview flag; converted with ``util.string_as_bool``.
    :param file_size: size of the dataset file in bytes.
    :param max_peek_size: maximum number of bytes shown in a preview.
    :returns: ``(content, headers)``.
    """
    from galaxy.datatypes import images

    preview = util.string_as_bool(preview)
    if not preview or isinstance(data.datatype, images.Image) or file_size < max_peek_size:
        return self._yield_user_file_content(trans, data, data.file_name, headers), headers

    # preview large text file
    headers["content-type"] = "text/html"
    # Use a context manager so the file handle is closed as soon as the
    # truncated preview has been read.
    with open(data.file_name, "rb") as fh:
        truncated_data = fh.read(max_peek_size)
    return (
        trans.fill_template_mako("/dataset/large_file.mako", truncated_data=truncated_data, data=data),
        headers,
    )

def display_data(
self,
trans,
Expand All @@ -446,13 +527,10 @@ def display_data(
providers?).
"""
headers = kwd.get("headers", {})
# Relocate all composite datatype display to a common location.
composite_extensions = trans.app.datatypes_registry.get_composite_extensions()
composite_extensions.append("html") # for archiving composite datatypes
composite_extensions.append("data_manager_json") # for downloading bundles if bundled.
# Prevent IE8 from sniffing content type since we're explicit about it. This prevents intentionally text/plain
# content from being rendered in the browser
headers["X-Content-Type-Options"] = "nosniff"

if filename and filename != "index":
# For files in extra_files_path
extra_dir = dataset.dataset.extra_files_path_name
Expand Down Expand Up @@ -497,52 +575,24 @@ def display_data(
raise ObjectNotFound(f"Could not find '{filename}' on the extra files path {file_path}.")
self._clean_and_set_mime_type(trans, dataset.get_mime(), headers)

trans.log_event(f"Display dataset id: {str(dataset.id)}")
from galaxy.datatypes import ( # DBTODO REMOVE THIS AT REFACTOR
binary,
images,
text,
)
downloading = to_ext is not None
file_size = _get_file_size(dataset)

if to_ext or isinstance(dataset.datatype, binary.Binary): # Saving the file, or binary file
if dataset.extension in composite_extensions:
return self._archive_composite_dataset(trans, dataset, headers, do_action=kwd.get("do_action", "zip"))
else:
headers["Content-Length"] = str(os.stat(dataset.file_name).st_size)
filename = self._download_filename(
dataset,
to_ext,
hdca=kwd.get("hdca"),
element_identifier=kwd.get("element_identifier"),
filename_pattern=kwd.get("filename_pattern"),
)
headers[
"content-type"
] = "application/octet-stream" # force octet-stream so Safari doesn't append mime extensions to filename
headers["Content-Disposition"] = f'attachment; filename="{filename}"'
return open(dataset.file_name, "rb"), headers
if not os.path.exists(dataset.file_name):
raise ObjectNotFound(f"File Not Found ({dataset.file_name}).")
max_peek_size = DEFAULT_MAX_PEEK_SIZE # 1 MB
if isinstance(dataset.datatype, text.Html):
max_peek_size = 10000000 # 10 MB for html
preview = util.string_as_bool(preview)
if (
not preview
or isinstance(dataset.datatype, images.Image)
or os.stat(dataset.file_name).st_size < max_peek_size
):
return self._yield_user_file_content(trans, dataset, dataset.file_name, headers), headers
else:
headers["content-type"] = "text/html"
return (
trans.fill_template_mako(
"/dataset/large_file.mako",
truncated_data=open(dataset.file_name, "rb").read(max_peek_size),
dataset=dataset,
),
headers,
)

if downloading:
trans.log_event(f"Download dataset id: {str(dataset.id)}")
return self._serve_file_download(headers, dataset, trans, to_ext, file_size, **kwd)
else: # displaying
trans.log_event(f"Display dataset id: {str(dataset.id)}")
max_peek_size = _get_max_peak_size(dataset)
if (
_is_binary_file(dataset) and preview and hasattr(trans, "fill_template_mako")
): # preview file which format is unknown (to Galaxy), we still try to display this as text
return self._serve_binary_file_contents_as_text(trans, dataset, headers, file_size, max_peek_size)
else: # text/html, or image, or display was called without preview flag
return self._serve_file_contents(trans, dataset, headers, preview, file_size, max_peek_size)

def display_as_markdown(self, dataset_instance: DatasetProtocol) -> str:
"""Prepare for embedding dataset into a basic Markdown document.
Expand All @@ -551,7 +601,7 @@ def display_as_markdown(self, dataset_instance: DatasetProtocol) -> str:
on datatypes not tightly tied to a Galaxy version (e.g. datatypes in the
Tool Shed).
Speaking very losely - the datatype should should load a bounded amount
Speaking very loosely - the datatype should load a bounded amount
of data from the supplied dataset instance and prepare for embedding it
into Markdown. This should be relatively vanilla Markdown - the result of
this is bleached and it should not contain nested Galaxy Markdown
Expand Down
17 changes: 17 additions & 0 deletions templates/webapps/galaxy/dataset/binary_file.mako
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
<%inherit file="/base.mako"/>
<%namespace file="/dataset/display.mako" import="render_deleted_data_message" />
## Preview page for a binary dataset or one whose type is unknown to Galaxy.
## Context variables used below: data, file_size, truncated, file_contents.
## NOTE(review): the "first 100KB" wording is hard-coded here, while the number
## of bytes actually read is chosen by the caller -- confirm they always agree.

${ render_deleted_data_message( data ) }

<div class="warningmessagelarge">
This is a binary (or unknown to Galaxy) dataset of size ${ file_size }. Preview is not implemented for this filetype. Displaying
%if truncated:
first 100KB
%endif
as ASCII text<br/>
<a href="${h.url_for( controller='dataset', action='display', dataset_id=trans.security.encode_id( data.id ), to_ext=data.ext )}">Download</a>
</div>

<pre>
${ util.unicodify( file_contents ) | h }
</pre>

0 comments on commit cfe6ef0

Please sign in to comment.