Skip to content

Commit

Permalink
Merge pull request #820 from dandi/gh-817
Browse files Browse the repository at this point in the history
Make download fail immediately on nonexistent resources
  • Loading branch information
yarikoptic authored Oct 28, 2021
2 parents 1620fcd + eca1fe3 commit c28a4f0
Show file tree
Hide file tree
Showing 3 changed files with 237 additions and 64 deletions.
166 changes: 103 additions & 63 deletions dandi/dandiarchive.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,9 @@ class ParsedDandiURL(ABC, BaseModel):
api_url: AnyHttpUrl
#: The ID of the Dandiset given in the URL
dandiset_id: Optional[str]
#: The version of the Dandiset, if specified. If this is not set, methods
#: that need the Dandiset version will call `get_version_id()` to get an
#: appropriate default value.
#: The version of the Dandiset, if specified. If this is not set, the
#: version will be defaulted using the rules described under
#: `DandiAPIClient.get_dandiset()`.
version_id: Optional[str] = None

@validator("api_url")
Expand All @@ -74,37 +74,25 @@ def get_client(self) -> DandiAPIClient:
"""
return DandiAPIClient(self.api_url)

def get_dandiset(self, client: DandiAPIClient) -> Optional[RemoteDandiset]:
def get_dandiset(
self, client: DandiAPIClient, lazy: bool = True
) -> Optional[RemoteDandiset]:
"""
Returns information about the specified (or default) version of the
specified Dandiset. Returns `None` if the URL did not contain a
Dandiset identifier.
If ``lazy`` is true, a "lazy" `RemoteDandiset` instance is returned,
with no requests made until any data is actually required.
"""
if self.dandiset_id is not None:
return client.get_dandiset(self.dandiset_id, self.get_version_id(client))
return client.get_dandiset(self.dandiset_id, self.version_id, lazy=lazy)
else:
return None

def get_version_id(self, client: DandiAPIClient) -> str:
"""
Returns `version_id` or determines a default value if unset.
If `version_id` is non-`None`, returns it. Otherwise, the ID of the
most recent published version of the Dandiset is returned, if any,
otherwise the ID of the draft version is returned.
"""
if self.dandiset_id is None:
raise ValueError("Cannot get version for Dandiset-less URL")
elif self.version_id is None:
r = client.get(f"/dandisets/{self.dandiset_id}/")
version = r["most_recent_published_version"] or r["draft_version"]
return version["version"]
else:
return self.version_id

@abstractmethod
def get_assets(
self, client: DandiAPIClient, order: Optional[str] = None
self, client: DandiAPIClient, order: Optional[str] = None, strict: bool = False
) -> Iterator[BaseRemoteAsset]:
"""
Returns an iterator of asset structures for the assets referred to by
Expand All @@ -117,6 +105,10 @@ def get_assets(
by passing the name of that field as the ``order`` parameter. The
accepted field names are ``"created"``, ``"modified"``, and ``"path"``.
Prepend a hyphen to the field name to reverse the sort order.
If ``strict`` is true, then fetching assets for a URL that refers to a
nonexistent resource will raise a `NotFoundError`; if it is false, the
method will instead return an empty iterator.
"""
...

Expand All @@ -130,7 +122,7 @@ def get_asset_ids(self, client: DandiAPIClient) -> Iterator[str]:

@contextmanager
def navigate(
self,
self, strict: bool = False
) -> Iterator[
Tuple[DandiAPIClient, Optional[RemoteDandiset], Iterator[BaseRemoteAsset]]
]:
Expand All @@ -139,11 +131,19 @@ def navigate(
`~dandi.dandiapi.DandiAPIClient` (with an open session that is closed
when the context manager closes), the return value of `get_dandiset()`,
and the return value of `get_assets()`.
If ``strict`` is true, then `get_dandiset()` is called with
``lazy=False`` and `get_assets()` is called with ``strict=True``; if
``strict`` is false, the opposite occurs.
"""
# We could later try to "dandi_authenticate" if run into permission
# issues. May be it could be not just boolean but the "id" to be used?
with self.get_client() as client:
yield (client, self.get_dandiset(client), self.get_assets(client))
yield (
client,
self.get_dandiset(client, lazy=not strict),
self.get_assets(client, strict=strict),
)


class DandisetURL(ParsedDandiURL):
Expand All @@ -152,10 +152,13 @@ class DandisetURL(ParsedDandiURL):
"""

def get_assets(
self, client: DandiAPIClient, order: Optional[str] = None
self, client: DandiAPIClient, order: Optional[str] = None, strict: bool = False
) -> Iterator[BaseRemoteAsset]:
"""Returns all assets in the Dandiset"""
return self.get_dandiset(client).get_assets(order=order)
with _maybe_strict(strict):
yield from self.get_dandiset(client, lazy=not strict).get_assets(
order=order
)


class SingleAssetURL(ParsedDandiURL):
Expand All @@ -179,16 +182,15 @@ class BaseAssetIDURL(SingleAssetURL):
asset_id: str

def get_assets(
self, client: DandiAPIClient, order: Optional[str] = None
self, client: DandiAPIClient, order: Optional[str] = None, strict: bool = False
) -> Iterator[BaseRemoteAsset]:
"""
Yields the asset with the given ID. Yields nothing if the asset does
not exist.
Yields the asset with the given ID. If the asset does not exist, then
a `NotFoundError` is raised if ``strict`` is true, and nothing is
yielded if ``strict`` is false.
"""
try:
with _maybe_strict(strict):
yield client.get_asset(self.asset_id)
except NotFoundError:
return

def get_asset_ids(self, client: DandiAPIClient) -> Iterator[str]:
"""Yields the ID of the asset (regardless of whether it exists)"""
Expand All @@ -203,16 +205,15 @@ class AssetIDURL(SingleAssetURL):
asset_id: str

def get_assets(
self, client: DandiAPIClient, order: Optional[str] = None
self, client: DandiAPIClient, order: Optional[str] = None, strict: bool = False
) -> Iterator[BaseRemoteAsset]:
"""
Yields the asset with the given ID. Yields nothing if the asset does
not exist.
Yields the asset with the given ID. If the Dandiset or asset does not
exist, then a `NotFoundError` is raised if ``strict`` is true, and
nothing is yielded if ``strict`` is false.
"""
try:
yield self.get_dandiset(client).get_asset(self.asset_id)
except NotFoundError:
return
with _maybe_strict(strict):
yield self.get_dandiset(client, lazy=not strict).get_asset(self.asset_id)

def get_asset_ids(self, client: DandiAPIClient) -> Iterator[str]:
"""Yields the ID of the asset (regardless of whether it exists)"""
Expand All @@ -225,40 +226,60 @@ class AssetPathPrefixURL(MultiAssetURL):
"""

def get_assets(
self, client: DandiAPIClient, order: Optional[str] = None
self, client: DandiAPIClient, order: Optional[str] = None, strict: bool = False
) -> Iterator[BaseRemoteAsset]:
"""Returns the assets whose paths start with `path`"""
return self.get_dandiset(client).get_assets_with_path_prefix(
self.path, order=order
)
with _maybe_strict(strict):
yield from self.get_dandiset(
client, lazy=not strict
).get_assets_with_path_prefix(self.path, order=order)


class AssetItemURL(SingleAssetURL):
"""Parsed from a URL that refers to a specific asset by path"""

path: str

def get_assets(self, client: DandiAPIClient) -> Iterator[BaseRemoteAsset]:
def get_assets(
self, client: DandiAPIClient, order: Optional[str] = None, strict: bool = False
) -> Iterator[BaseRemoteAsset]:
"""
Yields the asset whose path equals `path`. If there is no such asset,
this method yields nothing, unless the path happens to be the path to
an asset directory, in which case a `ValueError` is raised indicating
that the user left off a trailing slash.
Yields the asset whose path equals `path`. If there is no such asset:
- If ``strict`` is true, a `NotFoundError` is raised.
- If ``strict`` is false and the path happens to be the path to an
asset directory, a `ValueError` is raised indicating that the user
left off a trailing slash.
- Otherwise, nothing is yielded.
"""
d = self.get_dandiset(client)
try:
asset = d.get_asset_by_path(self.path)
dandiset = self.get_dandiset(client, lazy=not strict)
# Force evaluation of the version here instead of when
# get_asset_by_path() is called so we don't get nonexistent
# dandisets with unspecified versions mixed up with nonexistent
# assets.
dandiset.version_id
except NotFoundError:
if strict:
raise
else:
return
try:
yield dandiset.get_asset_by_path(self.path)
except NotFoundError:
if strict:
raise
try:
next(d.get_assets_with_path_prefix(self.path + "/"))
next(dandiset.get_assets_with_path_prefix(self.path + "/"))
except NotFoundError:
# Dandiset has explicit version that doesn't exist.
return
except StopIteration:
pass
else:
raise ValueError(
f"Asset path {self.path!r} points to a directory but lacks trailing /"
)
else:
yield asset


class AssetFolderURL(MultiAssetURL):
Expand All @@ -269,7 +290,7 @@ class AssetFolderURL(MultiAssetURL):
path: str

def get_assets(
self, client: DandiAPIClient, order: Optional[str] = None
self, client: DandiAPIClient, order: Optional[str] = None, strict: bool = False
) -> Iterator[BaseRemoteAsset]:
"""
Returns all assets under the folder at `path`. Yields nothing if the
Expand All @@ -278,12 +299,24 @@ def get_assets(
path = self.path
if not path.endswith("/"):
path += "/"
return self.get_dandiset(client).get_assets_with_path_prefix(path, order=order)
with _maybe_strict(strict):
yield from self.get_dandiset(
client, lazy=not strict
).get_assets_with_path_prefix(path, order=order)


@contextmanager
def _maybe_strict(strict: bool) -> Iterator[None]:
try:
yield
except NotFoundError:
if strict:
raise


@contextmanager
def navigate_url(
url: str,
url: str, strict: bool = False
) -> Iterator[
Tuple[DandiAPIClient, Optional[RemoteDandiset], Iterator[BaseRemoteAsset]]
]:
Expand All @@ -295,11 +328,16 @@ def navigate_url(
no specific assets were specified, all assets in the Dandiset).
:param str url: URL which might point to a Dandiset, folder, or asset(s)
:returns: Generator of one ``(client, dandiset, assets)``; ``client`` will
have established a session for the duration of the context
:param bool strict:
If true, then `get_dandiset()` is called with ``lazy=False`` and
`get_assets()` is called with ``strict=True``; if false, the opposite
occurs.
:returns: Context manager that yields a ``(client, dandiset, assets)``
tuple; ``client`` will have a session established for the duration of
the context
"""
parsed_url = parse_dandi_url(url)
with parsed_url.navigate() as (client, dandiset, assets):
with parsed_url.navigate(strict=strict) as (client, dandiset, assets):
yield (client, dandiset, assets)


Expand Down Expand Up @@ -470,9 +508,11 @@ def parse(cls, url: str, *, map_instance: bool = True) -> ParsedDandiURL:
- :samp:`dandi://{instance-name}/{dandiset-id}[@{version}][/{path}]`,
where ``instance-name`` is the name of a registered Dandi Archive
instance. If ``path`` is not specified, the URL refers to a Dandiset
and is converted to a `DandisetURL`. If ``path`` is specified, the
URL refers to all assets in the given Dandiset whose paths begin with
the prefix ``path``, and it is converted to an `AssetPathPrefixURL`
and is converted to a `DandisetURL`. If ``path`` is specified and it
ends with a forward slash, the URL refers to an asset folder and is
converted to an `AssetFolderURL` instance; if it does not end with a
slash, it refers to a single asset and is converted to an
`AssetItemURL` instance.
- Any other HTTPS URL that redirects to one of the above
Expand Down
2 changes: 1 addition & 1 deletion dandi/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ def download_generator(
"""

with parsed_url.navigate() as (client, dandiset, assets):
with parsed_url.navigate(strict=True) as (client, dandiset, assets):
if assets_it:
assets_it.gen = assets
assets = assets_it
Expand Down
Loading

0 comments on commit c28a4f0

Please sign in to comment.