Skip to content
This repository has been archived by the owner on Nov 19, 2024. It is now read-only.

Commit

Permalink
Merge pull request #55 from lincbrain/ak-zarr-fix
Browse files Browse the repository at this point in the history
Merge updates for zarr completion & doi completion upstream in dandi cli
  • Loading branch information
aaronkanzer authored Aug 2, 2024
2 parents 8433acf + 45f8d9a commit a106fb3
Show file tree
Hide file tree
Showing 25 changed files with 475 additions and 158 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ jobs:
run: |
python -m pip install --upgrade pip
python -m pip install --upgrade tox
# Annotate codespell within PR
- uses: codespell-project/codespell-problem-matcher@v1
- name: Run linters
run: |
tox -e lint
6 changes: 6 additions & 0 deletions docs/source/cmdline/download.rst
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,12 @@ Options

Whether to interpret asset paths in URLs as exact matches or glob patterns

.. option:: --preserve-tree

When downloading only part of a Dandiset, also download
:file:`dandiset.yaml` and do not strip leading directories from asset
paths. Implies ``--download all``.

.. option:: --sync

Delete local assets that do not exist on the server after downloading
46 changes: 43 additions & 3 deletions lincbrain/cli/cmd_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,45 @@
from ..download import DownloadExisting, DownloadFormat, PathType
from ..utils import get_instance, joinurl

_examples = """
EXAMPLES:
\b
- Download only the dandiset.yaml
dandi download --download dandiset.yaml DANDI:000027
\b
- Download only dandiset.yaml if there is a newer version
dandi download https://identifiers.org/DANDI:000027 --existing refresh
\b
- Download only the assets
dandi download --download assets DANDI:000027
\b
- Download all from a specific version
dandi download DANDI:000027/0.210831.2033
\b
- Download a specific directory
dandi download dandi://DANDI/000027@0.210831.2033/sub-RAT123/
\b
- Download a specific file
dandi download dandi://DANDI/000027@0.210831.2033/sub-RAT123/sub-RAT123.nwb
"""


# The use of f-strings apparently makes this not a proper docstring, and so
# click doesn't use it unless we explicitly assign it to `help`:
@click.command(
help=f"""\
Download files or entire folders from LINC.
\b
{_dandi_url_parser.resource_identifier_primer}
\b
{_dandi_url_parser.known_patterns}
"""
{_examples}
"""
)
@click.option(
"-o",
Expand Down Expand Up @@ -71,6 +100,15 @@
default="all",
show_default=True,
)
@click.option(
"--preserve-tree",
is_flag=True,
help=(
"When downloading only part of a Dandiset, also download"
" `dandiset.yaml` and do not strip leading directories from asset"
" paths. Implies `--download all`."
),
)
@click.option(
"--sync", is_flag=True, help="Delete local assets that do not exist on the server"
)
Expand Down Expand Up @@ -109,6 +147,7 @@ def download(
sync: bool,
dandi_instance: str,
path_type: PathType,
preserve_tree: bool,
) -> None:
# We need to import the download module rather than the download function
# so that the tests can properly patch the function with a mock.
Expand Down Expand Up @@ -142,8 +181,9 @@ def download(
format=format,
jobs=jobs[0],
jobs_per_zarr=jobs[1],
get_metadata="dandiset.yaml" in download_types,
get_assets="assets" in download_types,
get_metadata="dandiset.yaml" in download_types or preserve_tree,
get_assets="assets" in download_types or preserve_tree,
preserve_tree=preserve_tree,
sync=sync,
path_type=path_type,
# develop_debug=develop_debug
Expand Down
2 changes: 2 additions & 0 deletions lincbrain/cli/cmd_ls.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
The arguments may be either resource identifiers or paths to local
files/directories.
\b
{_dandi_url_parser.resource_identifier_primer}
\b
{_dandi_url_parser.known_patterns}
"""
Expand Down
2 changes: 1 addition & 1 deletion lincbrain/cli/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ def main(ctx, log_level, pdb=False):

logdir = platformdirs.user_log_dir("lincbrain-cli", "lincbrain")
logfile = os.path.join(
logdir, f"{datetime.now(timezone.utc):%Y%m%d%H%M%SZ}-{os.getpid()}.log"
logdir, f"{datetime.now(timezone.utc):%Y.%m.%d-%H.%M.%SZ}-{os.getpid()}.log"
)
os.makedirs(logdir, exist_ok=True)
handler = logging.FileHandler(logfile, encoding="utf-8")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
"contributor": [
{
"name": "Tests, Dandi-Cli",
"email": "nemo@example.com",
"roleName": [
"dcite:Author",
"dcite:ContactPerson"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
"contributor": [
{
"name": "Tests, Dandi-Cli",
"email": "nemo@example.com",
"roleName": [
"dcite:Author",
"dcite:ContactPerson"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
"contributor": [
{
"name": "Tests, Dandi-Cli",
"email": "nemo@example.com",
"roleName": [
"dcite:Author",
"dcite:ContactPerson"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
"contributor": [
{
"name": "Tests, Dandi-Cli",
"email": "nemo@example.com",
"roleName": [
"dcite:Author",
"dcite:ContactPerson"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
"contributor": [
{
"name": "Tests, Dandi-Cli",
"email": "nemo@example.com",
"roleName": [
"dcite:Author",
"dcite:ContactPerson"
Expand Down
7 changes: 7 additions & 0 deletions lincbrain/cli/tests/test_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ def test_download_defaults(mocker):
jobs_per_zarr=None,
get_metadata=True,
get_assets=True,
preserve_tree=False,
sync=False,
path_type=PathType.EXACT,
)
Expand All @@ -41,6 +42,7 @@ def test_download_all_types(mocker):
jobs_per_zarr=None,
get_metadata=True,
get_assets=True,
preserve_tree=False,
sync=False,
path_type=PathType.EXACT,
)
Expand All @@ -59,6 +61,7 @@ def test_download_metadata_only(mocker):
jobs_per_zarr=None,
get_metadata=True,
get_assets=False,
preserve_tree=False,
sync=False,
path_type=PathType.EXACT,
)
Expand All @@ -77,6 +80,7 @@ def test_download_assets_only(mocker):
jobs_per_zarr=None,
get_metadata=False,
get_assets=True,
preserve_tree=False,
sync=False,
path_type=PathType.EXACT,
)
Expand Down Expand Up @@ -110,6 +114,7 @@ def test_download_gui_instance_in_dandiset(mocker):
jobs_per_zarr=None,
get_metadata=True,
get_assets=True,
preserve_tree=False,
sync=False,
path_type=PathType.EXACT,
)
Expand All @@ -135,6 +140,7 @@ def test_download_api_instance_in_dandiset(mocker):
jobs_per_zarr=None,
get_metadata=True,
get_assets=True,
preserve_tree=False,
sync=False,
path_type=PathType.EXACT,
)
Expand All @@ -160,6 +166,7 @@ def test_download_url_instance_match(mocker):
jobs_per_zarr=None,
get_metadata=True,
get_assets=True,
preserve_tree=False,
sync=False,
path_type=PathType.EXACT,
)
Expand Down
6 changes: 6 additions & 0 deletions lincbrain/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@ def pytest_addoption(parser: Parser) -> None:
default=False,
help="Only run tests of the new Django Dandi API",
)
parser.addoption(
"--scheduled",
action="store_true",
default=False,
help="Use configuration for a scheduled daily test run",
)


def pytest_collection_modifyitems(items: list[Item], config: Config) -> None:
Expand Down
41 changes: 40 additions & 1 deletion lincbrain/dandiapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import json
import os.path
from pathlib import Path, PurePosixPath
import posixpath
import re
from time import sleep, time
from types import TracebackType
Expand Down Expand Up @@ -233,6 +234,8 @@ def request(
url,
result.text,
)
if data is not None and hasattr(data, "seek"):
data.seek(0)
result.raise_for_status()
except Exception as e:
if isinstance(e, requests.HTTPError):
Expand Down Expand Up @@ -935,6 +938,29 @@ def from_data(cls, client: DandiAPIClient, data: dict[str, Any]) -> RemoteDandis
client=client, identifier=data["identifier"], version=version, data=data
)

@staticmethod
def _normalize_path(path: str) -> str:
"""
Helper to normalize path before passing it to the server.
We and API call it "path" but it is really a "prefix" with inherent
semantics of containing directory divider '/' and referring to a
directory when terminated with '/'.
"""
# Server (now) expects path to be a proper prefix, so to account for user
# possibly specifying ./ or some other relative paths etc, let's normalize
# the path.
# Ref: https://github.com/dandi/dandi-cli/issues/1452
path_normed = posixpath.normpath(path)
if path_normed == ".":
path_normed = ""
elif path.endswith("/"):
# we need to make sure that we have a trailing slash if we had it before
path_normed += "/"
if path_normed != path:
lgr.debug("Normalized path %r to %r", path, path_normed)
return path_normed

def json_dict(self) -> dict[str, Any]:
"""
Convert to a JSONable `dict`, omitting the ``client`` attribute and
Expand Down Expand Up @@ -1152,6 +1178,9 @@ def get_assets_with_path_prefix(
Returns an iterator of all assets in this version of the Dandiset whose
`~RemoteAsset.path` attributes start with ``path``
``path`` is normalized first to possibly remove leading ``./`` or relative
paths (e.g., ``../``) within it.
Assets can be sorted by a given field by passing the name of that field
as the ``order`` parameter. The accepted field names are
``"created"``, ``"modified"``, and ``"path"``. Prepend a hyphen to the
Expand All @@ -1160,7 +1189,7 @@ def get_assets_with_path_prefix(
try:
for a in self.client.paginate(
f"{self.version_api_path}assets/",
params={"path": path, "order": order},
params={"path": self._normalize_path(path), "order": order},
):
yield RemoteAsset.from_data(self, a)
except HTTP404Error:
Expand Down Expand Up @@ -1198,7 +1227,11 @@ def get_asset_by_path(self, path: str) -> RemoteAsset:
Fetch the asset in this version of the Dandiset whose
`~RemoteAsset.path` equals ``path``. If the given asset does not
exist, a `NotFoundError` is raised.
``path`` is normalized first to possibly remove leading ``./`` or relative
paths (e.g., ``../``) within it.
"""
path = self._normalize_path(path)
try:
# Weed out any assets that happen to have the given path as a
# proper prefix:
Expand All @@ -1219,9 +1252,15 @@ def download_directory(
"""
Download all assets under the virtual directory ``assets_dirpath`` to
the directory ``dirpath``. Downloads are synchronous.
``path`` is normalized first to possibly remove leading ``./`` or relative
paths (e.g., ``../``) within it.
"""
if assets_dirpath and not assets_dirpath.endswith("/"):
assets_dirpath += "/"
# need to normalize explicitly since we do use it below also
# to deduce portion of the path to strip off.
assets_dirpath = self._normalize_path(assets_dirpath)
assets = list(self.get_assets_with_path_prefix(assets_dirpath))
for a in assets:
filepath = Path(dirpath, a.path[len(assets_dirpath) :])
Expand Down
Loading

0 comments on commit a106fb3

Please sign in to comment.