Skip to content

Commit 39b12e3

Browse files
committed
dvcfs: implement basic du
1 parent 77b451d commit 39b12e3

File tree

5 files changed

+166
-0
lines changed

5 files changed

+166
-0
lines changed

dvc/cli/parser.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
data_sync,
2020
destroy,
2121
diff,
22+
du,
2223
experiments,
2324
freeze,
2425
gc,
@@ -95,6 +96,7 @@
9596
data,
9697
artifacts,
9798
studio,
99+
du,
98100
]
99101

100102

dvc/commands/du.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
import argparse
2+
import logging
3+
4+
from dvc.cli import completion
5+
from dvc.cli.command import CmdBaseNoRepo
6+
from dvc.cli.utils import DictAction, append_doc_link
7+
from dvc.ui import ui
8+
9+
logger = logging.getLogger(__name__)
10+
11+
12+
class CmdDU(CmdBaseNoRepo):
    """Command object that runs ``Repo.du`` and prints the result as a table."""

    def run(self):
        """Query disk usage for the parsed CLI arguments and print it.

        Each ``(path, size)`` pair returned by ``Repo.du`` is rendered as a
        ``(formatted size, path)`` row. Always returns exit code ``0``.
        """
        from dvc.repo import Repo
        from dvc.utils.humanize import naturalsize

        usage = Repo.du(
            self.args.url,
            self.args.path,
            rev=self.args.rev,
            summarize=self.args.summarize,
            config=self.args.config,
            remote=self.args.remote,
            remote_config=self.args.remote_config,
        )

        # Size goes in the first column, path in the second.
        rows = []
        for entry_path, nbytes in usage:
            rows.append((naturalsize(nbytes), entry_path))

        ui.table(rows)
        return 0
28+
29+
30+
def add_parser(subparsers, parent_parser):
    """Register the ``du`` subcommand and its arguments on *subparsers*.

    The created parser inherits from *parent_parser* and dispatches to
    ``CmdDU`` through the ``func`` default.
    """
    summary = "Show disk usage."
    config_help = (
        "Path to a config file that will be merged with the config "
        "in the target repository."
    )
    remote_config_help = (
        "Remote config options to merge with a remote's config (default or one "
        "specified by '--remote') in the target repository."
    )

    parser = subparsers.add_parser(
        "du",
        parents=[parent_parser],
        description=append_doc_link(summary, "du"),
        help=summary,
        formatter_class=argparse.RawTextHelpFormatter,
    )

    # Positional: repository location.
    parser.add_argument("url", help="Location of DVC repository")

    # Optional flags, registered in the order they should appear in --help.
    parser.add_argument(
        "--rev",
        nargs="?",
        help="Git revision (e.g. SHA, branch, tag)",
        metavar="<commit>",
    )
    parser.add_argument(
        "-s",
        "--summarize",
        action="store_true",
        help="Show total disk usage.",
    )
    parser.add_argument("--config", type=str, help=config_help)
    parser.add_argument(
        "--remote",
        type=str,
        help="Remote name to set as a default in the target repository.",
    )
    parser.add_argument(
        "--remote-config",
        type=str,
        nargs="*",
        action=DictAction,
        help=remote_config_help,
    )

    # Positional: optional path inside the repository, with directory
    # completion attached to the argument action.
    path_argument = parser.add_argument(
        "path",
        nargs="?",
        help="Path to directory within the repository",
    )
    path_argument.complete = completion.DIR

    parser.set_defaults(func=CmdDU)

dvc/fs/dvc.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import os
55
import posixpath
66
import threading
7+
from collections import deque
78
from contextlib import ExitStack, suppress
89
from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, Tuple, Type, Union
910

@@ -60,6 +61,7 @@ def _merge_info(repo, key, fs_info, dvc_info):
6061
if fs_info:
6162
ret["type"] = fs_info["type"]
6263
ret["size"] = fs_info["size"]
64+
ret["fs_info"] = fs_info
6365
isexec = False
6466
if fs_info["type"] == "file":
6567
isexec = utils.is_exec(fs_info["mode"])
@@ -421,6 +423,45 @@ def get_file(self, rpath, lpath, **kwargs):
421423
dvc_path = _get_dvc_path(dvc_fs, subkey)
422424
return dvc_fs.get_file(dvc_path, lpath, **kwargs)
423425

426+
def du(self, path, total=True, maxdepth=None, withdirs=False, **kwargs):
    """Compute space used under *path* by walking the tree breadth-first.

    Args:
        path: starting path; its ``info`` seeds the walk.
        total: when true return a single summed number, otherwise return a
            ``{name: size}`` mapping.
        maxdepth: not supported; any value other than ``None`` raises.
        withdirs: when true, expanded directories are also recorded in the
            mapping with their own reported size.
        **kwargs: accepted and ignored.

    Raises:
        NotImplementedError: if *maxdepth* is given.
    """
    if maxdepth is not None:
        raise NotImplementedError

    per_path = {}
    unexpanded_dirs = {}
    pending = deque([self.info(path)])

    while pending:
        node = pending.popleft()
        node_type = node["type"]
        node_size = node["size"] or 0  # a missing (None) size counts as 0
        node_name = node["name"]

        if node_type != "directory":
            per_path[node_name] = node_size
            continue

        dvc_info = node.get("dvc_info") or {}
        entry = dvc_info.get("entry")
        dvc_only = bool(dvc_info) and not node.get("fs_info")

        # A directory known only through DVC info whose entry already carries
        # a size is taken as-is instead of being listed and descended into.
        if dvc_only and entry is not None and entry.size is not None:
            unexpanded_dirs[node_name] = entry.size
            continue

        if withdirs:
            per_path[node_name] = node_size

        pending.extend(self.ls(node_name, detail=True))

    if not total:
        # NOTE: sizes stored in `unexpanded_dirs` are only counted in the
        # summed result below; they are not part of this mapping.
        return per_path

    return sum(per_path.values()) + sum(unexpanded_dirs.values())
464+
424465
def close(self):
425466
self._repo_stack.close()
426467

dvc/repo/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ class Repo:
7070
from dvc.repo.commit import commit # type: ignore[misc]
7171
from dvc.repo.destroy import destroy # type: ignore[misc]
7272
from dvc.repo.diff import diff # type: ignore[misc]
73+
from dvc.repo.du import du # type: ignore[misc]
7374
from dvc.repo.fetch import fetch # type: ignore[misc]
7475
from dvc.repo.freeze import freeze, unfreeze # type: ignore[misc]
7576
from dvc.repo.gc import gc # type: ignore[misc]

dvc/repo/du.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
from typing import Any, Dict, Optional, Union
2+
3+
4+
def du(
    url: str,
    path: Optional[str] = None,
    rev: Optional[str] = None,
    summarize: bool = False,
    config: Union[None, Dict[str, Any], str] = None,
    remote: Optional[str] = None,
    remote_config: Optional[dict] = None,
):
    """Report disk usage for a path inside the repository at *url*.

    Args:
        url: location of the repository, passed to ``Repo.open``.
        path: path within the repository; ``None`` or empty means ``""``.
        rev: revision passed to ``Repo.open``.
        summarize: when true, return only the total for *path*.
        config: either a config dict, or a path to a config file that is
            loaded with ``Config.load_file``.
        remote: remote name passed to ``Repo.open``.
        remote_config: remote config options passed to ``Repo.open``.

    Returns:
        A list of ``(path, size)`` tuples. If *summarize* is set or *path*
        is not a directory, the list holds a single entry for *path*.
        Otherwise it holds one entry per item listed directly under *path*,
        followed by a final entry for *path* itself with the sum of those
        sizes.
    """
    from dvc.config import Config

    from . import Repo

    if config and not isinstance(config, dict):
        config_dict = Config.load_file(config)
    else:
        # A dict config is already in its final form and must be forwarded;
        # previously it was replaced by None and silently ignored.
        # Empty values still collapse to None.
        config_dict = config or None

    with Repo.open(
        url,
        rev=rev,
        subrepos=True,
        uninitialized=True,
        config=config_dict,
        remote=remote,
        remote_config=remote_config,
    ) as repo:
        path = path or ""

        fs = repo.dvcfs

        if summarize or not fs.isdir(path):
            return [(path, fs.du(path, total=True))]

        ret = [
            (entry_path, fs.du(entry_path, total=True)) for entry_path in fs.ls(path)
        ]
        # The last row is the total for `path`, summed from its direct children.
        ret.append((path, sum(entry[1] for entry in ret)))
        return ret

0 commit comments

Comments
 (0)