Skip to content

Commit 491155c

Browse files
authored
experimental upath implementation for dvcfilesystem (#10302)
1 parent 9bb4501 commit 491155c

File tree

2 files changed

+63
-0
lines changed

2 files changed

+63
-0
lines changed

dvc/fs/dvc_path.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
"""UPath implementation for DVCFileSystem.
2+
3+
This provides a `pathlib.Path`-like interface to
4+
work with DVCFileSystem.
5+
6+
Examples
7+
--------
8+
9+
>>> from upath import UPath
10+
11+
>>> local = UPath("dvc://path/to/local/repo")
12+
>>> https = UPath("dvc+https://github.com/iterative/example-get-started", rev="main")
13+
>>> ssh = UPath("dvc+ssh://git@github.com:iterative/example-get-started.git")
14+
"""
15+
16+
from urllib.parse import urlsplit
17+
18+
from upath import UPath
19+
20+
21+
class DVCPath(UPath):
    """UPath implementation for paths served by DVCFileSystem."""

    @classmethod
    def _transform_init_args(cls, args, protocol, storage_options):
        """Normalise constructor arguments before delegating to ``UPath``.

        * With no positional arguments the path defaults to ``"/"``.
        * For the ``dvc+http``, ``dvc+https`` and ``dvc+ssh`` protocols, when
          no ``url`` storage option was supplied, the first positional
          argument is taken to be the repository URL: it is stored in
          ``storage_options["url"]`` and the path itself is reset to ``"/"``.

        The parent implementation is always invoked with the protocol
        ``"dvc"``.
        """
        if not args:
            args = ("/",)
        elif "url" not in storage_options and protocol in {
            "dvc+http",
            "dvc+https",
            "dvc+ssh",
        }:
            first, *remainder = args
            parts = urlsplit(str(first))
            # Part after the "+", i.e. "http", "https" or "ssh".
            _, _, scheme = protocol.partition("+")
            repo_url = (
                # ssh: host part and path are joined, without any scheme.
                parts.netloc + parts.path
                if scheme == "ssh"
                # http(s): keep the URL, swapping "dvc+<scheme>" for "<scheme>".
                else parts._replace(scheme=scheme).geturl()
            )
            storage_options["url"] = repo_url
            # Assume the given path is a root url
            args = ("/", *remainder)
        return super()._transform_init_args(args, "dvc", storage_options)

    def __str__(self):
        """Return the string form, embedding the ``url`` storage option if set.

        When a non-empty ``url`` storage option is present, the first
        ``"dvc://"`` in the parent's string form is replaced by
        ``"dvc+<url>"``; otherwise the parent's string is returned unchanged.
        """
        rendered = super().__str__()
        repo_url = self.storage_options.get("url")
        if not repo_url:
            return rendered
        return rendered.replace("dvc://", f"dvc+{repo_url}", 1)

    def with_segments(self, *pathsegments):
        """Build a path from ``pathsegments`` that reuses this path's filesystem."""
        derived = super().with_segments(*pathsegments)
        # cache filesystem, as dvcfs does not cache filesystem
        # caveat: any joinpath operation will instantiate filesystem
        derived._fs_cached = self.fs
        return derived

pyproject.toml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,14 @@ dvc = "dvc.cli:main"
130130
[project.entry-points."fsspec.specs"]
131131
dvc = "dvc.api:DVCFileSystem"
132132

133+
[project.entry-points."universal_pathlib.implementations"]
134+
dvc = "dvc.fs.dvc_path:DVCPath"
135+
# universal_pathlib does not support fsspec url chaining yet.
136+
# see https://github.com/fsspec/universal_pathlib/issues/28.
137+
"dvc+http" = "dvc.fs.dvc_path:DVCPath"
138+
"dvc+https" = "dvc.fs.dvc_path:DVCPath"
139+
"dvc+ssh" = "dvc.fs.dvc_path:DVCPath"
140+
133141
[project.entry-points."pyinstaller40"]
134142
hook-dirs = "dvc.__pyinstaller:get_hook_dirs"
135143
tests = "dvc.__pyinstaller:get_PyInstaller_tests"
@@ -232,6 +240,7 @@ module = [
232240
"ruamel.yaml.*",
233241
"shortuuid",
234242
"shtab",
243+
"upath",
235244
"virtualenv",
236245
"viztracer",
237246
"voluptuous",

0 commit comments

Comments
 (0)