Skip to content

Commit

Permalink
Python bindings: simplify previous commit to have a single 'gdalvsi' protocol
Browse files Browse the repository at this point in the history
  • Loading branch information
rouault committed Oct 12, 2024
1 parent 2210300 commit 3ba8656
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 115 deletions.
86 changes: 41 additions & 45 deletions autotest/gcore/test_gdal_fsspec.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,13 @@

def test_gdal_fsspec_open_read():

with fsspec.open("vsi://data/byte.tif") as f:
with fsspec.open("gdalvsi://data/byte.tif") as f:
assert len(f.read()) == gdal.VSIStatL("data/byte.tif").size


def test_gdal_fsspec_info_file():

fs = fsspec.filesystem("vsi")
fs = fsspec.filesystem("gdalvsi")
info = fs.info("data/byte.tif")
assert "mtime" in info
del info["mtime"]
Expand All @@ -44,7 +44,7 @@ def test_gdal_fsspec_info_file():

def test_gdal_fsspec_info_dir():

fs = fsspec.filesystem("vsi")
fs = fsspec.filesystem("gdalvsi")
info = fs.info("data")
assert (info["mode"] & 16384) != 0
del info["mode"]
Expand All @@ -57,14 +57,14 @@ def test_gdal_fsspec_info_dir():

def test_gdal_fsspec_info_error():

fs = fsspec.filesystem("vsi")
fs = fsspec.filesystem("gdalvsi")
with pytest.raises(FileNotFoundError):
fs.info("/i/do/not/exist")


def test_gdal_fsspec_ls():

fs = fsspec.filesystem("vsi")
fs = fsspec.filesystem("gdalvsi")
ret = fs.ls("data")
assert len(ret) > 2
item_of_interest = None
Expand All @@ -84,21 +84,21 @@ def test_gdal_fsspec_ls():

def test_gdal_fsspec_ls_file():

fs = fsspec.filesystem("vsi")
fs = fsspec.filesystem("gdalvsi")
ret = fs.ls("data/byte.tif")
assert ret == ["data/byte.tif"]


def test_gdal_fsspec_ls_error():

fs = fsspec.filesystem("vsi")
fs = fsspec.filesystem("gdalvsi")
with pytest.raises(FileNotFoundError):
fs.ls("vsi://i/do/not/exist")
fs.ls("gdalvsi://i/do/not/exist")


def test_gdal_fsspec_modified():

fs = fsspec.filesystem("vsi")
fs = fsspec.filesystem("gdalvsi")
modified = fs.modified("data/byte.tif")
assert modified is not None
import datetime
Expand All @@ -108,70 +108,70 @@ def test_gdal_fsspec_modified():

def test_gdal_fsspec_modified_error():

fs = fsspec.filesystem("vsi")
fs = fsspec.filesystem("gdalvsi")
with pytest.raises(FileNotFoundError):
fs.modified("vsi://i/do/not/exist")
fs.modified("gdalvsi://i/do/not/exist")


def test_gdal_fsspec_rm():

with fsspec.open("vsimem:///foo.bin", "wb") as f:
with fsspec.open("gdalvsi:///vsimem/foo.bin", "wb") as f:
f.write(b"""bar""")
fs = fsspec.filesystem("vsimem")
fs.info("/foo.bin")
fs.rm("/foo.bin")
fs = fsspec.filesystem("gdalvsi")
fs.info("/vsimem/foo.bin")
fs.rm("/vsimem/foo.bin")
with pytest.raises(FileNotFoundError):
fs.info("/foo.bin")
fs.info("/vsimem/foo.bin")


def test_gdal_fsspec_rm_error():

fs = fsspec.filesystem("vsimem")
fs = fsspec.filesystem("gdalvsi")
with pytest.raises(FileNotFoundError):
fs.rm("/foo.bin")
fs.rm("/vsimem/foo.bin")


def test_gdal_fsspec_copy():

with fsspec.open("vsimem://foo.bin", "wb") as f:
with fsspec.open("gdalvsi:///vsimem/foo.bin", "wb") as f:
f.write(b"""bar""")
fs = fsspec.filesystem("vsimem")
fs.copy("/foo.bin", "/bar.bin")
assert fs.info("/bar.bin")["size"] == 3
assert fs.info("/foo.bin")["size"] == 3
fs.rm("/foo.bin")
fs.rm("/bar.bin")
fs = fsspec.filesystem("gdalvsi")
fs.copy("/vsimem/foo.bin", "/vsimem/bar.bin")
assert fs.info("/vsimem/bar.bin")["size"] == 3
assert fs.info("/vsimem/foo.bin")["size"] == 3
fs.rm("/vsimem/foo.bin")
fs.rm("/vsimem/bar.bin")


def test_gdal_fsspec_copy_error():

fs = fsspec.filesystem("vsimem")
fs = fsspec.filesystem("gdalvsi")
with pytest.raises(FileNotFoundError):
fs.copy("/foo.bin", "/bar.bin")
fs.copy("/vsimem/foo.bin", "/vsimem/bar.bin")


def test_gdal_fsspec_mv():

with fsspec.open("vsimem://foo.bin", "wb") as f:
with fsspec.open("gdalvsi:///vsimem/foo.bin", "wb") as f:
f.write(b"""bar""")
fs = fsspec.filesystem("vsimem")
fs.mv("/foo.bin", "/bar.bin")
assert fs.info("/bar.bin")["size"] == 3
fs = fsspec.filesystem("gdalvsi")
fs.mv("/vsimem/foo.bin", "/vsimem/bar.bin")
assert fs.info("/vsimem/bar.bin")["size"] == 3
with pytest.raises(FileNotFoundError):
fs.info("/foo.bin")
fs.rm("/bar.bin")
fs.info("/vsimem/foo.bin")
fs.rm("/vsimem/bar.bin")


def test_gdal_fsspec_mv_error():

fs = fsspec.filesystem("vsimem")
fs = fsspec.filesystem("gdalvsi")
with pytest.raises(FileNotFoundError):
fs.mv("/foo.bin", "/bar.bin")
fs.mv("/vsimem/foo.bin", "/bar.bin")


def test_gdal_fsspec_mkdir(tmp_path):

fs = fsspec.filesystem("vsi")
fs = fsspec.filesystem("gdalvsi")

my_path = str(tmp_path) + "/my_dir"

Expand All @@ -189,7 +189,7 @@ def test_gdal_fsspec_mkdir(tmp_path):
with pytest.raises(FileNotFoundError):
fs.info(my_path)

fs = fsspec.filesystem("vsi")
fs = fsspec.filesystem("gdalvsi")
with pytest.raises(Exception):
fs.mkdir(my_path + "/my_subdir", create_parents=False)
with pytest.raises(FileNotFoundError):
Expand All @@ -198,7 +198,7 @@ def test_gdal_fsspec_mkdir(tmp_path):

def test_gdal_fsspec_makedirs(tmp_path):

fs = fsspec.filesystem("vsi")
fs = fsspec.filesystem("gdalvsi")

my_path = str(tmp_path) + "/my_dir"
fs.makedirs(my_path)
Expand All @@ -218,12 +218,8 @@ def test_gdal_fsspec_usable_by_pyarrow_dataset(tmp_vsimem):
tmp_vsimem_file, open("../ogr/data/parquet/test.parquet", "rb").read()
)

fs_vsimem = fsspec.filesystem("vsimem")
fs_vsimem = fsspec.filesystem("gdalvsi")

assert (
ds.dataset(tmp_vsimem_file[len("/vsimem") :], filesystem=fs_vsimem) is not None
)
assert ds.dataset(tmp_vsimem_file, filesystem=fs_vsimem) is not None

assert (
ds.dataset(str(tmp_vsimem)[len("/vsimem") :], filesystem=fs_vsimem) is not None
)
assert ds.dataset(str(tmp_vsimem), filesystem=fs_vsimem) is not None
83 changes: 13 additions & 70 deletions swig/python/osgeo/gdal_fsspec.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,17 @@
# SPDX-License-Identifier: MIT
# Copyright (c) 2024, Even Rouault <even dot rouault at spatialys.com>

"""Module exposing GDAL Virtual File Systems (VSI) as fsspec implementations.
"""Module exposing GDAL Virtual File Systems (VSI) as a "gdalvsi" fsspec implementation.
Importing "osgeo.gdal_fsspec" requires the Python "fsspec"
(https://filesystem-spec.readthedocs.io/en/latest/) module to be available.
A generic "vsi" fsspec protocol is available. All GDAL VSI file names must be
simply prefixed with "vsi://". For example:
A generic "gdalvsi" fsspec protocol is available. All GDAL VSI file names must be
simply prefixed with "gdalvsi://". For example:
- "vsi://data/byte.tif" to access relative file "data/byte.tif"
- "vsi:///home/user/byte.tif" to access absolute file "/home/user/byte.tif"
- "vsi:///vsimem/byte.tif" (note the 3 slashes) to access VSIMem file "/vsimem/byte.tif"
Each VSI file system is also registered as a distinct fsspec protocol, such
as "vsimem", "vsicurl", "vsizip", "vsitar", etc.
Examples:
- "vsimem://byte.tif" to access file "/vsimem/byte.tif"
- "vsicurl://http://example.com/foo" to access file "/vsicurl/http://example.com/foo"
- "vsis3://my_bucket/byte.tif" to access file "/vsis3/my_bucket/byte.tif"
- "vsizip:///home/user/my.zip/foo.tif" (note the 3 slashes to indicate absolute path)
to access (absolute) file "/vsizip//home/user/my.zip/foo.tif"
- "vsizip://my.zip/foo.tif" to access (relative) file "/vsizip/my.zip/foo.tif"
- "gdalvsi://data/byte.tif" to access relative file "data/byte.tif"
- "gdalvsi:///home/user/byte.tif" to access absolute file "/home/user/byte.tif"
- "gdalvsi:///vsimem/byte.tif" (note the 3 slashes) to access VSIMem file "/vsimem/byte.tif"
:since: GDAL 3.11
"""
Expand All @@ -44,52 +32,19 @@ class VSIFileSystem(AbstractFileSystem):
def _get_gdal_path(cls, path):
"""Return a GDAL compatible file from a fsspec file name.
For the file system using the generic "vsi" protocol,
remove the leading vsi:// if found (normally, it should be there,
Remove the leading vsi:// if found (normally, it should be there,
but most AbstractFileSystem implementations seem to be ready to remove
it if found)
For specialized file systems, like vsimem://, etc., for an input
like "vsimem:///foo", return "/vsimem/foo". And for an input like
"/foo" also return "/vsimem/foo".
"""

if isinstance(path, PurePath):
path = stringify_path(path)

if cls.protocol == "vsi":
# "vsi://something" just becomes "something"
if path.startswith("vsi://"):
return path[len("vsi://") :]

return path
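# "vsi://something" just becomes "something"
# NOTE(review): the only protocol registered after this change is "gdalvsi",
# so a path starting with "gdalvsi://" does not match the "vsi://" check below
# and is returned unchanged -- confirm whether "gdalvsi://" was intended here.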
if path.startswith("vsi://"):
return path[len("vsi://") :]

else:
list_protocols_that_need_leeding_slash = [
"vsis3",
"vsigs",
"vsiaz",
"vsioss",
"vsiswift",
]
list_protocols_that_need_leeding_slash += [
item + "_streaming" for item in list_protocols_that_need_leeding_slash
]
list_protocols_that_need_leeding_slash.append("vsimem")

# Deal with paths like "vsis3://foo"
full_protocol = cls.protocol + "://"
if path.startswith(full_protocol):
path = path[len(full_protocol) :]

# Deal with paths like "/foo" with a VSIFileSystem that is something like "vsis3"
if (
cls.protocol in list_protocols_that_need_leeding_slash
and not path.startswith("/")
):
path = "/" + path

return "/" + cls.protocol + path
return path

def _open(
self,
Expand Down Expand Up @@ -271,22 +226,10 @@ def copy(


def register_vsi_implementations():
"""Register a generic "vsi" protocol and "vsimem", "vsitar", etc.
"""Register a generic "gdalvsi" protocol.
This function is automatically called on osgeo.gdal_fsspec import.
"""
register_implementation("vsi", VSIFileSystem)
for vsi_prefix in gdal.GetFileSystemsPrefixes():
if vsi_prefix.startswith("/vsi") and not vsi_prefix.endswith("?"):
assert vsi_prefix.endswith("/")
protocol = vsi_prefix[1:-1]
# We need to duplicate the base class for each protocol, so that
# each class has a distinct "protocol" member.
new_class = type(
"VSIFileSystem_" + protocol,
VSIFileSystem.__bases__,
dict(VSIFileSystem.__dict__),
)
register_implementation(protocol, new_class)
register_implementation("gdalvsi", VSIFileSystem)


register_vsi_implementations()

0 comments on commit 3ba8656

Please sign in to comment.