-
-
Notifications
You must be signed in to change notification settings - Fork 27
ENH: support URI schemes (zip://, s3://) by converting to vsi paths #43
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
b4b757a
abe4fd7
acc8ded
6980f2a
2606b43
2cef5f6
47a26fb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,147 @@ | ||
import os | ||
import contextlib | ||
|
||
import pytest | ||
|
||
import pyogrio | ||
import pyogrio.raw | ||
from pyogrio.util import vsi_path | ||
|
||
|
||
@contextlib.contextmanager
def change_cwd(path):
    """Temporarily switch the process working directory to *path*.

    The directory that was current on entry is restored on exit, even when
    the body of the ``with`` block raises.
    """
    previous_dir = os.getcwd()
    target_dir = str(path)
    os.chdir(target_dir)
    try:
        yield
    finally:
        # Always go back, otherwise later tests would run from the wrong cwd.
        os.chdir(previous_dir)
|
||
|
||
@pytest.mark.parametrize(
    ("path", "expected"),
    [
        # --- local file paths: expected to come back unchanged ---
        ("data.gpkg", "data.gpkg"),
        ("/home/user/data.gpkg", "/home/user/data.gpkg"),
        (r"C:\User\Documents\data.gpkg", r"C:\User\Documents\data.gpkg"),
        # file:// scheme is stripped down to the plain path
        ("file:///home/user/data.gpkg", "/home/user/data.gpkg"),
        # --- cloud storage URIs ---
        ("s3://testing/data.gpkg", "/vsis3/testing/data.gpkg"),
        ("gs://testing/data.gpkg", "/vsigs/testing/data.gpkg"),
        ("az://testing/data.gpkg", "/vsiaz/testing/data.gpkg"),
        ("adl://testing/data.gpkg", "/vsiadls/testing/data.gpkg"),
        ("adls://testing/data.gpkg", "/vsiadls/testing/data.gpkg"),
        ("hdfs://testing/data.gpkg", "/vsihdfs/testing/data.gpkg"),
        ("webhdfs://testing/data.gpkg", "/vsiwebhdfs/testing/data.gpkg"),
        # --- archive schemes, optionally with a "!member" suffix ---
        ("zip://data.zip", "/vsizip/data.zip"),
        ("tar://data.tar", "/vsitar/data.tar"),
        ("gzip://data.gz", "/vsigzip/data.gz"),
        ("tar://./my.tar!my.geojson", "/vsitar/./my.tar/my.geojson"),
        (
            "zip://home/data/shapefile.zip!layer.shp",
            "/vsizip/home/data/shapefile.zip/layer.shp",
        ),
        # --- combined "archive+transport" schemes ---
        ("zip+s3://testing/shapefile.zip", "/vsizip/vsis3/testing/shapefile.zip"),
        (
            "zip+https://s3.amazonaws.com/testing/shapefile.zip",
            "/vsizip/vsicurl/https://s3.amazonaws.com/testing/shapefile.zip",
        ),
    ],
)
def test_vsi_path(path, expected):
    """Check that ``vsi_path`` maps each input path/URI to the expected string."""
    converted = vsi_path(path)
    assert converted == expected
|
||
|
||
def test_vsi_path_unknown():
    """A URI with an unrecognized scheme must be returned unchanged."""
    unknown_uri = "s4://test/data.geojson"
    assert vsi_path(unknown_uri) == "s4://test/data.geojson"
|
||
|
||
def test_vsi_handling_read_functions(naturalearth_lowres_vsi):
    """Every public read entry point must accept a ``zip://`` URI.

    Without the path conversion a ``zip://`` path would fail, so calling each
    reader with one exercises the conversion in all of them.
    """
    archive_path, _ = naturalearth_lowres_vsi
    zip_uri = f"zip://{archive_path}"

    # low-level reader: the element at index 2 is expected to hold 177 entries
    raw_result = pyogrio.raw.read(zip_uri)
    assert len(raw_result[2]) == 177

    info = pyogrio.read_info(zip_uri)
    assert info["features"] == 177

    bounds = pyogrio.read_bounds(zip_uri)
    assert len(bounds[0]) == 177

    frame = pyogrio.read_dataframe(zip_uri)
    assert len(frame) == 177
|
||
|
||
def test_path_absolute(data_dir):
    """Read the same dataset via an absolute path given as pathlib and as str."""
    # pathlib path
    path = data_dir / "naturalearth_lowres/naturalearth_lowres.shp"
    df = pyogrio.read_dataframe(path)
    # The comparison used to be a bare expression and could never fail.
    assert len(df) == 177

    # str path
    df = pyogrio.read_dataframe(str(path))
    assert len(df) == 177
|
||
|
||
def test_path_relative(data_dir):
    """Read a dataset through a path relative to the current working directory."""
    with change_cwd(data_dir):
        df = pyogrio.read_dataframe("naturalearth_lowres/naturalearth_lowres.shp")
    # The comparison used to be a bare expression and could never fail.
    assert len(df) == 177
|
||
|
||
def test_uri_local_file(data_dir):
    """Read a local dataset addressed with a ``file://`` URI."""
    uri = "file://" + str(data_dir / "naturalearth_lowres/naturalearth_lowres.shp")
    df = pyogrio.read_dataframe(uri)
    # The comparison used to be a bare expression and could never fail.
    assert len(df) == 177
|
||
|
||
def test_zip_path(naturalearth_lowres_vsi):
    """Read a zipped dataset via absolute/relative ``zip://`` and the vsi path."""
    archive, archive_vsi = naturalearth_lowres_vsi

    # absolute zip path
    absolute_uri = f"zip://{archive}"
    frame = pyogrio.read_dataframe(absolute_uri)
    assert len(frame) == 177

    # relative zip path, resolved against the archive's own directory
    relative_uri = f"zip://{archive.name}"
    with change_cwd(archive.parent):
        frame = pyogrio.read_dataframe(relative_uri)
    assert len(frame) == 177

    # absolute vsizip path, passed straight through
    frame = pyogrio.read_dataframe(archive_vsi)
    assert len(frame) == 177
|
||
|
||
@pytest.mark.network
def test_url():
    """Read a shapefile directly from an https URL.

    Review note: tests that require network access were discussed on this
    change and accepted as useful; geopandas has a similar test.
    """
    # Adjacent literals are concatenated into the single original URL.
    url = (
        "https://raw.githubusercontent.com/geopandas/pyogrio/main/"
        "pyogrio/tests/fixtures/naturalearth_lowres/naturalearth_lowres.shp"
    )
    df = pyogrio.read_dataframe(url)
    assert len(df) == 177
|
||
|
||
@pytest.mark.network
def test_url_with_zip():
    """Read a zipped dataset over https using the combined ``zip+https`` scheme."""
    remote_archive = "zip+https://s3.amazonaws.com/fiona-testing/coutwildrnp.zip"
    frame = pyogrio.read_dataframe(remote_archive)
    assert len(frame) == 67
|
||
|
||
@pytest.fixture
def aws_env_setup(monkeypatch):
    """Set ``AWS_NO_SIGN_REQUEST=YES`` in the environment for one test.

    NOTE(review): presumably this allows reading the public test bucket
    without AWS credentials -- confirm against the GDAL /vsis3/ options.
    """
    variable, value = "AWS_NO_SIGN_REQUEST", "YES"
    monkeypatch.setenv(variable, value)
|
||
|
||
@pytest.mark.network
def test_uri_s3(aws_env_setup):
    """Read a zipped dataset from S3 using the combined ``zip+s3`` scheme."""
    bucket_uri = "zip+s3://fiona-testing/coutwildrnp.zip"
    frame = pyogrio.read_dataframe(bucket_uri)
    assert len(frame) == 67
Uh oh!
There was an error while loading. Please reload this page.