-
-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
✨ PyogrioReaderIterDataPipe for reading vector OGR files (#19)
An iterable-style DataPipe for vector data! Also added Python 3.8 job to CI build matrix which doesn't include 'vector' dependencies. That job is also skipped when PR is in draft mode. * ➕ Add pyogrio[geopandas] Vectorized vector I/O using OGR! * ✨ PyogrioReaderIterDataPipe for reading vector OGR files An iterable-style DataPipe for vector data! Uses pyogrio with geopandas for the I/O. Included a doctest and unit test, added a new section in the API docs and some more intersphinx mappings. * 👷 Run tests with optional packages on Python 3.9 Making a proper build matrix now! Minimal tests (no optional dependencies) run on Python 3.8, while full tests (with all dependencies) run on Python 3.9. Wanted to do Python 3.10 for full tests, but need to wait for rasterio 1.3.0 to come out of beta first. * 🚩 Skip Ubuntu Python 3.8 CI tests for draft PRs Conserve GitHub Actions Continuous Integration resources when a Pull Request is in draft mode.
- Loading branch information
Showing
8 changed files
with
372 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
""" | ||
DataPipes for :doc:`pyogrio <pyogrio:index>`. | ||
""" | ||
from typing import Any, Dict, Iterator, Optional, Tuple | ||
|
||
# pyogrio is an optional dependency: fall back to ``None`` when it cannot be
# imported so that the datapipe can raise a helpful error at construction time.
# Only ImportError (which includes ModuleNotFoundError) is caught, so unrelated
# failures such as KeyboardInterrupt are not silently swallowed.
try:
    import pyogrio
except ImportError:
    pyogrio = None
from torchdata.datapipes import functional_datapipe | ||
from torchdata.datapipes.iter import IterDataPipe | ||
from torchdata.datapipes.utils import StreamWrapper | ||
|
||
|
||
@functional_datapipe("read_from_pyogrio")
class PyogrioReaderIterDataPipe(IterDataPipe[Tuple[str, StreamWrapper]]):
    """
    Takes vector files (e.g. FlatGeoBuf, GeoPackage, GeoJSON) from local disk
    or URLs (as long as they can be read by pyogrio) and yields tuples of
    filename and :py:class:`geopandas.GeoDataFrame` objects
    (functional name: ``read_from_pyogrio``).

    Based on
    https://github.com/pytorch/data/blob/v0.3.0/torchdata/datapipes/iter/load/iopath.py#L37-L83

    Parameters
    ----------
    source_datapipe : IterDataPipe[str]
        A DataPipe that contains filepaths or URL links to vector files such as
        FlatGeoBuf, GeoPackage, GeoJSON, etc.

    kwargs : Optional
        Extra keyword arguments to pass to
        `pyogrio.read_dataframe <https://pyogrio.readthedocs.io/en/latest/api.html#geopandas-integration>`_.

    Yields
    ------
    stream_obj : Tuple[str, geopandas.GeoDataFrame]
        A tuple consisting of the filename that was passed in, and a
        :py:class:`geopandas.GeoDataFrame` object containing the vector data.

    Raises
    ------
    ModuleNotFoundError
        If ``pyogrio`` is not installed. See
        :doc:`install instructions for pyogrio <pyogrio:install>`, and ensure
        that ``geopandas`` is installed too (e.g. via
        ``pip install pyogrio[geopandas]``) before using this class.

    Example
    -------
    >>> import pytest
    >>> pyogrio = pytest.importorskip("pyogrio")
    ...
    >>> from torchdata.datapipes.iter import IterableWrapper
    >>> from zen3geo.datapipes import PyogrioReader
    ...
    >>> # Read in GeoPackage data using DataPipe
    >>> file_url: str = "https://github.com/geopandas/pyogrio/raw/v0.4.0a1/pyogrio/tests/fixtures/test_gpkg_nulls.gpkg"
    >>> dp = IterableWrapper(iterable=[file_url])
    >>> dp_pyogrio = dp.read_from_pyogrio()
    ...
    >>> # Loop or iterate over the DataPipe stream
    >>> it = iter(dp_pyogrio)
    >>> filename, geodataframe = next(it)
    >>> filename
    'https://github.com/geopandas/pyogrio/raw/v0.4.0a1/pyogrio/tests/fixtures/test_gpkg_nulls.gpkg'
    >>> geodataframe
    StreamWrapper< col_bool col_int8 ... col_float64 geometry
    0 1.0 1.0 ... 1.5 POINT (0.00000 0.00000)
    1 0.0 2.0 ... 2.5 POINT (1.00000 1.00000)
    2 1.0 3.0 ... 3.5 POINT (2.00000 2.00000)
    3 NaN NaN ... NaN POINT (4.00000 4.00000)
    <BLANKLINE>
    [4 rows x 12 columns]>
    """

    def __init__(self, source_datapipe: IterDataPipe[str], **kwargs: Any) -> None:
        # The module-level import guard sets ``pyogrio`` to None when the
        # optional dependency is unavailable; fail early with install hints.
        if pyogrio is None:
            raise ModuleNotFoundError(
                "Package `pyogrio` is required to be installed to use this datapipe. "
                "Please use `pip install pyogrio[geopandas]` or "
                "`conda install -c conda-forge pyogrio` "
                "to install the package"
            )
        self.source_datapipe: IterDataPipe[str] = source_datapipe
        # Stored verbatim and forwarded to every pyogrio.read_dataframe call.
        self.kwargs = kwargs

    def __iter__(self) -> Iterator[Tuple[str, StreamWrapper]]:
        """
        Read each file from the source datapipe with pyogrio.read_dataframe
        and yield ``(filename, StreamWrapper(dataframe))`` tuples.
        """
        for filename in self.source_datapipe:
            yield (
                filename,
                StreamWrapper(pyogrio.read_dataframe(filename, **self.kwargs)),
            )

    def __len__(self) -> int:
        """Return the length of the source datapipe (one output per input)."""
        return len(self.source_datapipe)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
""" | ||
Tests for pyogrio datapipes. | ||
""" | ||
import pytest | ||
from torchdata.datapipes.iter import IterableWrapper | ||
|
||
from zen3geo.datapipes import PyogrioReader | ||
|
||
pyogrio = pytest.importorskip("pyogrio") | ||
|
||
# %% | ||
def test_pyogrio_reader():
    """
    Ensure that PyogrioReader works to read in a GeoPackage file and outputs a
    tuple made up of a filename and a geopandas.GeoDataFrame object.
    """
    file_url: str = "https://github.com/geopandas/pyogrio/raw/v0.4.0a1/pyogrio/tests/fixtures/test_gpkg_nulls.gpkg"
    dp = IterableWrapper(iterable=[file_url])

    # Using class constructors
    dp_pyogrio = PyogrioReader(source_datapipe=dp)
    # Using functional form (recommended)
    dp_pyogrio = dp.read_from_pyogrio()

    assert len(dp_pyogrio) == 1
    it = iter(dp_pyogrio)
    filename, geodataframe = next(it)

    assert isinstance(filename, str)
    assert geodataframe.shape == (4, 12)
    # Reduce over both axes: iterating a DataFrame yields its column labels,
    # so the builtin any() would be true regardless of the data.
    assert geodataframe.isna().any(axis=None)
    assert (geodataframe.geom_type == "Point").all()