Skip to content

Commit 8c3a01d

Browse files
authored
Improved GeoArrow interop (#308)
### Change list

- Check for Arrow PyCapsule interface methods on input data, both in `viz` and in each layer type
- Add examples for passing GeoArrow table into layer
- Add docstrings for `table` attribute
- Validate that the computed center is in WGS84 bounds
- Add `PyarrowTableTrait` to docs page on traits
- Add auto-light/dark to docs website
1 parent a46ef39 commit 8c3a01d

File tree

9 files changed

+771
-537
lines changed

9 files changed

+771
-537
lines changed

docs/api/traits.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# lonboard.traits
22

3+
::: lonboard.traits.PyarrowTableTrait
4+
35
::: lonboard.traits.ColorAccessor
46

57
::: lonboard.traits.FloatAccessor

lonboard/_layer.py

Lines changed: 111 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -411,25 +411,51 @@ class ScatterplotLayer(BaseArrowLayer):
411411
412412
**Example:**
413413
414+
From GeoPandas:
415+
414416
```py
415417
import geopandas as gpd
416418
from lonboard import Map, ScatterplotLayer
417419
418-
# A GeoDataFrame with Point geometries
420+
# A GeoDataFrame with Point or MultiPoint geometries
419421
gdf = gpd.GeoDataFrame()
420422
layer = ScatterplotLayer.from_geopandas(
421423
gdf,
422424
get_fill_color=[255, 0, 0],
423425
)
424426
m = Map(layers=[layer])
425427
```
428+
429+
From [geoarrow-rust](https://geoarrow.github.io/geoarrow-rs/python/latest):
430+
431+
```py
432+
from geoarrow.rust.core import read_parquet
433+
from lonboard import Map, ScatterplotLayer
434+
435+
# Example: A GeoParquet file with Point or MultiPoint geometries
436+
table = read_parquet("path/to/file.parquet")
437+
layer = ScatterplotLayer(
438+
table=table,
439+
get_fill_color=[255, 0, 0],
440+
)
441+
m = Map(layers=[layer])
442+
```
426443
"""
427444

428445
_layer_type = traitlets.Unicode("scatterplot").tag(sync=True)
429446

430447
table = PyarrowTableTrait(
431448
allowed_geometry_types={EXTENSION_NAME.POINT, EXTENSION_NAME.MULTIPOINT}
432449
)
450+
"""A GeoArrow table with a Point or MultiPoint column.
451+
452+
This is the fastest way to plot data from an existing GeoArrow source, such as
453+
[geoarrow-rust](https://geoarrow.github.io/geoarrow-rs/python/latest) or
454+
[geoarrow-pyarrow](https://geoarrow.github.io/geoarrow-python/main/index.html).
455+
456+
If you have a GeoPandas `GeoDataFrame`, use
457+
[`from_geopandas`][lonboard.ScatterplotLayer.from_geopandas] instead.
458+
"""
433459

434460
radius_units = traitlets.Unicode("meters", allow_none=True).tag(sync=True)
435461
"""
@@ -605,11 +631,13 @@ class PathLayer(BaseArrowLayer):
605631
606632
**Example:**
607633
634+
From GeoPandas:
635+
608636
```py
609637
import geopandas as gpd
610638
from lonboard import Map, PathLayer
611639
612-
# A GeoDataFrame with LineString geometries
640+
# A GeoDataFrame with LineString or MultiLineString geometries
613641
gdf = gpd.GeoDataFrame()
614642
layer = PathLayer.from_geopandas(
615643
gdf,
@@ -618,6 +646,22 @@ class PathLayer(BaseArrowLayer):
618646
)
619647
m = Map(layers=[layer])
620648
```
649+
650+
From [geoarrow-rust](https://geoarrow.github.io/geoarrow-rs/python/latest):
651+
652+
```py
653+
from geoarrow.rust.core import read_parquet
654+
from lonboard import Map, PathLayer
655+
656+
# Example: A GeoParquet file with LineString or MultiLineString geometries
657+
table = read_parquet("path/to/file.parquet")
658+
layer = PathLayer(
659+
table=table,
660+
get_color=[255, 0, 0],
661+
width_min_pixels=2,
662+
)
663+
m = Map(layers=[layer])
664+
```
621665
"""
622666

623667
_layer_type = traitlets.Unicode("path").tag(sync=True)
@@ -628,6 +672,15 @@ class PathLayer(BaseArrowLayer):
628672
EXTENSION_NAME.MULTILINESTRING,
629673
}
630674
)
675+
"""A GeoArrow table with a LineString or MultiLineString column.
676+
677+
This is the fastest way to plot data from an existing GeoArrow source, such as
678+
[geoarrow-rust](https://geoarrow.github.io/geoarrow-rs/python/latest) or
679+
[geoarrow-pyarrow](https://geoarrow.github.io/geoarrow-python/main/index.html).
680+
681+
If you have a GeoPandas `GeoDataFrame`, use
682+
[`from_geopandas`][lonboard.PathLayer.from_geopandas] instead.
683+
"""
631684

632685
width_units = traitlets.Unicode(allow_none=True).tag(sync=True)
633686
"""
@@ -738,25 +791,51 @@ class SolidPolygonLayer(BaseArrowLayer):
738791
739792
**Example:**
740793
794+
From GeoPandas:
795+
741796
```py
742797
import geopandas as gpd
743798
from lonboard import Map, SolidPolygonLayer
744799
745-
# A GeoDataFrame with Polygon geometries
800+
# A GeoDataFrame with Polygon or MultiPolygon geometries
746801
gdf = gpd.GeoDataFrame()
747802
layer = SolidPolygonLayer.from_geopandas(
748803
gdf,
749804
get_fill_color=[255, 0, 0],
750805
)
751806
m = Map(layers=[layer])
752807
```
808+
809+
From [geoarrow-rust](https://geoarrow.github.io/geoarrow-rs/python/latest):
810+
811+
```py
812+
from geoarrow.rust.core import read_parquet
813+
from lonboard import Map, SolidPolygonLayer
814+
815+
# Example: A GeoParquet file with Polygon or MultiPolygon geometries
816+
table = read_parquet("path/to/file.parquet")
817+
layer = SolidPolygonLayer(
818+
table=table,
819+
get_fill_color=[255, 0, 0],
820+
)
821+
m = Map(layers=[layer])
822+
```
753823
"""
754824

755825
_layer_type = traitlets.Unicode("solid-polygon").tag(sync=True)
756826

757827
table = PyarrowTableTrait(
758828
allowed_geometry_types={EXTENSION_NAME.POLYGON, EXTENSION_NAME.MULTIPOLYGON}
759829
)
830+
"""A GeoArrow table with a Polygon or MultiPolygon column.
831+
832+
This is the fastest way to plot data from an existing GeoArrow source, such as
833+
[geoarrow-rust](https://geoarrow.github.io/geoarrow-rs/python/latest) or
834+
[geoarrow-pyarrow](https://geoarrow.github.io/geoarrow-python/main/index.html).
835+
836+
If you have a GeoPandas `GeoDataFrame`, use
837+
[`from_geopandas`][lonboard.SolidPolygonLayer.from_geopandas] instead.
838+
"""
760839

761840
filled = traitlets.Bool(allow_none=True).tag(sync=True)
762841
"""
@@ -857,17 +936,35 @@ def _validate_accessor_length(self, proposal):
857936
class HeatmapLayer(BaseArrowLayer):
858937
"""The `HeatmapLayer` visualizes the spatial distribution of data.
859938
860-
**Example:**
939+
**Example:**
940+
941+
From GeoPandas:
861942
862943
```py
863944
import geopandas as gpd
864945
from lonboard import Map, HeatmapLayer
865946
866947
# A GeoDataFrame with Point geometries
867948
gdf = gpd.GeoDataFrame()
868-
layer = HeatmapLayer.from_geopandas(gdf,)
949+
layer = HeatmapLayer.from_geopandas(gdf)
950+
m = Map(layers=[layer])
951+
```
952+
953+
From [geoarrow-rust](https://geoarrow.github.io/geoarrow-rs/python/latest):
954+
955+
```py
956+
from geoarrow.rust.core import read_parquet
957+
from lonboard import Map, HeatmapLayer
958+
959+
# Example: A GeoParquet file with Point geometries
960+
table = read_parquet("path/to/file.parquet")
961+
layer = HeatmapLayer(
962+
table=table,
963+
get_fill_color=[255, 0, 0],
964+
)
869965
m = Map(layers=[layer])
870966
```
967+
871968
"""
872969

873970
_layer_type = traitlets.Unicode("heatmap").tag(sync=True)
@@ -880,6 +977,15 @@ def _default_rows_per_chunk(self):
880977
return len(self.table)
881978

882979
table = PyarrowTableTrait(allowed_geometry_types={EXTENSION_NAME.POINT})
980+
"""A GeoArrow table with a Point column.
981+
982+
This is the fastest way to plot data from an existing GeoArrow source, such as
983+
[geoarrow-rust](https://geoarrow.github.io/geoarrow-rs/python/latest) or
984+
[geoarrow-pyarrow](https://geoarrow.github.io/geoarrow-python/main/index.html).
985+
986+
If you have a GeoPandas `GeoDataFrame`, use
987+
[`from_geopandas`][lonboard.HeatmapLayer.from_geopandas] instead.
988+
"""
883989

884990
radius_pixels = traitlets.Float(allow_none=True).tag(sync=True)
885991
"""Radius of the circle in pixels, to which the weight of an object is distributed.

lonboard/_viewport.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,16 @@ def bbox_to_zoom_level(bbox: Bbox) -> int:
7373
def compute_view(tables: List[pa.Table]):
7474
"""Automatically computes a view state for the data passed in."""
7575
bbox, center = get_bbox_center(tables)
76+
77+
if center.x is not None and (center.x < -180 or center.x > 180):
78+
msg = "Longitude of data's center is outside of WGS84 bounds.\n"
79+
msg += "Is data in WGS84 projection?"
80+
raise ValueError(msg)
81+
if center.y is not None and (center.y < -90 or center.y > 90):
82+
msg = "Latitude of data's center is outside of WGS84 bounds.\n"
83+
msg += "Is data in WGS84 projection?"
84+
raise ValueError(msg)
85+
7686
# When no geo column is found, bbox will have inf values
7787
try:
7888
zoom = bbox_to_zoom_level(bbox)

lonboard/_viz.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,12 +36,17 @@ class GeoInterfaceProtocol(Protocol):
3636
def __geo_interface__(self) -> dict:
3737
...
3838

39+
class ArrowStreamExportable(Protocol):
40+
def __arrow_c_stream__(self, requested_schema: object | None = None) -> object:
41+
...
42+
3943
VizDataInput = Union[
4044
gpd.GeoDataFrame,
4145
gpd.GeoSeries,
4246
pa.Table,
4347
NDArray[np.object_],
4448
shapely.geometry.base.BaseGeometry,
49+
ArrowStreamExportable,
4550
GeoInterfaceProtocol,
4651
Dict[str, Any],
4752
]
@@ -121,6 +126,11 @@ def create_layer_from_data_input(
121126
if isinstance(data, shapely.geometry.base.BaseGeometry):
122127
return _viz_shapely_scalar(data, **kwargs)
123128

129+
# Anything with __arrow_c_stream__
130+
if hasattr(data, "__arrow_c_stream__"):
131+
data = cast(ArrowStreamExportable, data)
132+
return _viz_geoarrow_table(pa.table(data), **kwargs)
133+
124134
# Anything with __geo_interface__
125135
if hasattr(data, "__geo_interface__"):
126136
data = cast(GeoInterfaceProtocol, data)
@@ -236,6 +246,7 @@ def _viz_geo_interface(
236246
def _viz_geoarrow_table(
237247
table: pa.Table, **kwargs
238248
) -> Union[ScatterplotLayer, PathLayer, SolidPolygonLayer]:
249+
# TODO: don't hard-code "geometry"
239250
geometry_ext_type = table.schema.field("geometry").metadata.get(
240251
b"ARROW:extension:name"
241252
)

lonboard/experimental/_layer.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,15 @@ class ArcLayer(BaseLayer):
2020
_layer_type = traitlets.Unicode("arc").tag(sync=True)
2121

2222
table = PyarrowTableTrait()
23+
"""A GeoArrow table.
24+
25+
This is the fastest way to plot data from an existing GeoArrow source, such as
26+
[geoarrow-rust](https://geoarrow.github.io/geoarrow-rs/python/latest) or
27+
[geoarrow-pyarrow](https://geoarrow.github.io/geoarrow-python/main/index.html).
28+
29+
If you have a GeoPandas `GeoDataFrame`, use
30+
[`from_geopandas`][lonboard.ScatterplotLayer.from_geopandas] instead.
31+
"""
2332

2433
great_circle = traitlets.Bool(allow_none=True).tag(sync=True)
2534
"""If `True`, create the arc along the shortest path on the earth surface.
@@ -132,6 +141,15 @@ class TextLayer(BaseLayer):
132141
_layer_type = traitlets.Unicode("text").tag(sync=True)
133142

134143
table = PyarrowTableTrait(allowed_geometry_types={EXTENSION_NAME.POINT})
144+
"""A GeoArrow table with a Point column.
145+
146+
This is the fastest way to plot data from an existing GeoArrow source, such as
147+
[geoarrow-rust](https://geoarrow.github.io/geoarrow-rs/python/latest) or
148+
[geoarrow-pyarrow](https://geoarrow.github.io/geoarrow-python/main/index.html).
149+
150+
If you have a GeoPandas `GeoDataFrame`, use
151+
[`from_geopandas`][lonboard.ScatterplotLayer.from_geopandas] instead.
152+
"""
135153

136154
billboard = traitlets.Bool().tag(sync=True)
137155
"""If `true`, the text always faces camera. Otherwise the text faces up (z).

0 commit comments

Comments
 (0)