Merged
47 changes: 44 additions & 3 deletions lonboard/_geoarrow/parse_wkb.py
@@ -1,13 +1,15 @@
"""Handle GeoArrow tables with WKB-encoded geometry
"""
"""Handle GeoArrow tables with WKB-encoded geometry"""

import json
from typing import Tuple

import pyarrow as pa
import shapely

from lonboard._constants import EXTENSION_NAME
from lonboard._constants import EXTENSION_NAME, OGC_84
from lonboard._geoarrow.crs import get_field_crs
from lonboard._geoarrow.extension_types import construct_geometry_array
from lonboard._utils import get_geometry_column_index


def parse_wkb_table(table: pa.Table) -> pa.Table:
@@ -16,6 +18,8 @@ def parse_wkb_table(table: pa.Table) -> pa.Table:
If no columns are WKB-encoded, returns the input. Note that WKB columns must be
tagged with an extension name of `geoarrow.wkb` or `ogc.wkb`
"""
table = parse_geoparquet_table(table)

wkb_names = {EXTENSION_NAME.WKB, EXTENSION_NAME.OGC_WKB}
for field_idx in range(len(table.schema)):
field = table.field(field_idx)
@@ -32,6 +36,43 @@ def parse_wkb_table(table: pa.Table) -> pa.Table:
return table


def parse_geoparquet_table(table: pa.Table) -> pa.Table:
"""Parse GeoParquet table metadata, assigning it to GeoArrow metadata"""
# If a column already has geoarrow metadata, don't parse from GeoParquet metadata
if get_geometry_column_index(table.schema) is not None:
return table

schema_metadata = table.schema.metadata or {}
geo_metadata = schema_metadata.get(b"geo")
if not geo_metadata:
return table

try:
geo_metadata = json.loads(geo_metadata)
except json.JSONDecodeError:
return table

primary_column = geo_metadata["primary_column"]
column_meta = geo_metadata["columns"][primary_column]
column_idx = [
idx for idx, name in enumerate(table.column_names) if name == primary_column
]
assert len(column_idx) == 1, f"Expected one column with name {primary_column}"
column_idx = column_idx[0]
if column_meta["encoding"] == "WKB":
existing_field = table.schema.field(column_idx)
existing_column = table.column(column_idx)
crs_metadata = {"crs": column_meta.get("crs", OGC_84.to_json_dict())}
metadata = {
b"ARROW:extension:name": EXTENSION_NAME.WKB,
b"ARROW:extension:metadata": json.dumps(crs_metadata),
}
new_field = existing_field.with_metadata(metadata)
table = table.set_column(column_idx, new_field, existing_column)

return table


def parse_wkb_column(
field: pa.Field, column: pa.ChunkedArray
) -> Tuple[pa.Field, pa.ChunkedArray]:
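For context (not part of the diff): a minimal sketch of what the new `parse_geoparquet_table` step enables, assuming a table whose geometry column is WKB-encoded and carries only schema-level GeoParquet `geo` metadata. The column name `geometry` and the metadata shape follow the GeoParquet convention; the printed field is whatever GeoArrow type `parse_wkb_table` constructs.

```python
import json

import pyarrow as pa
import shapely

from lonboard._geoarrow.parse_wkb import parse_wkb_table

# A plain Arrow table: WKB geometry, no GeoArrow field metadata, only
# GeoParquet-style schema-level "geo" metadata.
wkb = pa.array(shapely.to_wkb([shapely.Point(0, 0), shapely.Point(1, 1)]))
table = pa.table({"geometry": wkb, "name": ["a", "b"]})
geo_metadata = {
    "version": "1.0.0",
    "primary_column": "geometry",
    "columns": {"geometry": {"encoding": "WKB", "geometry_types": ["Point"]}},
}
table = table.replace_schema_metadata({b"geo": json.dumps(geo_metadata)})

# parse_wkb_table now first promotes the "geo" metadata to GeoArrow field
# metadata (geoarrow.wkb, with the CRS defaulting to OGC:84 when absent),
# then parses the WKB column as before.
parsed = parse_wkb_table(table)
print(parsed.schema.field("geometry"))
```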
12 changes: 12 additions & 0 deletions tests/test_geoarrow.py
@@ -1,7 +1,9 @@
import json
from tempfile import NamedTemporaryFile

import geodatasets
import geopandas as gpd
import pyarrow.parquet as pq
from pyproj import CRS

from lonboard import SolidPolygonLayer
@@ -61,3 +63,13 @@ def test_reproject_sliced_array():
sliced_table = table.slice(2)
# This should work even with a sliced array.
_reprojected = reproject_table(sliced_table, to_crs=OGC_84)


def test_geoparquet_metadata():
gdf = gpd.read_file(geodatasets.get_path("nybb"))

with NamedTemporaryFile("+wb", suffix=".parquet") as f:
gdf.to_parquet(f)
table = pq.read_table(f)

_layer = SolidPolygonLayer(table=table)
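A rough illustration (not part of the diff) of what the new test exercises, assuming the same `nybb` dataset: after `parse_geoparquet_table`, the geometry field carries GeoArrow extension metadata even though the table was read with plain `pyarrow.parquet`. The local file path and the printed values are illustrative, not asserted by the test.

```python
import geodatasets
import geopandas as gpd
import pyarrow.parquet as pq

from lonboard._geoarrow.parse_wkb import parse_geoparquet_table

gdf = gpd.read_file(geodatasets.get_path("nybb"))
gdf.to_parquet("nybb.parquet")  # hypothetical local path
table = pq.read_table("nybb.parquet")

# Read with plain pyarrow: only schema-level GeoParquet metadata is present.
assert b"geo" in (table.schema.metadata or {})

parsed = parse_geoparquet_table(table)
field = parsed.schema.field("geometry")
# Expected to be b"geoarrow.wkb" and a JSON blob containing the CRS.
print(field.metadata[b"ARROW:extension:name"])
print(field.metadata[b"ARROW:extension:metadata"])
```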