Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: raise Python warnings instead of printing to stderr for GDAL Warning messages #242

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@
specifying a mask manually for missing values in `write` (#219)
- Standardized 3-dimensional geometry type labels from "2.5D <type>" to
"<type> Z" for consistency with well-known text (WKT) formats (#234)
- Failure error messages from GDAL are no longer printed to stderr (they were
already translated into Python exceptions as well) (#236).
- Failure and warning error messages from GDAL are no longer printed to
stderr: failures were already translated into Python exceptions
and warning messages are now translated into Python warnings (#236, #242).

## 0.5.1 (2023-01-26)

Expand Down
8 changes: 8 additions & 0 deletions pyogrio/_err.pyx
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# ported from fiona::_err.pyx
from enum import IntEnum
import warnings

from pyogrio._ogr cimport (
CE_None, CE_Debug, CE_Warning, CE_Failure, CE_Fatal, CPLErrorReset,
Expand Down Expand Up @@ -229,6 +230,13 @@ cdef void error_handler(CPLErr err_class, int err_no, const char* err_msg) nogil
# with error return codes and translated into Python exceptions
return

elif err_class == CE_Warning:
with gil:
msg_b = err_msg
msg = msg_b.decode('utf-8')
warnings.warn(msg, RuntimeWarning)
return

# Fall back to the default handler for non-failure messages since
# they won't be translated into exceptions.
CPLDefaultErrorHandler(err_class, err_no, err_msg)
Expand Down
2 changes: 1 addition & 1 deletion pyogrio/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ def read_info(path_or_buffer, /, layer=None, encoding=None, **kwargs):
source.
**kwargs
Additional driver-specific dataset open options passed to OGR. Invalid
options are logged by OGR to stderr and are not captured.
options will trigger a warning.

Returns
-------
Expand Down
2 changes: 1 addition & 1 deletion pyogrio/geopandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def read_dataframe(
installed). When enabled, this provides a further speed-up.
**kwargs
Additional driver-specific dataset open options passed to OGR. Invalid
options are logged by OGR to stderr and are not captured.
options will trigger a warning.

Returns
-------
Expand Down
2 changes: 1 addition & 1 deletion pyogrio/raw.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def read(
If True, will return the FIDs of the feature that were read.
**kwargs
Additional driver-specific dataset open options passed to OGR. Invalid
options are logged by OGR to stderr and are not captured.
options will trigger a warning.

Returns
-------
Expand Down
1 change: 1 addition & 0 deletions pyogrio/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ def prepare_testfile(testfile_path, dst_dir, ext):
elif ext == ".gpkg":
# For .gpkg, spatial_index=False to avoid the rows being reordered
meta["spatial_index"] = False
meta["geometry_type"] = "MultiPolygon"

write(dst_path, geometry, field_data, **meta)
return dst_path
Expand Down
15 changes: 12 additions & 3 deletions pyogrio/tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,9 +263,9 @@ def test_read_info_dataset_kwargs(data_dir, dataset_kwargs, fields):
assert meta["fields"].tolist() == fields


def test_read_info_invalid_dataset_kwargs(capfd, naturalearth_lowres):
read_info(naturalearth_lowres, INVALID="YES")
assert "does not support open option INVALID" in capfd.readouterr().err
def test_read_info_invalid_dataset_kwargs(naturalearth_lowres):
with pytest.warns(RuntimeWarning, match="does not support open option INVALID"):
read_info(naturalearth_lowres, INVALID="YES")


@pytest.mark.parametrize(
Expand Down Expand Up @@ -298,3 +298,12 @@ def test_error_handling(capfd):
read_info("non-existent.shp")

assert capfd.readouterr().err == ""


def test_error_handling_warning(capfd, naturalearth_lowres):
# an operation that triggers a GDAL Warning
# -> translated into a Python warning + not printed to stderr
with pytest.warns(RuntimeWarning, match="does not support open option INVALID"):
read_info(naturalearth_lowres, INVALID="YES")

assert capfd.readouterr().err == ""
28 changes: 19 additions & 9 deletions pyogrio/tests/test_geopandas_io.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import contextlib
from datetime import datetime
import os
from packaging.version import Version
Expand Down Expand Up @@ -649,12 +650,21 @@ def test_write_dataframe_promote_to_multi_layer_geom_type(
input_gdf = read_dataframe(naturalearth_lowres)

output_path = tmp_path / f"test_promote_layer_geom_type{ext}"
write_dataframe(
input_gdf,
output_path,
promote_to_multi=promote_to_multi,
geometry_type=geometry_type,
)

if ext == ".gpkg" and geometry_type in ("Polygon", "Point"):
ctx = pytest.warns(
RuntimeWarning, match="A geometry of type MULTIPOLYGON is inserted"
)
else:
ctx = contextlib.nullcontext()

with ctx:
write_dataframe(
input_gdf,
output_path,
promote_to_multi=promote_to_multi,
geometry_type=geometry_type,
)

assert output_path.exists()
output_gdf = read_dataframe(output_path)
Expand Down Expand Up @@ -1024,9 +1034,9 @@ def test_read_dataset_kwargs(data_dir, use_arrow):
),
],
)
def test_read_invalid_dataset_kwargs(capfd, naturalearth_lowres, use_arrow):
read_dataframe(naturalearth_lowres, use_arrow=use_arrow, INVALID="YES")
assert "does not support open option INVALID" in capfd.readouterr().err
def test_read_invalid_dataset_kwargs(naturalearth_lowres, use_arrow):
with pytest.warns(RuntimeWarning, match="does not support open option INVALID"):
read_dataframe(naturalearth_lowres, use_arrow=use_arrow, INVALID="YES")


def test_write_nullable_dtypes(tmp_path):
Expand Down
13 changes: 12 additions & 1 deletion pyogrio/tests/test_raw_io.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import contextlib
import json
import os
import sys
Expand Down Expand Up @@ -270,6 +271,7 @@ def test_write(tmpdir, naturalearth_lowres):

def test_write_gpkg(tmpdir, naturalearth_lowres):
meta, _, geometry, field_data = read(naturalearth_lowres)
meta.update({"geometry_type": "MultiPolygon"})
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem here is that reading the Shapefile will indicate the geometry type is "Polygon" (because shapefiles don't care about the distinction), but then using that meta to write to geopackage will raise a warning because geopackage is stricter


filename = os.path.join(str(tmpdir), "test.gpkg")
write(filename, geometry, field_data, driver="GPKG", **meta)
Expand Down Expand Up @@ -456,9 +458,11 @@ def assert_equal_result(result1, result2):
assert all([np.array_equal(f1, f2) for f1, f2 in zip(field_data1, field_data2)])


@pytest.mark.filterwarnings("ignore:File /vsimem:RuntimeWarning") # TODO
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When reading from bytes / file-like objects, we create a vsimem temporary in-memory file, but because we don't know the file type at that moment, we get a warning like "File /vsimem/5332805636f74498b74520a2c132082b has GPKG application_id, but non conformant file extension"
Ideally we would solve that (since the user cannot do anything about that warning, since this name is auto-generated), but not sure how. We don't know the type of file when it's a file-like object, and it's only GDAL that infers it based on the content of the file. Maybe we should open an issue about upstream in GDAL to relax this constraint for vsimem files.

@pytest.mark.parametrize("driver,ext", [("GeoJSON", "geojson"), ("GPKG", "gpkg")])
def test_read_from_bytes(tmpdir, naturalearth_lowres, driver, ext):
meta, index, geometry, field_data = read(naturalearth_lowres)
meta.update({"geometry_type": "Unknown"})
filename = os.path.join(str(tmpdir), f"test.{ext}")
write(filename, geometry, field_data, driver=driver, **meta)

Expand All @@ -480,9 +484,11 @@ def test_read_from_bytes_zipped(tmpdir, naturalearth_lowres_vsi):
assert_equal_result((meta, index, geometry, field_data), result2)


@pytest.mark.filterwarnings("ignore:File /vsimem:RuntimeWarning") # TODO
@pytest.mark.parametrize("driver,ext", [("GeoJSON", "geojson"), ("GPKG", "gpkg")])
def test_read_from_file_like(tmpdir, naturalearth_lowres, driver, ext):
meta, index, geometry, field_data = read(naturalearth_lowres)
meta.update({"geometry_type": "Unknown"})
filename = os.path.join(str(tmpdir), f"test.{ext}")
write(filename, geometry, field_data, driver=driver, **meta)

Expand Down Expand Up @@ -647,7 +653,12 @@ def test_write_float_nan_null(tmp_path, dtype):

# set to False
# by default, GDAL will skip the property for GeoJSON if the value is NaN
write(fname, geometry, field_data, fields, **meta, nan_as_null=False)
if dtype == "float32":
ctx = pytest.warns(RuntimeWarning, match="NaN of Infinity value found. Skipped")
else:
ctx = contextlib.nullcontext()
Comment on lines +656 to +659
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is bizarre, but for some reason GDAL only raise the warning if the dtype is float32, while the value is actually skipped for both float32 and float64

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like an issue to raise upstream?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, so the issue seems to be that it only warns once, not depending on float32 vs float64. I would expect it to warn once per file, and not once per session, though

https://github.com/OSGeo/gdal/blob/0626cc27486f7ff7ba80e24fe9464e933dbcda2d/ogr/ogrsf_frmts/geojson/ogrgeojsonwriter.cpp#L876-L883

with ctx:
write(fname, geometry, field_data, fields, **meta, nan_as_null=False)
with open(str(fname), "r") as f:
content = f.read()
assert '"properties": { }' in content
Expand Down