Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: support reading numeric subtypes (bool, int16, float32) #83

Merged
merged 4 commits into from
Apr 29, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 16 additions & 2 deletions pyogrio/_io.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,13 @@ FIELD_TYPES = [
None # OFTInteger64List, List of 64bit integers, not supported
]

# Lookup from OGR field subtype to the numpy dtype name that overrides the
# dtype of the base field type when reading; None means no override applies.
FIELD_SUBTYPES = {
    OFSTNone: None,  # no subtype set on the field
    OFSTBoolean: "bool",  # integer field holding boolean values
    OFSTInt16: "int16",  # 16-bit signed integer
    OFSTFloat32: "float32",  # 32-bit single-precision float
}

# Mapping of numpy ndarray dtypes to (field type, subtype)
DTYPE_OGR_FIELD_TYPES = {
'int8': (OFTInteger, OFSTInt16),
Expand Down Expand Up @@ -310,6 +317,7 @@ cdef get_fields(OGRLayerH ogr_layer, str encoding):
cdef int field_count
cdef OGRFeatureDefnH ogr_featuredef = NULL
cdef OGRFieldDefnH ogr_fielddef = NULL
cdef int field_subtype
cdef const char *key_c

try:
Expand Down Expand Up @@ -345,6 +353,12 @@ cdef get_fields(OGRLayerH ogr_layer, str encoding):
f"Skipping field {field_name}: unsupported OGR type: {field_type}")
continue

field_subtype = OGR_Fld_GetSubType(ogr_fielddef)
subtype = FIELD_SUBTYPES.get(field_subtype)
if subtype is not None:
# bool, int16, float32 dtypes
np_type = subtype

fields_view[i,0] = i
fields_view[i,1] = field_type
fields_view[i,2] = field_name
Expand Down Expand Up @@ -1250,10 +1264,10 @@ def ogr_write(str path, str layer, str driver, geometry, field_data, fields,
ogr_fielddef = exc_wrap_pointer(OGR_Fld_Create(name_b, field_type))

# subtypes, see: https://gdal.org/development/rfc/rfc50_ogr_field_subtype.html
if field_type != OFSTNone:
if field_subtype != OFSTNone:
OGR_Fld_SetSubType(ogr_fielddef, field_subtype)

if field_type:
if width:
brendan-ward marked this conversation as resolved.
Show resolved Hide resolved
OGR_Fld_SetWidth(ogr_fielddef, width)

# TODO: set precision
Expand Down
40 changes: 40 additions & 0 deletions pyogrio/tests/test_raw_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,3 +363,43 @@ def test_read_from_file_like(tmpdir, naturalearth_lowres, driver, ext):
result2 = read(f)

assert_equal_result((meta, index, geometry, field_data), result2)


@pytest.mark.parametrize("ext", ["gpkg", "fgb"])
def test_read_write_data_types_numeric(tmp_path, ext):
    """Round-trip numeric columns through write/read and check values and dtypes.

    The first part writes bool, int16, int32, int64, float32 and float64
    columns and expects each to be read back with identical values and dtype.
    The second part covers integer dtypes that are expected to come back as
    a different (generally larger) integer dtype.
    """
    # Point(0, 0), one geometry per record
    geometry = np.array(
        [bytes.fromhex("010100000000000000000000000000000000000000")] * 3, dtype=object
    )
    field_data = [
        np.array([True, False, True], dtype="bool"),
        np.array([1, 2, 3], dtype="int16"),
        np.array([1, 2, 3], dtype="int32"),
        np.array([1, 2, 3], dtype="int64"),
        np.array([1, 2, 3], dtype="float32"),
        np.array([1, 2, 3], dtype="float64"),
    ]
    fields = ["bool", "int16", "int32", "int64", "float32", "float64"]
    meta = dict(geometry_type="Point", crs="EPSG:4326", spatial_index=False)

    filename = tmp_path / f"test.{ext}"
    write(filename, geometry, field_data, fields, **meta)
    result = read(filename)[3]

    # zip() stops at the shorter input, so verify the column count first;
    # otherwise a column dropped on read would go unnoticed.
    assert len(result) == len(field_data)
    for name, actual, expected in zip(fields, result, field_data):
        # the field name is attached so a failure identifies the column
        assert np.array_equal(actual, expected), name
        assert actual.dtype == expected.dtype, name

    # other integer data types that don't roundtrip exactly
    # these are generally promoted to a larger integer type except for uint64
    promoted_dtypes = [
        ("int8", "int16"),
        ("uint8", "int16"),
        ("uint16", "int32"),
        ("uint32", "int64"),
        ("uint64", "int64"),
    ]
    for i, (dtype, result_dtype) in enumerate(promoted_dtypes):
        field_data = [np.array([1, 2, 3], dtype=dtype)]
        # a distinct file per dtype so each case is written from scratch
        filename = tmp_path / f"test{i}.{ext}"
        write(filename, geometry, field_data, ["col"], **meta)
        result = read(filename)[3][0]
        assert np.array_equal(result, np.array([1, 2, 3]))
        assert result.dtype == result_dtype