Skip to content

Commit

Permalink
ENH: support reading numeric subtypes (bool, int16, float32) (#83)
Browse files Browse the repository at this point in the history
  • Loading branch information
jorisvandenbossche authored Apr 29, 2022
1 parent b5ee1fc commit 84b3980
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 2 deletions.
18 changes: 16 additions & 2 deletions pyogrio/_io.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,13 @@ FIELD_TYPES = [
None # OFTInteger64List, List of 64bit integers, not supported
]

# Mapping of OGR field subtype codes to the numpy dtype name to use when
# reading a field with that subtype. get_fields looks the subtype up with
# ``.get()``; a None result (OFSTNone, or a subtype not listed here) leaves
# the previously chosen np_type untouched.
FIELD_SUBTYPES = {
OFSTNone: None, # No subtype
OFSTBoolean: "bool", # Boolean integer
OFSTInt16: "int16", # Signed 16-bit integer
OFSTFloat32: "float32", # Single precision (32 bit) floating point
}

# Mapping of numpy ndarray dtypes to (field type, subtype)
DTYPE_OGR_FIELD_TYPES = {
'int8': (OFTInteger, OFSTInt16),
Expand Down Expand Up @@ -310,6 +317,7 @@ cdef get_fields(OGRLayerH ogr_layer, str encoding):
cdef int field_count
cdef OGRFeatureDefnH ogr_featuredef = NULL
cdef OGRFieldDefnH ogr_fielddef = NULL
cdef int field_subtype
cdef const char *key_c

try:
Expand Down Expand Up @@ -345,6 +353,12 @@ cdef get_fields(OGRLayerH ogr_layer, str encoding):
f"Skipping field {field_name}: unsupported OGR type: {field_type}")
continue

field_subtype = OGR_Fld_GetSubType(ogr_fielddef)
subtype = FIELD_SUBTYPES.get(field_subtype)
if subtype is not None:
# bool, int16, float32 dtypes
np_type = subtype

fields_view[i,0] = i
fields_view[i,1] = field_type
fields_view[i,2] = field_name
Expand Down Expand Up @@ -1250,10 +1264,10 @@ def ogr_write(str path, str layer, str driver, geometry, field_data, fields,
ogr_fielddef = exc_wrap_pointer(OGR_Fld_Create(name_b, field_type))

# subtypes, see: https://gdal.org/development/rfc/rfc50_ogr_field_subtype.html
if field_type != OFSTNone:
if field_subtype != OFSTNone:
OGR_Fld_SetSubType(ogr_fielddef, field_subtype)

if field_type:
if width:
OGR_Fld_SetWidth(ogr_fielddef, width)

# TODO: set precision
Expand Down
40 changes: 40 additions & 0 deletions pyogrio/tests/test_raw_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,3 +363,43 @@ def test_read_from_file_like(tmpdir, naturalearth_lowres, driver, ext):
result2 = read(f)

assert_equal_result((meta, index, geometry, field_data), result2)


@pytest.mark.parametrize("ext", ["gpkg", "fgb"])
def test_read_write_data_types_numeric(tmp_path, ext):
    """Write numeric columns, read them back, and compare values and dtypes.

    The first part covers dtypes expected to come back unchanged; the second
    part covers integer dtypes expected to come back as a different dtype.
    """
    # WKB for Point(0, 0), one copy per row
    point_wkb = bytes.fromhex("010100000000000000000000000000000000000000")
    geometry = np.array([point_wkb] * 3, dtype=object)

    fields = ["bool", "int16", "int32", "int64", "float32", "float64"]
    # Each column is named after its dtype; the bool column carries its own
    # values, every other column holds [1, 2, 3] in the named dtype.
    field_data = [np.array([True, False, True], dtype="bool")]
    field_data.extend(
        np.array([1, 2, 3], dtype=dtype_name) for dtype_name in fields[1:]
    )
    meta = {"geometry_type": "Point", "crs": "EPSG:4326", "spatial_index": False}

    filename = tmp_path / f"test.{ext}"
    write(filename, geometry, field_data, fields, **meta)
    roundtripped = read(filename)[3]
    assert all(
        np.array_equal(actual, expected)
        for actual, expected in zip(roundtripped, field_data)
    )
    assert all(
        actual.dtype == expected.dtype
        for actual, expected in zip(roundtripped, field_data)
    )

    # other integer data types that don't roundtrip exactly
    # these are generally promoted to a larger integer type except for uint64
    promoted_dtypes = {
        "int8": "int16",
        "uint8": "int16",
        "uint16": "int32",
        "uint32": "int64",
        "uint64": "int64",
    }
    for i, (dtype, result_dtype) in enumerate(promoted_dtypes.items()):
        column = [np.array([1, 2, 3], dtype=dtype)]
        promoted_path = tmp_path / f"test{i}.{ext}"
        write(promoted_path, geometry, column, ["col"], **meta)
        values = read(promoted_path)[3][0]
        assert np.array_equal(values, np.array([1, 2, 3]))
        assert values.dtype == result_dtype

0 comments on commit 84b3980

Please sign in to comment.