Skip to content

Support passing string type numbers, geographic coordinates and datetimes #975

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Apr 23, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/api/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,7 @@ Low level access (these are mostly used by the :mod:`pygmt.clib` package):
clib.Session.get_default
clib.Session.create_data
clib.Session.put_matrix
clib.Session.put_strings
clib.Session.put_vector
clib.Session.write_data
clib.Session.open_virtual_file
Expand Down
26 changes: 13 additions & 13 deletions pygmt/clib/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
np.uint64: "GMT_ULONG",
np.uint32: "GMT_UINT",
np.datetime64: "GMT_DATETIME",
np.str_: "GMT_TEXT",
}


Expand Down Expand Up @@ -719,9 +720,7 @@ def _check_dtype_and_dim(self, array, ndim):
"""
# check the array has the given dimension
if array.ndim != ndim:
raise GMTInvalidInput(
"Expected a numpy 1d array, got {}d.".format(array.ndim)
)
raise GMTInvalidInput(f"Expected a numpy 1d array, got {array.ndim}d.")

# check the array has a valid/known data type
if array.dtype.type not in DTYPES:
Expand All @@ -745,7 +744,7 @@ def put_vector(self, dataset, column, vector):
first. Use ``family='GMT_IS_DATASET|GMT_VIA_VECTOR'``.

Not all numpy dtypes are supported, only: float64, float32, int64,
int32, uint64, and uint32.
int32, uint64, uint32, datetime64 and str_.

.. warning::
The numpy array must be C contiguous in memory. If it comes from a
Expand Down Expand Up @@ -777,23 +776,24 @@ def put_vector(self, dataset, column, vector):
)

gmt_type = self._check_dtype_and_dim(vector, ndim=1)
if gmt_type == self["GMT_DATETIME"]:
if gmt_type in (self["GMT_TEXT"], self["GMT_DATETIME"]):
vector_pointer = (ctp.c_char_p * len(vector))()
vector_pointer[:] = np.char.encode(
np.datetime_as_string(array_to_datetime(vector))
)
if gmt_type == self["GMT_DATETIME"]:
vector_pointer[:] = np.char.encode(
np.datetime_as_string(array_to_datetime(vector))
)
else:
vector_pointer[:] = np.char.encode(vector)
else:
vector_pointer = vector.ctypes.data_as(ctp.c_void_p)
status = c_put_vector(
self.session_pointer, dataset, column, gmt_type, vector_pointer
)
if status != 0:
raise GMTCLibError(
" ".join(
[
"Failed to put vector of type {}".format(vector.dtype),
"in column {} of dataset.".format(column),
]
(
f"Failed to put vector of type {vector.dtype} "
f"in column {column} of dataset."
)
)

Expand Down
70 changes: 70 additions & 0 deletions pygmt/tests/test_clib_put_vector.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Test the functions that put vector data into GMT.
"""
import itertools
from datetime import datetime

import numpy as np
import numpy.testing as npt
Expand Down Expand Up @@ -90,6 +91,75 @@ def test_put_vector_mixed_dtypes():
npt.assert_allclose(newy, y)


def test_put_vector_string_dtype():
    """
    Passing string type vectors to a dataset.

    Four string vectors (plain numbers, longitudes, latitudes and datetimes)
    are paired in every combination as the x and y columns of a two-column
    dataset. Each dataset is written to a temporary file, read back, and
    compared against the expected numeric or string values.
    """
    # Local import: only ``datetime`` itself is imported at module level.
    from datetime import timezone

    # input string vectors: numbers, longitudes, latitudes, and datetimes
    vectors = np.array(
        [
            ["10", "20.0", "-30.0", "3.5e1"],
            ["10W", "30.50E", "30:30W", "40:30:30.500E"],
            ["10N", "30.50S", "30:30N", "40:30:30.500S"],
            ["2021-02-03", "2021-02-03T04", "2021-02-03T04:05:06.700", "T04:50:06.700"],
        ]
    )
    # Row of ``vectors`` holding datetimes; it is the only row whose output
    # is compared as strings rather than as numbers.
    datetime_index = 3

    # The last datetime input has no date part, so the expected output carries
    # today's UTC date. ``datetime.now(timezone.utc)`` replaces the deprecated
    # ``datetime.utcnow()`` and formats to the same date string.
    today = datetime.now(timezone.utc).strftime("%Y-%m-%d")

    # output vectors in double or string type
    # Notes:
    # 1. longitudes and latitudes are stored in double in GMT
    # 2. The default output format for datetime is YYYY-mm-ddTHH:MM:SS
    expected_vectors = [
        [10.0, 20.0, -30.0, 35],
        [-10, 30.5, -30.5, 40.508472],
        [10, -30.50, 30.5, -40.508472],
        [
            "2021-02-03T00:00:00",
            "2021-02-03T04:00:00",
            "2021-02-03T04:05:06",
            f"{today}T04:50:06",
        ],
    ]

    # loop over all possible combinations of input types
    for i, j in itertools.combinations_with_replacement(range(4), r=2):
        with clib.Session() as lib:
            dataset = lib.create_data(
                family="GMT_IS_DATASET|GMT_VIA_VECTOR",
                geometry="GMT_IS_POINT",
                mode="GMT_CONTAINER_ONLY",
                dim=[2, 4, 1, 0],  # columns, rows, layers, dtype
            )
            lib.put_vector(dataset, column=lib["GMT_X"], vector=vectors[i])
            lib.put_vector(dataset, column=lib["GMT_Y"], vector=vectors[j])
            # Turns out wesn doesn't matter for Datasets
            wesn = [0] * 6
            # Save the data to a file to see if it's being accessed correctly
            with GMTTempFile() as tmp_file:
                lib.write_data(
                    "GMT_IS_VECTOR",
                    "GMT_IS_POINT",
                    "GMT_WRITE_SET",
                    wesn,
                    tmp_file.name,
                    dataset,
                )
                # Load the data
                output = np.genfromtxt(
                    tmp_file.name, dtype=None, names=("x", "y"), encoding=None
                )
                # check that the output is correct
                # Use npt.assert_allclose for numeric arrays
                # and npt.assert_array_equal for string arrays
                for name, index in (("x", i), ("y", j)):
                    if index == datetime_index:
                        npt.assert_array_equal(output[name], expected_vectors[index])
                    else:
                        npt.assert_allclose(output[name], expected_vectors[index])


def test_put_vector_invalid_dtype():
"""
Check that it fails with an exception for invalid data types.
Expand Down