Skip to content

pygmt.blockm*: Add 'output_type' parameter for output in pandas/numpy/file formats #3103

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Mar 15, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
184 changes: 108 additions & 76 deletions pygmt/src/blockm.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,26 @@
"""
blockm - Block average (x, y, z) data tables by mean, median, or mode
estimation.
blockm - Block average (x, y, z) data tables by mean, median, or mode estimation.
"""

from typing import Literal

import numpy as np
import pandas as pd
from pygmt.clib import Session
from pygmt.helpers import (
GMTTempFile,
build_arg_string,
fmt_docstring,
kwargs_to_strings,
use_alias,
validate_output_table_type,
)

__doctest_skip__ = ["blockmean", "blockmedian", "blockmode"]


def _blockm(block_method, data, x, y, z, outfile, **kwargs):
def _blockm(
block_method, data, x, y, z, output_type, outfile, **kwargs
) -> pd.DataFrame | np.ndarray | None:
r"""
Block average (x, y, z) data tables by mean, median, or mode estimation.

Expand All @@ -34,38 +38,34 @@ def _blockm(block_method, data, x, y, z, outfile, **kwargs):

Returns
-------
output : pandas.DataFrame or None
Return type depends on whether the ``outfile`` parameter is set:
ret
Return type depends on ``outfile`` and ``output_type``:

- :class:`pandas.DataFrame` table with (x, y, z) columns if ``outfile``
is not set
- None if ``outfile`` is set (filtered output will be stored in file
set by ``outfile``)
- ``None`` if ``outfile`` is set (output will be stored in file set by
``outfile``)
- :class:`pandas.DataFrame` or :class:`numpy.ndarray` if ``outfile`` is not set
(depends on ``output_type``)
"""
with GMTTempFile(suffix=".csv") as tmpfile:
with Session() as lib:
with lib.virtualfile_in(
output_type = validate_output_table_type(output_type, outfile=outfile)

column_names = None
if output_type == "pandas" and isinstance(data, pd.DataFrame):
column_names = data.columns.to_list()

with Session() as lib:
with (
lib.virtualfile_in(
check_kind="vector", data=data, x=x, y=y, z=z, required_z=True
) as vintbl:
# Run blockm* on data table
if outfile is None:
outfile = tmpfile.name
lib.call_module(
module=block_method,
args=build_arg_string(kwargs, infile=vintbl, outfile=outfile),
)

# Read temporary csv output to a pandas table
if outfile == tmpfile.name: # if user did not set outfile, return pd.DataFrame
try:
column_names = data.columns.to_list()
result = pd.read_csv(tmpfile.name, sep="\t", names=column_names)
except AttributeError: # 'str' object has no attribute 'columns'
result = pd.read_csv(tmpfile.name, sep="\t", header=None, comment=">")
elif outfile != tmpfile.name: # return None if outfile set, output in outfile
result = None

return result
) as vintbl,
lib.virtualfile_out(kind="dataset", fname=outfile) as vouttbl,
):
lib.call_module(
module=block_method,
args=build_arg_string(kwargs, infile=vintbl, outfile=vouttbl),
)
return lib.virtualfile_to_dataset(
output_type=output_type, vfname=vouttbl, column_names=column_names
)


@fmt_docstring
Expand All @@ -86,7 +86,15 @@ def _blockm(block_method, data, x, y, z, outfile, **kwargs):
w="wrap",
)
@kwargs_to_strings(I="sequence", R="sequence", i="sequence_comma", o="sequence_comma")
def blockmean(data=None, x=None, y=None, z=None, outfile=None, **kwargs):
def blockmean(
data=None,
x=None,
y=None,
z=None,
output_type: Literal["pandas", "numpy", "file"] = "pandas",
outfile: str | None = None,
**kwargs,
) -> pd.DataFrame | np.ndarray | None:
r"""
Block average (x, y, z) data tables by mean estimation.

Expand All @@ -111,9 +119,9 @@ def blockmean(data=None, x=None, y=None, z=None, outfile=None, **kwargs):
{table-classes}.
x/y/z : 1-D arrays
Arrays of x and y coordinates and values z of the data points.

{output_type}
{outfile}
{spacing}

summary : str
[**m**\|\ **n**\|\ **s**\|\ **w**].
Type of summary values calculated by blockmean.
Expand All @@ -122,12 +130,7 @@ def blockmean(data=None, x=None, y=None, z=None, outfile=None, **kwargs):
- **n** - report the number of input points inside each block
- **s** - report the sum of all z-values inside a block
- **w** - report the sum of weights

{region}

outfile : str
The file name for the output ASCII file.

{verbose}
{aspatial}
{binary}
Expand All @@ -142,13 +145,13 @@ def blockmean(data=None, x=None, y=None, z=None, outfile=None, **kwargs):

Returns
-------
output : pandas.DataFrame or None
Return type depends on whether the ``outfile`` parameter is set:
ret
Return type depends on ``outfile`` and ``output_type``:

- :class:`pandas.DataFrame` table with (x, y, z) columns if ``outfile``
is not set.
- None if ``outfile`` is set (filtered output will be stored in file
set by ``outfile``).
- ``None`` if ``outfile`` is set (output will be stored in file set by
``outfile``)
- :class:`pandas.DataFrame` or :class:`numpy.ndarray` if ``outfile`` is not set
(depends on ``output_type``)

Example
-------
Expand All @@ -159,7 +162,14 @@ def blockmean(data=None, x=None, y=None, z=None, outfile=None, **kwargs):
>>> data_bmean = pygmt.blockmean(data=data, region=[245, 255, 20, 30], spacing="5m")
"""
return _blockm(
block_method="blockmean", data=data, x=x, y=y, z=z, outfile=outfile, **kwargs
block_method="blockmean",
data=data,
x=x,
y=y,
z=z,
output_type=output_type,
outfile=outfile,
**kwargs,
)


Expand All @@ -180,7 +190,15 @@ def blockmean(data=None, x=None, y=None, z=None, outfile=None, **kwargs):
w="wrap",
)
@kwargs_to_strings(I="sequence", R="sequence", i="sequence_comma", o="sequence_comma")
def blockmedian(data=None, x=None, y=None, z=None, outfile=None, **kwargs):
def blockmedian(
data=None,
x=None,
y=None,
z=None,
output_type: Literal["pandas", "numpy", "file"] = "pandas",
outfile: str | None = None,
**kwargs,
) -> pd.DataFrame | np.ndarray | None:
r"""
Block average (x, y, z) data tables by median estimation.

Expand All @@ -205,14 +223,10 @@ def blockmedian(data=None, x=None, y=None, z=None, outfile=None, **kwargs):
{table-classes}.
x/y/z : 1-D arrays
Arrays of x and y coordinates and values z of the data points.

{output_type}
{outfile}
{spacing}

{region}

outfile : str
The file name for the output ASCII file.

{verbose}
{aspatial}
{binary}
Expand All @@ -227,13 +241,13 @@ def blockmedian(data=None, x=None, y=None, z=None, outfile=None, **kwargs):

Returns
-------
output : pandas.DataFrame or None
Return type depends on whether the ``outfile`` parameter is set:
ret
Return type depends on ``outfile`` and ``output_type``:

- :class:`pandas.DataFrame` table with (x, y, z) columns if ``outfile``
is not set.
- None if ``outfile`` is set (filtered output will be stored in file
set by ``outfile``).
- ``None`` if ``outfile`` is set (output will be stored in file set by
``outfile``)
- :class:`pandas.DataFrame` or :class:`numpy.ndarray` if ``outfile`` is not set
(depends on ``output_type``)

Example
-------
Expand All @@ -246,7 +260,14 @@ def blockmedian(data=None, x=None, y=None, z=None, outfile=None, **kwargs):
... )
"""
return _blockm(
block_method="blockmedian", data=data, x=x, y=y, z=z, outfile=outfile, **kwargs
block_method="blockmedian",
data=data,
x=x,
y=y,
z=z,
output_type=output_type,
outfile=outfile,
**kwargs,
)


Expand All @@ -267,7 +288,15 @@ def blockmedian(data=None, x=None, y=None, z=None, outfile=None, **kwargs):
w="wrap",
)
@kwargs_to_strings(I="sequence", R="sequence", i="sequence_comma", o="sequence_comma")
def blockmode(data=None, x=None, y=None, z=None, outfile=None, **kwargs):
def blockmode(
data=None,
x=None,
y=None,
z=None,
output_type: Literal["pandas", "numpy", "file"] = "pandas",
outfile: str | None = None,
**kwargs,
) -> pd.DataFrame | np.ndarray | None:
r"""
Block average (x, y, z) data tables by mode estimation.

Expand All @@ -292,14 +321,10 @@ def blockmode(data=None, x=None, y=None, z=None, outfile=None, **kwargs):
{table-classes}.
x/y/z : 1-D arrays
Arrays of x and y coordinates and values z of the data points.

{output_type}
{outfile}
{spacing}

{region}

outfile : str
The file name for the output ASCII file.

{verbose}
{aspatial}
{binary}
Expand All @@ -314,13 +339,13 @@ def blockmode(data=None, x=None, y=None, z=None, outfile=None, **kwargs):

Returns
-------
output : pandas.DataFrame or None
Return type depends on whether the ``outfile`` parameter is set:
ret
Return type depends on ``outfile`` and ``output_type``:

- :class:`pandas.DataFrame` table with (x, y, z) columns if ``outfile``
is not set.
- None if ``outfile`` is set (filtered output will be stored in file
set by ``outfile``).
- ``None`` if ``outfile`` is set (output will be stored in file set by
``outfile``)
- :class:`pandas.DataFrame` or :class:`numpy.ndarray` if ``outfile`` is not set
(depends on ``output_type``)

Example
-------
Expand All @@ -331,5 +356,12 @@ def blockmode(data=None, x=None, y=None, z=None, outfile=None, **kwargs):
>>> data_bmode = pygmt.blockmode(data=data, region=[245, 255, 20, 30], spacing="5m")
"""
return _blockm(
block_method="blockmode", data=data, x=x, y=y, z=z, outfile=outfile, **kwargs
block_method="blockmode",
data=data,
x=x,
y=y,
z=z,
output_type=output_type,
outfile=outfile,
**kwargs,
)