Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/run-s3-test-push.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ jobs:
pip install -e .
- name: Run tests
run: |
pytest -n 2
pytest -n 2 -m "not slow"
- name: Run S3 exploratory tests
run: |
pytest tests/s3_exploratory/test_s3_reduction.py --html=test-reports/s3-exploratory-report.html
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/run-test-push.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,5 +39,5 @@ jobs:
pip install -e .
- run: pip install -e .
- run: conda list
- run: pytest -n 2 --junitxml=report-1.xml
- run: pytest -n 2 -m "not slow" --junitxml=report-1.xml
- uses: codecov/codecov-action@v5
22 changes: 21 additions & 1 deletion activestorage/active.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,21 @@ def method(self, value):

self._method = value

@property
def mean(self):
    """Select ``"mean"`` as the statistical method and return this object.

    Reading the attribute stores ``"mean"`` in ``self._method`` and hands
    back ``self``, so the access can be followed directly by an index,
    e.g. ``active.mean[4:5, 1:2]``.

    Note that this getter mutates the instance: merely reading
    ``active.mean`` changes ``_method``.
    """
    self._method = "mean"
    return self

@property
def min(self):
    """Select ``"min"`` as the statistical method and return this object.

    Reading the attribute stores ``"min"`` in ``self._method`` and hands
    back ``self``, so the access can be followed directly by an index,
    e.g. ``active.min[4:5, 1:2]``.

    Note that this getter mutates the instance: merely reading
    ``active.min`` changes ``_method``.
    """
    self._method = "min"
    return self

@property
def max(self):
    """Select ``"max"`` as the statistical method and return this object.

    Reading the attribute stores ``"max"`` in ``self._method`` and hands
    back ``self``, so the access can be followed directly by an index,
    e.g. ``active.max[4:5, 1:2]``.

    Note that this getter mutates the instance: merely reading
    ``active.max`` changes ``_method``.
    """
    self._method = "max"
    return self

@property
def ncvar(self):
"""Return or set the netCDF variable name."""
Expand All @@ -361,7 +376,6 @@ def ncvar(self):
def ncvar(self, value):
self._ncvar = value


def _get_active(self, method, *args):
"""
*args defines a slice of data. This method loops over each of the chunks
Expand Down Expand Up @@ -412,6 +426,9 @@ def _from_storage(self, ds, indexer, chunks, out_shape, out_dtype, compressor, f

# Whether or not we need to store reduction counts
need_counts = self.components or self._method == "mean"
# but never when we don't have a statistical method
if self.components and self._method is None:
raise ValueError("Setting components to True for None statistical method.")

if method is not None:
# Get the number of chunks per axis
Expand Down Expand Up @@ -539,6 +556,9 @@ def _from_storage(self, ds, indexer, chunks, out_shape, out_dtype, compressor, f
# to a masked value in 'out'.
out = out / n

# reset the method to start from a clean property
self._method = None

return out

def _get_endpoint_url(self):
Expand Down
104 changes: 100 additions & 4 deletions tests/test_bigger_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,8 @@ def save_cl_file_with_a(tmp_path):
return uri


def test_cl(tmp_path):
ncfile = save_cl_file_with_a(tmp_path)
def test_cl_old_method(tmp_path):
ncfile = save_cl_file_with_a(tmp_path)
active = Active(ncfile, "cl", storage_type=utils.get_storage_type())
active._version = 0
d = active[4:5, 1:2]
Expand All @@ -125,6 +125,58 @@ def test_cl(tmp_path):
np.testing.assert_array_equal(mean_result, result2["sum"]/result2["n"])


def test_cl_mean(tmp_path):
    """Check the ``.mean`` accessor on ``cl`` when components are requested.

    A reference mean is obtained by reading the selection with
    ``_version = 0`` and averaging it with NumPy; the ``sum``/``n``
    components returned with ``_version = 2`` must reproduce it.
    """
    uri = save_cl_file_with_a(tmp_path)

    # reference: plain selection, averaged locally
    av = Active(uri, "cl", storage_type=utils.get_storage_type())
    av._version = 0
    expected_mean = np.mean(av[4:5, 1:2])

    # attribute-style reduction, returning its components
    av = Active(uri, "cl", storage_type=utils.get_storage_type())
    av._version = 2
    av.components = True
    parts = av.mean[4:5, 1:2]
    print(parts, uri)

    # expected components:
    # {'sum': array([[[[264.]]]], dtype=float32), 'n': array([[[[12]]]])}
    total = parts["sum"]
    count = parts["n"]
    np.testing.assert_array_equal(total, np.array([[[[264.]]]], dtype="float32"))
    np.testing.assert_array_equal(count, np.array([[[[12]]]]))

    # the components must recombine into the reference mean
    np.testing.assert_array_equal(expected_mean, total / count)


def test_cl_min(tmp_path):
    """The ``.min`` accessor on ``cl[4:5, 1:2]`` equals the expected 22.0 array."""
    uri = save_cl_file_with_a(tmp_path)
    av = Active(uri, "cl", storage_type=utils.get_storage_type())
    av._version = 2

    selection_min = av.min[4:5, 1:2]
    expected = np.array([[[[22.]]]], dtype="float32")
    np.testing.assert_array_equal(selection_min, expected)


def test_cl_max(tmp_path):
    """The ``.max`` accessor on ``cl[4:5, 1:2]`` equals the expected 22.0 array."""
    uri = save_cl_file_with_a(tmp_path)
    av = Active(uri, "cl", storage_type=utils.get_storage_type())
    av._version = 2

    selection_max = av.max[4:5, 1:2]
    expected = np.array([[[[22.]]]], dtype="float32")
    np.testing.assert_array_equal(selection_max, expected)


def test_cl_global_max(tmp_path):
    """The ``.max`` accessor over the whole ``cl`` variable (``[:]``) equals 22.0."""
    uri = save_cl_file_with_a(tmp_path)
    av = Active(uri, "cl", storage_type=utils.get_storage_type())
    av._version = 2

    global_max = av.max[:]
    expected = np.array([[[[22.]]]], dtype="float32")
    np.testing.assert_array_equal(global_max, expected)


def test_cl_maxxx(tmp_path):
    """An attribute that is not a known statistic raises ``AttributeError``."""
    uri = save_cl_file_with_a(tmp_path)
    av = Active(uri, "cl", storage_type=utils.get_storage_type())
    av._version = 2

    # "maxxx" is a deliberate misspelling of "max"
    with pytest.raises(AttributeError):
        _ = av.maxxx[:]


def test_ps(tmp_path):
ncfile = save_cl_file_with_a(tmp_path)
active = Active(ncfile, "ps", storage_type=utils.get_storage_type())
Expand All @@ -134,9 +186,8 @@ def test_ps(tmp_path):

active = Active(ncfile, "ps", storage_type=utils.get_storage_type())
active._version = 2
active.method = "mean"
active.components = True
result2 = active[4:5, 1:2]
result2 = active.mean[4:5, 1:2]
print(result2, ncfile)
# expect {'sum': array([[[22.]]]), 'n': array([[[4]]])}
# check for typing and structure
Expand Down Expand Up @@ -277,3 +328,48 @@ def test_daily_data_masked(test_data_path):
np.testing.assert_array_equal(result2["n"], 680)
# check for active
np.testing.assert_allclose(mean_result, result2["sum"]/result2["n"], rtol=1e-6)


def test_daily_data_masked_no_stats_yes_components(test_data_path):
    """Requesting components without a statistical method must fail.

    With ``components = True`` and no method selected, indexing the
    Active object is expected to raise ``ValueError`` carrying the
    message checked below.
    """
    ncfile = str(test_data_path / "daily_data_masked.nc")
    uri = utils.write_to_storage(ncfile)
    active = Active(uri, "ta", storage_type=utils.get_storage_type())
    active._version = 2
    active.components = True
    raised = "Setting components to True for None statistical method."
    with pytest.raises(ValueError) as exc:
        active[3:4, 0, 2]
    # Check the message on the exception itself (exc.value), and do it after
    # the ``with`` block: str(exc) is the ExceptionInfo wrapper's text, and a
    # statement placed after the raising line inside the block never runs.
    assert str(exc.value) == raised


def test_daily_data_masked_no_stats_no_components(test_data_path):
    """Plain indexing with no method and no components returns the data value.

    The first element of ``ta[3:4, 0, 2]`` is compared with a known
    reference value.
    """
    ncfile = str(test_data_path / "daily_data_masked.nc")
    uri = utils.write_to_storage(ncfile)
    active = Active(uri, "ta", storage_type=utils.get_storage_type())
    active._version = 2
    result2 = active[3:4, 0, 2][0][0]
    # Compare with a tolerance: exact equality between a stored floating-point
    # value and a rounded decimal literal depends on NumPy's scalar promotion
    # rules, which differ between NumPy versions.
    np.testing.assert_allclose(result2, 250.35127, rtol=1e-6)


def test_daily_data_masked_two_stats(test_data_path):
    """After a ``.min`` reduction the Active object's method is cleared.

    Runs a global minimum on the masked daily data file and then checks
    that ``_method`` is back to ``None`` so the object can be reused.
    """
    source = str(test_data_path / "daily_data_masked.nc")
    uri = utils.write_to_storage(source)

    # first a global minimum through the attribute-style accessor
    av = Active(uri, "ta", storage_type=utils.get_storage_type())
    av._version = 2
    global_min = av.min[:]
    assert global_min == 245.0020751953125

    # the reduction must not leave its method behind on the object
    assert av._method is None
33 changes: 17 additions & 16 deletions tests/test_real_https.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,7 @@ def test_https():

active = Active(test_file_uri, "cl", storage_type="https")
active._version = 1
active._method = "min"
result = active[0:3, 4:6, 7:9]
result = active.min[0:3, 4:6, 7:9]
print("Result is", result)
assert result == np.array([0.6909787], dtype="float32")

Expand All @@ -26,12 +25,14 @@ def test_https_100years():
test_file_uri = "https://esgf.ceda.ac.uk/thredds/fileServer/esg_cmip6/CMIP6/CMIP/MOHC/UKESM1-1-LL/historical/r1i1p1f2/Amon/pr/gn/latest/pr_Amon_UKESM1-1-LL_historical_r1i1p1f2_gn_195001-201412.nc"
active = Active(test_file_uri, "pr")
active._version = 1
active._method = "min"
result = active[0:3, 4:6, 7:9]
result = active.min[0:3, 4:6, 7:9]
print("Result is", result)
assert result == np.array([5.4734613e-07], dtype="float32")


# this could be a slow test on GHA depending on network load
# also GitHub machines are very far from Oxford
@pytest.mark.slow
def test_https_reductionist():
"""Run a true test with a https FILE."""
test_file_uri = "https://esgf.ceda.ac.uk/thredds/fileServer/esg_cmip6/CMIP6/AerChemMIP/MOHC/UKESM1-0-LL/ssp370SST-lowNTCF/r1i1p1f2/Amon/cl/gn/latest/cl_Amon_UKESM1-0-LL_ssp370SST-lowNTCF_r1i1p1f2_gn_205001-209912.nc"
Expand All @@ -41,20 +42,21 @@ def test_https_reductionist():
with pytest.raises(activestorage.reductionist.ReductionistError):
active = Active(test_file_uri, "cl")
active._version = 2
active._method = "min"
result = active[0:3, 4:6, 7:9]
result = active.min[0:3, 4:6, 7:9]
print("Result is", result)
assert result == np.array([0.6909787], dtype="float32")


# this could be a slow test on GHA depending on network load
# also GitHub machines are very far from Oxford
@pytest.mark.slow
def test_https_implicit_storage():
"""Run a true test with a https FILE."""
test_file_uri = "https://esgf.ceda.ac.uk/thredds/fileServer/esg_cmip6/CMIP6/AerChemMIP/MOHC/UKESM1-0-LL/ssp370SST-lowNTCF/r1i1p1f2/Amon/cl/gn/latest/cl_Amon_UKESM1-0-LL_ssp370SST-lowNTCF_r1i1p1f2_gn_205001-209912.nc"

active = Active(test_file_uri, "cl")
active._version = 1
active._method = "min"
result = active[0:3, 4:6, 7:9]
result = active.min[0:3, 4:6, 7:9]
print("Result is", result)
assert result == np.array([0.6909787], dtype="float32")

Expand All @@ -70,8 +72,7 @@ def test_https_implicit_storage_file_not_found():
with pytest.raises(FileNotFoundError):
active = Active(test_file_uri, "cl")
active._version = 1
active._method = "min"
result = active[0:3, 4:6, 7:9]
result = active.min[0:3, 4:6, 7:9]


def test_https_implicit_storage_wrong_url():
Expand All @@ -83,8 +84,7 @@ def test_https_implicit_storage_wrong_url():
with pytest.raises(ValueError):
active = Active(test_file_uri, "cl")
active._version = 1
active._method = "min"
result = active[0:3, 4:6, 7:9]
result = active.min[0:3, 4:6, 7:9]


@pytest.mark.skip(reason="save time: test_https_dataset_implicit_storage is more general.")
Expand All @@ -96,12 +96,14 @@ def test_https_dataset():

active = Active(av, storage_type="https")
active._version = 1
active._method = "min"
result = active[0:3, 4:6, 7:9]
result = active.min[0:3, 4:6, 7:9]
print("Result is", result)
assert result == np.array([0.6909787], dtype="float32")


# this could be a slow test on GHA depending on network load
# also GitHub machines are very far from Oxford
@pytest.mark.slow
def test_https_dataset_implicit_storage():
"""Run a true test with a https DATASET."""
test_file_uri = "https://esgf.ceda.ac.uk/thredds/fileServer/esg_cmip6/CMIP6/AerChemMIP/MOHC/UKESM1-0-LL/ssp370SST-lowNTCF/r1i1p1f2/Amon/cl/gn/latest/cl_Amon_UKESM1-0-LL_ssp370SST-lowNTCF_r1i1p1f2_gn_205001-209912.nc"
Expand All @@ -110,7 +112,6 @@ def test_https_dataset_implicit_storage():

active = Active(av)
active._version = 1
active._method = "min"
result = active[0:3, 4:6, 7:9]
result = active.min[0:3, 4:6, 7:9]
print("Result is", result)
assert result == np.array([0.6909787], dtype="float32")
16 changes: 8 additions & 8 deletions tests/test_real_s3.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
import os
import numpy as np
import pytest

from activestorage.active import Active
from activestorage.active import load_from_s3


S3_BUCKET = "bnl"

# this could be a slow test on GHA depending on network load
# also GitHub machines are very far from Oxford
@pytest.mark.slow
def test_s3_dataset():
"""Run somewhat as the 'gold' test."""
# NOTE: "https://uor-aces-o.s3-ext.jc.rl.ac.uk" is the stable S3 JASMIN
Expand All @@ -33,8 +37,7 @@ def test_s3_dataset():
storage_options=storage_options,
active_storage_url=active_storage_url)
active._version = 2
active._method = "min"
result = active[0:3, 4:6, 7:9] # standardized slice
result = active.min[0:3, 4:6, 7:9] # standardized slice
print("Result is", result)
assert result == 5098.625

Expand All @@ -43,8 +46,7 @@ def test_s3_dataset():
storage_options=storage_options,
active_storage_url=active_storage_url)
active._version = 2
active._method = "min"
result = active[0:3, 4:6, 7:9] # standardized slice
result = active.min[0:3, 4:6, 7:9] # standardized slice
print("Result is", result)
assert result == 5098.625

Expand All @@ -57,8 +59,7 @@ def test_s3_dataset():
storage_options=storage_options,
active_storage_url=active_storage_url)
active._version = 2
active._method = "min"
result = active[0:3, 4:6, 7:9] # standardized slice
result = active.min[0:3, 4:6, 7:9] # standardized slice
print("Result is", result)
assert result == 5098.625

Expand All @@ -67,7 +68,6 @@ def test_s3_dataset():
storage_options=storage_options,
active_storage_url=active_storage_url)
active._version = 2
active._method = "min"
result = active[0:3, 4:6, 7:9] # standardized slice
result = active.min[0:3, 4:6, 7:9] # standardized slice
print("Result is", result)
assert result == 5098.625
26 changes: 26 additions & 0 deletions tests/unit/test_active.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,32 @@ def test_activevariable_pyfive():
assert av_slice_min == np_slice_min


def test_activevariable_pyfive_with_attributed_min():
    """``Active(ds).min[3:5]`` on a pyfive variable matches NumPy's minimum."""
    dataset = pyfive.File("tests/test_data/cesm2_native.nc")["TREFHT"]
    av = Active(dataset)

    active_min = av.min[3:5]
    assert active_min == np.array(258.62814, dtype="float32")

    # cross-check against NumPy on the same slice
    numpy_min = np.min(dataset[3:5])
    assert active_min == numpy_min


def test_activevariable_pyfive_with_attributed_mean():
    """``Active(ds).mean[3:5]`` components recombine into NumPy's mean."""
    dataset = pyfive.File("tests/test_data/cesm2_native.nc")["TREFHT"]
    av = Active(dataset)
    av.components = True

    # with components enabled the result exposes "sum" and "n"
    parts = av.mean[3:5]
    active_mean = parts["sum"] / parts["n"]
    assert active_mean == np.array(283.39508056640625, dtype="float32")

    # cross-check against NumPy on the same slice
    numpy_mean = np.mean(dataset[3:5])
    assert np.isclose(active_mean, numpy_mean)


@pytest.mark.xfail(reason="We don't employ locks with Pyfive anymore, yet.")
def test_lock():
"""Unit test for class:Active."""
Expand Down
Loading
Loading