Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ Removed:

### Added

- `climada.hazard.tc_tracks.TCTracks.subset_year` function [#1023](https://github.com/CLIMADA-project/climada_python/pull/1023)
- `climada.hazard.tc_tracks.TCTracks.from_FAST` function, add Australia basin (AU) [#993](https://github.com/CLIMADA-project/climada_python/pull/993)
- Add `osm-flex` package to CLIMADA core [#981](https://github.com/CLIMADA-project/climada_python/pull/981)
- `doc.tutorial.climada_entity_Exposures_osm.ipynb` tutorial explaining how to use `osm-flex` with CLIMADA
Expand Down
106 changes: 106 additions & 0 deletions climada/hazard/tc_tracks.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import re
import shutil
import warnings
from operator import itemgetter

Check warning on line 31 in climada/hazard/tc_tracks.py

View check run for this annotation

Jenkins - WCR / Pylint

unused-import

NORMAL: Unused itemgetter imported from operator
Raw output
Used when an imported module or variable is not used.
from pathlib import Path
from typing import List, Optional

Expand Down Expand Up @@ -321,6 +322,111 @@

return out

def subset_year(
    self,
    start_date: tuple = (False, False, False),
    end_date: tuple = (False, False, False),
):
    """Subset TCTracks between start and end dates, both included.

    Only the first time step of each track is looked at. The year, month and day
    of that time step are filtered *independently* of each other: a track is kept
    if its year lies within [start year, end year] AND its month lies within
    [start month, end month] AND its day lies within [start day, end day].
    Components that are not set are skipped. Consequently, ``(2000, 1, 10)`` to
    ``(2004, 9, 13)`` does not select a continuous period from 10 January 2000 to
    13 September 2004, but the tracks starting in the years 2000 to 2004, in the
    months January to September, on the days 10 to 13 of the month.

    Parameters
    ----------
    start_date : tuple
        First date to include in the selection (YYYY, MM, DD). Each element can
        either be an integer or `False` (`None` is accepted as well). If an
        element is `False`, this component is ignored during the filter.
    end_date : tuple
        Last date to include in the selection, same as `start_date` if selecting
        only one day. A component must be set in `end_date` if and only if it is
        set in `start_date`.

    Returns
    -------
    subset : TCTracks
        TCTracks object containing the subset of tracks. The selected tracks are
        the same objects as in `self.data` (no copy is made).

    Raises
    ------
    ValueError
        - If a component (year, month or day) is set in only one of
          `start_date` and `end_date`.
        - If the start value of a component is larger than its end value.
        - If no tracks are found within the specified date range.

    Examples
    --------
    Filter by year only: all tracks from the year 2022, regardless of month or day.

    >>> start_date = (2022, False, False)
    >>> end_date = (2022, False, False)

    Filter by year and month: all tracks from May 2022, regardless of the day.

    >>> start_date = (2022, 5, False)
    >>> end_date = (2022, 5, False)

    Filter by year, month, and day: all tracks from May 10th to May 20th, 2022.

    >>> start_date = (2022, 5, 10)
    >>> end_date = (2022, 5, 20)

    Invalid, the month is set in `end_date` but not in `start_date`: raises a
    ValueError.

    >>> start_date = (2022, False, 10)
    >>> end_date = (2022, 5, 20)
    """
    components = ("year", "month", "day")
    bounds = {
        name: (start_date[pos], end_date[pos])
        for pos, name in enumerate(components)
    }

    # A component has to be set in both dates or in neither of them. Checked in the
    # order day, month, year so that the reported mismatch is the same as before
    # when several components are inconsistent at once.
    for name in reversed(components):
        lower, upper = bounds[name]
        if bool(lower) != bool(upper):
            raise ValueError(
                f"Mismatch between start_{name} and end_{name}: "
                "Both must be either True or False."
            )

    # Reversed bounds can never match any track; report them explicitly instead of
    # ending up with the less helpful "No tracks found" error.
    for name in components:
        lower, upper = bounds[name]
        if lower and lower > upper:
            raise ValueError(f"Start {name} is after end {name}.")

    # Only components that are actually set take part in the filter
    active = {name: limits for name, limits in bounds.items() if limits[0]}

    selected = []
    for track in self.data:
        # A single conversion to day resolution yields a `datetime.date` carrying
        # year, month and day of the first time step.
        first_step = track.time[0].to_numpy().astype("datetime64[D]").item()
        if all(
            lower <= getattr(first_step, name) <= upper
            for name, (lower, upper) in active.items()
        ):
            selected.append(track)

    # Raise error if no tracks found
    if not selected:
        raise ValueError("No tracks found for the specified date range")

    # Create subset with filtered tracks
    subset = self.__class__()
    subset.data = selected
    return subset

def tracks_in_exp(self, exposure, buffer=1.0):
"""Select only the tracks that are in the vicinity (buffer) of an exposure.

Expand Down
111 changes: 111 additions & 0 deletions climada/hazard/test/test_tc_tracks.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import unittest
from datetime import datetime as dt

import cftime
import geopandas as gpd
import numpy as np
import pandas as pd
Expand Down Expand Up @@ -763,6 +764,116 @@ def test_subset(self):
tc_track = tc.TCTracks.from_ibtracs_netcdf(storm_id=storms)
self.assertEqual(tc_track.subset({"basin": "SP"}).size, 2)

def test_subset_years(self):
    """Test that subset_year selects the tracks matching the start/end date filter."""

    tc_test = tc.TCTracks.from_simulations_emanuel(TEST_TRACK_EMANUEL)
    # Overwrite the time axis of the tracks at indices 0..4 with known dates:
    # 2000-01-10, 2001-02-11, 2002-03-12, 2003-04-13, 2004-05-14
    for offset in range(5):
        stamp = cftime.DatetimeProlepticGregorian(
            2000 + offset, 1 + offset, 10 + offset, 0, 0, 0, 0, has_year_zero=True
        )
        n_steps = tc_test.data[offset].time.shape[0]
        tc_test.data[offset]["time"] = np.full(n_steps, stamp)

    first_four = [(2000, 1), (2001, 2), (2002, 3), (2003, 4)]

    # correct calling of the function:
    # (start_date, end_date, expected (year, month) of the selected tracks)
    valid_cases = [
        ((2000, False, False), (2003, False, False), first_four),
        ((2000, False, False), (2000, False, False), [(2000, 1)]),
        ((False, 1, False), (False, 4, False), first_four),
        ((False, 3, False), (False, 3, False), [(2002, 3)]),
        (
            (False, False, 11),
            (False, False, 14),
            [(2001, 2), (2002, 3), (2003, 4), (2004, 5)],
        ),
        ((False, False, 10), (False, False, 10), [(2000, 1)]),
        ((2000, 1, 10), (2000, 1, 13), [(2000, 1)]),
        ((2000, 1, 10), (2004, 9, 13), first_four),
    ]
    for start, end, expected in valid_cases:
        tc_subset = tc_test.subset_year(start_date=start, end_date=end)
        self.assertEqual(len(tc_subset.data), len(expected))
        for track, (year, month) in zip(tc_subset.data, expected):
            self.assertEqual(track.time[0].item().year, year)
            self.assertEqual(track.time[0].item().month, month)

    # Invalid input: inconsistent components, reversed years, empty selection
    # (start_date, end_date, expected error message)
    invalid_cases = [
        (
            (2000, False, False),
            (False, False, False),
            "Mismatch between start_year and end_year: "
            "Both must be either True or False.",
        ),
        (
            (2000, False, False),
            (2000, 5, False),
            "Mismatch between start_month and end_month: "
            "Both must be either True or False.",
        ),
        (
            (False, False, False),
            (False, False, 3),
            "Mismatch between start_day and end_day: "
            "Both must be either True or False.",
        ),
        (
            (2007, False, False),
            (2000, False, False),
            "Start year is after end year.",
        ),
        (
            (2100, False, False),
            (2150, False, False),
            "No tracks found for the specified date range",
        ),
    ]
    for start, end, message in invalid_cases:
        with self.assertRaisesRegex(ValueError, message):
            tc_test.subset_year(start, end)

def test_get_extent(self):
"""Test extent/bounds attributes."""
storms = ["1988169N14259", "2002073S16161", "2002143S07157"]
Expand Down