diff --git a/tests/dwd/radolan/test_access.py b/tests/dwd/radolan/test_access.py index 39c672769..f68d7da90 100644 --- a/tests/dwd/radolan/test_access.py +++ b/tests/dwd/radolan/test_access.py @@ -4,7 +4,7 @@ from pathlib import Path from wetterdienst.dwd.metadata.time_resolution import TimeResolution -from wetterdienst.dwd.radolan.access import collect_radolan_data +from wetterdienst.dwd.radolan.access import _collect_radolan_data HERE = Path(__file__).parent @@ -14,7 +14,7 @@ def test_collect_radolan_data(): with Path(HERE, "radolan_hourly_201908080050").open("rb") as f: radolan_hourly = BytesIO(f.read()) - radolan_hourly_test = collect_radolan_data( + radolan_hourly_test = _collect_radolan_data( date_times=[datetime(year=2019, month=8, day=8, hour=0, minute=50)], time_resolution=TimeResolution.HOURLY, )[0][1] @@ -24,7 +24,7 @@ def test_collect_radolan_data(): with Path(HERE, "radolan_daily_201908080050").open("rb") as f: radolan_daily = BytesIO(f.read()) - radolan_daily_test = collect_radolan_data( + radolan_daily_test = _collect_radolan_data( date_times=[datetime(year=2019, month=8, day=8, hour=0, minute=50)], time_resolution=TimeResolution.DAILY, )[0][1] diff --git a/tests/dwd/radolan/test_index.py b/tests/dwd/radolan/test_index.py new file mode 100644 index 000000000..3266d639a --- /dev/null +++ b/tests/dwd/radolan/test_index.py @@ -0,0 +1,29 @@ +from wetterdienst import Parameter, TimeResolution, PeriodType +from wetterdienst.dwd.metadata.constants import DWDWeatherBase, DWDCDCBase +from wetterdienst.dwd.metadata.radar_data_types import RadarDataTypes +from wetterdienst.dwd.metadata.radar_sites import RadarSites +from wetterdienst.dwd.radolan.index import _create_file_index_radolan + + +def test_radolan_fileindex(): + + file_index = _create_file_index_radolan( + Parameter.SWEEP_VOL_VELOCITY_V, + TimeResolution.MINUTE_5, + DWDWeatherBase.RADAR_SITES, + radar_site=RadarSites.BOO, + radar_data_type=RadarDataTypes.HDF5 + ) + test_split = file_index.iat[0, 0].split('/') + assert test_split[0] == 'sweep_vol_v' + assert test_split[1] == 'boo' + assert test_split[2] == 'hdf5' + + file_index = _create_file_index_radolan( + Parameter.PX250_REFLECTIVITY, + TimeResolution.MINUTE_5, + DWDWeatherBase.RADAR_SITES, + radar_site=RadarSites.BOO) + test_split = file_index.iat[0, 0].split('/') + assert test_split[0] == 'px250' + assert test_split[1] == 'boo' diff --git a/tests/dwd/radolan/test_store.py b/tests/dwd/radolan/test_store.py new file mode 100644 index 000000000..ec7fcfd58 --- /dev/null +++ b/tests/dwd/radolan/test_store.py @@ -0,0 +1,16 @@ +from datetime import datetime +from pathlib import PosixPath + +from wetterdienst import Parameter, TimeResolution +from wetterdienst.dwd.metadata.constants import DWD_FOLDER_MAIN +from wetterdienst.dwd.radolan.store import build_local_filepath_for_radar + + +def test_build_local_filepath_for_radar(): + assert PosixPath('/home/dlassahn/projects/forecast-system/wetterdienst/' + 'dwd_data/dx/5_minutes/dx_5_minutes_202001011215') == \ + build_local_filepath_for_radar( + Parameter.DX_REFLECTIVITY, + datetime(2020, 1, 1, 12, 15), + DWD_FOLDER_MAIN, + TimeResolution.MINUTE_5) diff --git a/tests/dwd/test_index.py b/tests/dwd/test_index.py index bea8a6fed..32408c4de 100644 --- a/tests/dwd/test_index.py +++ b/tests/dwd/test_index.py @@ -1,10 +1,11 @@ import pytest +from wetterdienst.dwd.metadata.constants import DWDCDCBase from wetterdienst.dwd.metadata.parameter import Parameter from wetterdienst.dwd.metadata.period_type import PeriodType from wetterdienst import TimeResolution from wetterdienst.util.network import list_remote_files -from wetterdienst.dwd.index import build_path_to_parameter +from wetterdienst.dwd.index import build_path_to_parameter, _create_file_index_for_dwd_server from wetterdienst.dwd.observations.store import build_local_filepath_for_station_data @@ -36,3 +37,17 @@ def test_list_files_of_climate_observations(): "https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/" "annual/kl/recent/jahreswerte_KL_01048_akt.zip" in files_server ) + + +def test_fileindex(): + + file_index = _create_file_index_for_dwd_server( + Parameter.CLIMATE_SUMMARY, + TimeResolution.DAILY, + DWDCDCBase.CLIMATE_OBSERVATIONS, + period_type=PeriodType.RECENT) + + test_split = file_index.iat[0, 0].split('/') + assert test_split[0] == 'daily' + assert test_split[1] == 'kl' + assert test_split[2] == 'recent' diff --git a/wetterdienst/dwd/metadata/constants.py b/wetterdienst/dwd/metadata/constants.py index 23c3fa0a2..1903363ab 100644 --- a/wetterdienst/dwd/metadata/constants.py +++ b/wetterdienst/dwd/metadata/constants.py @@ -14,6 +14,12 @@ class DWDCDCBase(Enum): GRIDS_GERMANY = "grids_germany/" +class DWDWeatherBase(Enum): + PATH = "weather" + RADAR_COMPOSITE = "radar/composite" + RADAR_SITES = "radar/sites" + + DWD_FOLDER_MAIN = "./dwd_data" DWD_FOLDER_STATION_DATA = "station_data" DWD_FILE_STATION_DATA = "dwd_station_data" diff --git a/wetterdienst/dwd/metadata/radar_sites.py b/wetterdienst/dwd/metadata/radar_sites.py index fd69f54be..ac1036004 100644 --- a/wetterdienst/dwd/metadata/radar_sites.py +++ b/wetterdienst/dwd/metadata/radar_sites.py @@ -69,3 +69,29 @@ 'latitude': 52.160096, 'longitude': 11.176091} } + +""" enumeration for Radar Sites """ +from enum import Enum + + +class RadarSites(Enum): + """ + enumeration for the different radar locations/sites + """ + ASB = 'asb' + BOO = 'boo' + DRS = 'drs' + EIS = 'eis' + ESS = 'ess' + FGB = 'fbg' + FLD = 'fld' + ISN = 'isn' + HNR = 'hnr' + MEM = 'mem' + NEU = 'neu' + NHB = 'nhb' + OFT = 'oft' + PRO = 'pro' + ROS = 'ros' + TUR = 'tur' + UMD = 'umd' diff --git a/wetterdienst/dwd/metadata/time_resolution.py b/wetterdienst/dwd/metadata/time_resolution.py index 18b9e8b0e..332183cd6 100644 --- a/wetterdienst/dwd/metadata/time_resolution.py +++ b/wetterdienst/dwd/metadata/time_resolution.py @@ -17,6 +17,8 @@ class TimeResolution(Enum): DAILY = "daily" MONTHLY = "monthly" ANNUAL = "annual" + MINUTE_5 = "5_minutes" + MINUTE_15 = "15_minutes" TIME_RESOLUTION_TO_DATETIME_FORMAT_MAPPING: Dict[TimeResolution, str] = { diff --git a/wetterdienst/dwd/observations/store.py b/wetterdienst/dwd/observations/store.py index 69a31656b..504aef404 100644 --- a/wetterdienst/dwd/observations/store.py +++ b/wetterdienst/dwd/observations/store.py @@ -8,7 +8,6 @@ DWD_FOLDER_STATION_DATA, DWD_FILE_STATION_DATA, DataFormat, -) def store_climate_observations( diff --git a/wetterdienst/dwd/radolan/access.py b/wetterdienst/dwd/radolan/access.py index 73fec8040..612239296 100644 --- a/wetterdienst/dwd/radolan/access.py +++ b/wetterdienst/dwd/radolan/access.py @@ -6,7 +6,7 @@ from pathlib import Path from typing import Tuple, List, Union -from wetterdienst import TimeResolution +from wetterdienst import TimeResolution, Parameter from wetterdienst.dwd.metadata.constants import DWD_FOLDER_MAIN from wetterdienst.dwd.network import download_file_from_dwd @@ -19,7 +19,8 @@ log = logging.getLogger(__name__) -def collect_radolan_data( +def collect_radar_data( + parameter: Parameter, date_times: List[datetime], time_resolution: TimeResolution, prefer_local: bool = False, @@ -27,20 +28,55 @@ def collect_radolan_data( folder: Union[str, Path] = DWD_FOLDER_MAIN, ) -> List[Tuple[datetime, BytesIO]]: """ - Function used to collect RADOLAN data for given datetimes and a time resolution. + Function used to collect Radar data for given datetimes and a time resolution. Additionally the file can be written to a local folder and read from there as well. + Args: + parameter: What type of radar data should be collected + date_times: list of datetime objects for which radar data shall be acquired + time_resolution: the time resolution for requested data, either hourly or daily + prefer_local: boolean if file should be read from local store instead + write_file: boolean if file should be stored on the drive + folder: path for storage - :param date_times: List of datetime objects for which RADOLAN shall be acquired - :param time_resolution: Time resolution for requested data, either hourly or daily - :param prefer_local: File should be read from local store instead - :param write_file: File should be stored on the drive - :param folder: Path for storage - - :return: List of tuples: datetime and the corresponding file in bytes + Returns: + list of tuples of a datetime and the corresponding file in bytes """ - if time_resolution not in (TimeResolution.HOURLY, TimeResolution.DAILY): - raise ValueError("RADOLAN is only offered in hourly and daily resolution.") + if time_resolution not in (TimeResolution.HOURLY, + TimeResolution.DAILY, + TimeResolution.MINUTE_5, + TimeResolution.MINUTE_15): + raise ValueError("Wrong TimeResolution for RadarData") + + if parameter == Parameter.RADOLAN: + return _collect_radolan_data(date_times, + time_resolution, + prefer_local, + write_file, + folder) + else: + raise ValueError("You have passed a non valid radar data Parameter. " + "Valid Radar data:") + +def _collect_radolan_data( + date_times: List[datetime], + time_resolution: TimeResolution, + prefer_local: bool = False, + write_file: bool = False, + folder: Union[str, Path] = DWD_FOLDER_MAIN, +) -> List[Tuple[datetime, BytesIO]]: + """ + Function used to collect RADOLAN data for given datetimes and a time resolution. + Additionally the file can be written to a local folder and read from there as well. + Args: + date_times: list of datetime objects for which RADOLAN shall be acquired + time_resolution: the time resolution for requested data, either hourly or daily + prefer_local: boolean if file should be read from local store instead + write_file: boolean if file should be stored on the drive + folder: path for storage + Returns: + list of tuples of a datetime and the corresponding file in bytes + """ data = [] # datetime = pd.to_datetime(datetime).replace(tzinfo=None) for date_time in date_times: @@ -49,7 +85,7 @@ def collect_radolan_data( data.append( ( date_time, - restore_radolan_data(date_time, time_resolution, folder), + restore_radolan_data(Parameter.RADOLAN, date_time, time_resolution, folder), ) ) @@ -57,7 +93,9 @@ def collect_radolan_data( continue except FileNotFoundError: - log.info(f"Acquiring RADOLAN data for {str(date_time)}") + log.info( + f"RADOLAN data for {str(date_time)} will be collected from internet" + ) remote_radolan_file_path = create_filepath_for_radolan( date_time, time_resolution @@ -72,7 +110,7 @@ def collect_radolan_data( data.append(date_time_and_file) if write_file: - store_radolan_data(date_time_and_file, time_resolution, folder) + store_radolan_data(Parameter.RADOLAN, date_time_and_file, time_resolution, folder) return data diff --git a/wetterdienst/dwd/radolan/api.py b/wetterdienst/dwd/radolan/api.py index 819bb2901..4ec21ce7a 100644 --- a/wetterdienst/dwd/radolan/api.py +++ b/wetterdienst/dwd/radolan/api.py @@ -7,7 +7,7 @@ from wetterdienst import TimeResolution from wetterdienst.dwd.metadata.constants import DWD_FOLDER_MAIN -from wetterdienst.dwd.radolan.access import collect_radolan_data +from wetterdienst.dwd.radolan.access import collect_radar_data from wetterdienst.dwd.metadata.column_names import DWDMetaColumns from wetterdienst.dwd.radolan.index import create_file_index_for_radolan from wetterdienst.dwd.util import parse_enumeration_from_template @@ -99,7 +99,7 @@ def collect_data(self) -> Generator[Tuple[datetime, BytesIO], None, None]: :return: For each datetime, the same datetime and file in bytes """ for date_time in self.date_times: - _, file_in_bytes = collect_radolan_data( + _, file_in_bytes = collect_radar_data( time_resolution=self.time_resolution, date_times=[date_time], write_file=self.write_file, diff --git a/wetterdienst/dwd/radolan/index.py b/wetterdienst/dwd/radolan/index.py index 8f115d77a..1aec6bd47 100644 --- a/wetterdienst/dwd/radolan/index.py +++ b/wetterdienst/dwd/radolan/index.py @@ -1,15 +1,21 @@ import re +from pathlib import PurePosixPath +from typing import Optional, Union import pandas as pd from dateparser import parse from wetterdienst import TimeResolution, Parameter, PeriodType -from wetterdienst.dwd.metadata.constants import DWDCDCBase, ArchiveFormat +from wetterdienst.dwd.metadata.constants import DWDCDCBase, ArchiveFormat, DWDWeatherBase from wetterdienst.dwd.metadata.column_names import DWDMetaColumns from wetterdienst.dwd.metadata.datetime import DatetimeFormat from wetterdienst.dwd.index import _create_file_index_for_dwd_server +from wetterdienst.dwd.metadata.radar_data_types import RadarDataTypes +from wetterdienst.dwd.metadata.radar_sites import RadarSites +from wetterdienst.file_path_handling.path_handling import RADAR_PARAMETERS_COMPOSITES, RADAR_PARAMETERS_SITES, \ + RADAR_PARAMETERS_WITH_HDF5 from wetterdienst.util.cache import fileindex_cache_five_minutes - +from wetterdienst.util.network import list_remote_files RADOLAN_HISTORICAL_DT_REGEX = r"(? pd.DataFra """ file_index = pd.concat( [ - _create_file_index_for_dwd_server( + _create_file_index_radolan( Parameter.RADOLAN, time_resolution, period_type, @@ -62,3 +68,85 @@ def create_file_index_for_radolan(time_resolution: TimeResolution) -> pd.DataFra ) return file_index + + +def _create_file_index_radolan( + parameter: Parameter, + time_resolution: TimeResolution, + dwd_base: Union[DWDCDCBase, DWDWeatherBase], + period_type: Optional[PeriodType] = None, + radar_site: Optional[RadarSites] = None, + radar_data_type: Optional[RadarDataTypes] = None +) -> pd.DataFrame: + """ + Function to create a file index of the DWD station data, which usually is shipped as + zipped/archived data. The file index is created for an individual set of parameters. + Args: + parameter: parameter of Parameter enumeration + time_resolution: time resolution of TimeResolution enumeration + dwd_base: base path e.g. climate_observations/germany or weather + period_type: period type of PeriodType enumeration + radar_site: Site of the radar if parameter is one of RADAR_PARAMETERS_SITES + radar_data_type: Some radar data are available in different data types + Returns: + file index in a pandas.DataFrame with sets of parameters and station id + """ + parameter_path = build_path_to_parameter(parameter, + time_resolution, + period_type, + radar_site, + radar_data_type) + + files_server = list_remote_files(parameter_path, dwd_base, recursive=True) + + files_server = pd.DataFrame( + files_server, columns=[DWDMetaColumns.FILENAME.value], dtype="str" + ) + + data_directory = DWDCDCBase.PATH.value \ + if isinstance(dwd_base, DWDCDCBase) else DWDWeatherBase.PATH.value + + files_server[DWDMetaColumns.FILENAME.value] = files_server[ + DWDMetaColumns.FILENAME.value + ].str.replace(f"{data_directory}/{dwd_base.value}/", "") + + return files_server + + +def build_path_to_parameter( + parameter: Parameter, + radar_site: Optional[RadarSites] = None, + radar_data_type: Optional[RadarDataTypes] = None +) -> PurePosixPath: + """ + Function to build a indexing file path + Args: + parameter: observation measure + time_resolution: frequency/granularity of measurement interval + period_type: recent or historical files + radar_site: Site of the radar if parameter is one of RADAR_PARAMETERS_SITES + radar_data_type: Some radar data are available in different data types + + Returns: + indexing file path relative to climate observations path + """ + if parameter in RADAR_PARAMETERS_COMPOSITES: + parameter_path = PurePosixPath(parameter.value) + + elif parameter in RADAR_PARAMETERS_SITES: + if radar_site is None: + raise ValueError("You have choosen radar site data which " + "requires to pass a RadarSite") + else: + parameter_path = PurePosixPath(parameter.value, + radar_site.value) + if parameter in RADAR_PARAMETERS_WITH_HDF5: + if radar_data_type is None: + raise ValueError("You have to define a RadarDataType [hdf5 or binary]") + elif radar_data_type is RadarDataTypes.HDF5: + parameter_path = PurePosixPath.joinpath(parameter_path, radar_data_type.value) + + else: + raise KeyError("Unknown parameter for RADAR") + + return parameter_path diff --git a/wetterdienst/dwd/radolan/store.py b/wetterdienst/dwd/radolan/store.py index 9db678f57..163464a92 100644 --- a/wetterdienst/dwd/radolan/store.py +++ b/wetterdienst/dwd/radolan/store.py @@ -3,33 +3,38 @@ from pathlib import Path from typing import Tuple, Union -from wetterdienst import TimeResolution +from wetterdienst import TimeResolution, Parameter from wetterdienst.dwd.metadata.datetime import DatetimeFormat def store_radolan_data( - date_time_and_file: Tuple[datetime, BytesIO], - time_resolution: TimeResolution, - folder: Union[str, Path], + parameter: Parameter, + date_time_and_file: Tuple[datetime, BytesIO], + time_resolution: TimeResolution, + folder: Union[str, Path], ) -> None: + """ + Stores a binary file of radolan data locally + """ date_time, file = date_time_and_file - filepath = build_local_filepath_for_radolan(date_time, folder, time_resolution) + filepath = build_local_filepath_for_radar(parameter, date_time, folder, time_resolution) filepath.parent.mkdir(parents=True, exist_ok=True) with filepath.open("wb") as f: f.write(file.read()) - # When the file has been written, reset seek pointer. - file.seek(0) - def restore_radolan_data( - date_time: datetime, time_resolution: TimeResolution, folder: Union[str, Path] + parameter: Parameter, + date_time: datetime, + time_resolution: TimeResolution, + folder: Union[str, Path] ) -> BytesIO: - filepath = build_local_filepath_for_radolan(date_time, folder, time_resolution) + """ Opens downloaded radolan data into a binary object""" + filepath = build_local_filepath_for_radar(parameter, date_time, folder, time_resolution) with filepath.open("rb") as f: file_in_bytes = BytesIO(f.read()) @@ -37,13 +42,17 @@ def restore_radolan_data( return file_in_bytes -def build_local_filepath_for_radolan( - date_time: datetime, folder: Union[str, Path], time_resolution: TimeResolution +def build_local_filepath_for_radar( + parameter: Parameter, + date_time: datetime, + folder: Union[str, Path], + time_resolution: TimeResolution ) -> Union[str, Path]: """ Args: - date_time: + parameter: radar data parameter + date_time: Timestamp of file folder: time_resolution: @@ -52,9 +61,9 @@ def build_local_filepath_for_radolan( """ local_filepath = Path( folder, - "radolan", + parameter.value, time_resolution.value, - f"radolan_{time_resolution.value}_" + f"{parameter.value}_{time_resolution.value}_" f"{date_time.strftime(DatetimeFormat.YMDHM.value)}", ).absolute()