From 6198a1a5dbd7369eb1618505627cce4784d28abe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stephan=20G=C3=BCnther?= Date: Fri, 11 Aug 2023 03:06:25 +0200 Subject: [PATCH 1/7] Tell Sphinx not to expand some type aliases Namely `egon.data.datasets.Dependencies` and `egon.data.datasets.Tasks`. Hopefully this makes the documentation of dataset subclasses using `@dataclass` less confusing. Solution taken from a helpful [answer][0] on Stack Overflow. [0]: https://stackoverflow.com/a/67483317 --- docs/conf.py | 5 +++++ src/egon/data/datasets/__init__.py | 5 ++++- .../datasets/electricity_demand_timeseries/hh_buildings.py | 1 + src/egon/data/datasets/mastr.py | 1 + src/egon/data/datasets/mv_grid_districts.py | 1 + src/egon/data/datasets/re_potential_areas/__init__.py | 1 + 6 files changed, 13 insertions(+), 1 deletion(-) diff --git a/docs/conf.py b/docs/conf.py index 975e216b4..3a1c2fa84 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -47,3 +47,8 @@ add_module_names = False modindex_common_prefix = ["egon.data.", "egon.data.datasets."] + +autodoc_type_aliases = { + "Dependencies": "egon.data.datasets.Dependencies", + "Tasks": "egon.data.datasets.Tasks" +} diff --git a/src/egon/data/datasets/__init__.py b/src/egon/data/datasets/__init__.py index 2b5c04bc4..3612da056 100644 --- a/src/egon/data/datasets/__init__.py +++ b/src/egon/data/datasets/__init__.py @@ -160,6 +160,9 @@ def __init__(self, graph: TaskGraph): ) +#: A dataset can depend on other datasets or the tasks of other datasets. +Dependencies = Iterable[Union["Dataset", Task]] + @dataclass class Dataset: #: The name of the Dataset @@ -176,7 +179,7 @@ class Dataset: #: downstream of any of the listed dependencies. In case of bare #: :class:`Task`, a direct link will be created whereas for a #: :class:`Dataset` the link will be made to all of its last tasks. - dependencies: Iterable[Union[Dataset, Task]] = () + dependencies: Dependencies = () #: The tasks of this :class:`Dataset`. 
A :class:`TaskGraph` will #: automatically be converted to :class:`Tasks_`. tasks: Tasks = () diff --git a/src/egon/data/datasets/electricity_demand_timeseries/hh_buildings.py b/src/egon/data/datasets/electricity_demand_timeseries/hh_buildings.py index d4cdaf443..c60946d56 100755 --- a/src/egon/data/datasets/electricity_demand_timeseries/hh_buildings.py +++ b/src/egon/data/datasets/electricity_demand_timeseries/hh_buildings.py @@ -3,6 +3,7 @@ assigned to OSM-buildings. """ +from __future__ import annotations from dataclasses import dataclass import random diff --git a/src/egon/data/datasets/mastr.py b/src/egon/data/datasets/mastr.py index 8f74b2b90..870c3f36b 100644 --- a/src/egon/data/datasets/mastr.py +++ b/src/egon/data/datasets/mastr.py @@ -3,6 +3,7 @@ """ +from __future__ import annotations from dataclasses import dataclass from pathlib import Path from urllib.request import urlretrieve diff --git a/src/egon/data/datasets/mv_grid_districts.py b/src/egon/data/datasets/mv_grid_districts.py index 050b69cba..6945e24c7 100644 --- a/src/egon/data/datasets/mv_grid_districts.py +++ b/src/egon/data/datasets/mv_grid_districts.py @@ -6,6 +6,7 @@ """ +from __future__ import annotations from dataclasses import dataclass from geoalchemy2.types import Geometry diff --git a/src/egon/data/datasets/re_potential_areas/__init__.py b/src/egon/data/datasets/re_potential_areas/__init__.py index ae6c2cb47..a25753caa 100644 --- a/src/egon/data/datasets/re_potential_areas/__init__.py +++ b/src/egon/data/datasets/re_potential_areas/__init__.py @@ -2,6 +2,7 @@ potential areas for wind onshore and ground-mounted PV. """ +from __future__ import annotations from dataclasses import dataclass from pathlib import Path From 64af8898420823cac9f58f3e6d4c87bab75968a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stephan=20G=C3=BCnther?= Date: Fri, 11 Aug 2023 03:21:05 +0200 Subject: [PATCH 2/7] Uppercase SQL built-ins in SQL code block At least all those, which Sphinx highlights in boldface. 
--- .../electricity_demand_timeseries/hh_buildings.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/egon/data/datasets/electricity_demand_timeseries/hh_buildings.py b/src/egon/data/datasets/electricity_demand_timeseries/hh_buildings.py index c60946d56..70aeecb75 100755 --- a/src/egon/data/datasets/electricity_demand_timeseries/hh_buildings.py +++ b/src/egon/data/datasets/electricity_demand_timeseries/hh_buildings.py @@ -853,20 +853,20 @@ class setup(Dataset): FROM ( SELECT cell_id, - Count(distinct(building_id)) as building_count, - count(profile_id) as hh_count + COUNT(DISTINCT(building_id)) AS building_count, + COUNT(profile_id) AS hh_count FROM demand.egon_household_electricity_profile_of_buildings GROUP BY cell_id - ) as t1 + ) AS t1 FULL OUTER JOIN ( SELECT cell_id, array_agg( - array[cast(hh_10types as char), hh_type] - ) as hh_types + array[CAST(hh_10types AS char), hh_type] + ) AS hh_types FROM society.egon_destatis_zensus_household_per_ha_refined GROUP BY cell_id - ) as t2 + ) AS t2 ON t1.cell_id = t2.cell_id From c570601463d7b3418a96fdcc3bedc9a2f45e6e29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stephan=20G=C3=BCnther?= Date: Fri, 11 Aug 2023 03:24:03 +0200 Subject: [PATCH 3/7] Add an article to a documentation sentence IMHO it's more readable this way. Feel free to remove the commit if you don't concur. 
--- src/egon/data/datasets/mastr.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/egon/data/datasets/mastr.py b/src/egon/data/datasets/mastr.py index 870c3f36b..651299a84 100644 --- a/src/egon/data/datasets/mastr.py +++ b/src/egon/data/datasets/mastr.py @@ -62,8 +62,8 @@ class mastr_data_setup(Dataset): * Source: https://sandbox.zenodo.org/record/808086 * Used technologies: PV plants, wind turbines, biomass, hydro plants, combustion, nuclear, gsgk, storage - * Data is further processed in dataset :py:class:`PowerPlants - ` + * Data is further processed in the :py:class:`PowerPlants + ` dataset Dump 2022-11-17 * Source: https://sandbox.zenodo.org/record/1132839 From f03282a83c17b4d088933fef420efa9b4361286d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stephan=20G=C3=BCnther?= Date: Fri, 11 Aug 2023 04:03:33 +0200 Subject: [PATCH 4/7] Make SQL code block in documentation more compact By removing an unnecessary level of indentation. --- .../hh_buildings.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/egon/data/datasets/electricity_demand_timeseries/hh_buildings.py b/src/egon/data/datasets/electricity_demand_timeseries/hh_buildings.py index 70aeecb75..dbd63366e 100755 --- a/src/egon/data/datasets/electricity_demand_timeseries/hh_buildings.py +++ b/src/egon/data/datasets/electricity_demand_timeseries/hh_buildings.py @@ -849,15 +849,14 @@ class setup(Dataset): .. 
code-block:: SQL - SELECT t1.cell_id, building_count, hh_count, hh_types - FROM ( - SELECT - cell_id, - COUNT(DISTINCT(building_id)) AS building_count, - COUNT(profile_id) AS hh_count - FROM demand.egon_household_electricity_profile_of_buildings - GROUP BY cell_id - ) AS t1 + SELECT t1.cell_id, building_count, hh_count, hh_types FROM ( + SELECT + cell_id, + COUNT(DISTINCT(building_id)) AS building_count, + COUNT(profile_id) AS hh_count + FROM demand.egon_household_electricity_profile_of_buildings + GROUP BY cell_id + ) AS t1 FULL OUTER JOIN ( SELECT cell_id, From 046ce407b0ba6f8452dc1f95a4fd87c152849ae3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stephan=20G=C3=BCnther?= Date: Fri, 11 Aug 2023 04:07:31 +0200 Subject: [PATCH 5/7] Wrap docstrings at 76 characters --- .../datasets/electricity_demand_timeseries/hh_buildings.py | 4 ++-- src/egon/data/datasets/mastr.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/egon/data/datasets/electricity_demand_timeseries/hh_buildings.py b/src/egon/data/datasets/electricity_demand_timeseries/hh_buildings.py index dbd63366e..83d8a73d6 100755 --- a/src/egon/data/datasets/electricity_demand_timeseries/hh_buildings.py +++ b/src/egon/data/datasets/electricity_demand_timeseries/hh_buildings.py @@ -35,8 +35,8 @@ class HouseholdElectricityProfilesOfBuildings(Base): """ - Mapping of demand timeseries and buildings including cell_id, building area and - peak load. This table is created within + Mapping of demand timeseries and buildings including cell_id, building + area and peak load. 
This table is created within :py:func:`hh_buildings.map_houseprofiles_to_buildings()` """ __tablename__ = "egon_household_electricity_profile_of_buildings" diff --git a/src/egon/data/datasets/mastr.py b/src/egon/data/datasets/mastr.py index 651299a84..2ea2a6f63 100644 --- a/src/egon/data/datasets/mastr.py +++ b/src/egon/data/datasets/mastr.py @@ -60,8 +60,8 @@ class mastr_data_setup(Dataset): Dump 2021-05-03 * Source: https://sandbox.zenodo.org/record/808086 - * Used technologies: PV plants, wind turbines, biomass, hydro plants, combustion, - nuclear, gsgk, storage + * Used technologies: PV plants, wind turbines, biomass, hydro plants, + combustion, nuclear, gsgk, storage * Data is further processed in the :py:class:`PowerPlants ` dataset From cae7e59e6d10fb0e7e515a48ea6c18f4b46fb6e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stephan=20G=C3=BCnther?= Date: Fri, 11 Aug 2023 04:09:35 +0200 Subject: [PATCH 6/7] Add some blank lines Courtesy of `black`. --- src/egon/data/datasets/__init__.py | 1 + .../data/datasets/electricity_demand_timeseries/hh_buildings.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/src/egon/data/datasets/__init__.py b/src/egon/data/datasets/__init__.py index 3612da056..3b376b35b 100644 --- a/src/egon/data/datasets/__init__.py +++ b/src/egon/data/datasets/__init__.py @@ -163,6 +163,7 @@ def __init__(self, graph: TaskGraph): #: A dataset can depend on other datasets or the tasks of other datasets. Dependencies = Iterable[Union["Dataset", Task]] + @dataclass class Dataset: #: The name of the Dataset diff --git a/src/egon/data/datasets/electricity_demand_timeseries/hh_buildings.py b/src/egon/data/datasets/electricity_demand_timeseries/hh_buildings.py index 83d8a73d6..0314667e9 100755 --- a/src/egon/data/datasets/electricity_demand_timeseries/hh_buildings.py +++ b/src/egon/data/datasets/electricity_demand_timeseries/hh_buildings.py @@ -39,6 +39,7 @@ class HouseholdElectricityProfilesOfBuildings(Base): area and peak load. 
This table is created within :py:func:`hh_buildings.map_houseprofiles_to_buildings()` """ + __tablename__ = "egon_household_electricity_profile_of_buildings" __table_args__ = {"schema": "demand"} @@ -871,6 +872,7 @@ class setup(Dataset): """ + #: name: str = "Demand_Building_Assignment" #: From 4ea3809c9ccb15c6b0b805b7aca32f5f73722665 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stephan=20G=C3=BCnther?= Date: Fri, 11 Aug 2023 04:13:38 +0200 Subject: [PATCH 7/7] Manually specify `Dataset` subclass constructors Instead of relying on `@dataclass` to provide one. This makes the constructor's signature less confusing. It also allows not specifying types for the class variables, because `@dataclass` no longer needs to pick them up automatically, as we are manually specifying them in the superclass constructor call. This means we have less stuff to import, most notably we no longer need to import `Tasks`. This also allows us to work around the broken display of `mv_grid_districts_setup`'s `tasks`. That was only a single function and for some reason Sphinx did not display that one correctly. Probably because it tried to use `str` instead of `repr` to render the function, because boxing the function in a one-tuple would fix the display. Anyway. Not putting the task on a class attribute but specifying it directly in the constructor means that no class attribute is displayed in the documentation, thus sidestepping the issue. 
--- .../hh_buildings.py | 16 +++++++++++----- src/egon/data/datasets/mastr.py | 15 ++++++++++----- src/egon/data/datasets/mv_grid_districts.py | 16 +++++++++------- .../data/datasets/re_potential_areas/__init__.py | 15 ++++++++++----- 4 files changed, 40 insertions(+), 22 deletions(-) diff --git a/src/egon/data/datasets/electricity_demand_timeseries/hh_buildings.py b/src/egon/data/datasets/electricity_demand_timeseries/hh_buildings.py index 0314667e9..49b948d83 100755 --- a/src/egon/data/datasets/electricity_demand_timeseries/hh_buildings.py +++ b/src/egon/data/datasets/electricity_demand_timeseries/hh_buildings.py @@ -3,8 +3,7 @@ assigned to OSM-buildings. """ -from __future__ import annotations -from dataclasses import dataclass + import random from geoalchemy2 import Geometry @@ -15,7 +14,7 @@ import pandas as pd from egon.data import db -from egon.data.datasets import Dataset, Tasks +from egon.data.datasets import Dataset from egon.data.datasets.electricity_demand_timeseries.hh_profiles import ( HouseholdElectricityProfilesInCensusCells, get_iee_hh_demand_profiles_raw, @@ -750,7 +749,6 @@ def map_houseprofiles_to_buildings(): ) -@dataclass class setup(Dataset): """ Household electricity demand time series for scenarios in 2035 and 2050 @@ -878,4 +876,12 @@ class setup(Dataset): #: version: str = "0.0.5" #: - tasks: Tasks = (map_houseprofiles_to_buildings, get_building_peak_loads) + tasks = (map_houseprofiles_to_buildings, get_building_peak_loads) + + def __init__(self, dependencies): + super().__init__( + name=self.name, + version=self.version, + dependencies=dependencies, + tasks=self.tasks, + ) diff --git a/src/egon/data/datasets/mastr.py b/src/egon/data/datasets/mastr.py index 2ea2a6f63..dae7c61c2 100644 --- a/src/egon/data/datasets/mastr.py +++ b/src/egon/data/datasets/mastr.py @@ -3,13 +3,11 @@ """ -from __future__ import annotations -from dataclasses import dataclass from pathlib import Path from urllib.request import urlretrieve import os -from 
egon.data.datasets import Dataset, Tasks +from egon.data.datasets import Dataset import egon.data.config WORKING_DIR_MASTR_OLD = Path(".", "bnetza_mastr", "dump_2021-05-03") @@ -51,7 +49,6 @@ def download(dataset_name, download_dir): download(dataset_name="mastr_new", download_dir=WORKING_DIR_MASTR_NEW) -@dataclass class mastr_data_setup(Dataset): """ Download Marktstammdatenregister (MaStR) datasets unit registry. @@ -84,4 +81,12 @@ class mastr_data_setup(Dataset): #: version: str = "0.0.1" #: - tasks: Tasks = (download_mastr_data,) + tasks = (download_mastr_data,) + + def __init__(self, dependencies): + super().__init__( + name=self.name, + version=self.version, + dependencies=dependencies, + tasks=self.tasks, + ) diff --git a/src/egon/data/datasets/mv_grid_districts.py b/src/egon/data/datasets/mv_grid_districts.py index 6945e24c7..83bc38736 100644 --- a/src/egon/data/datasets/mv_grid_districts.py +++ b/src/egon/data/datasets/mv_grid_districts.py @@ -6,9 +6,6 @@ """ -from __future__ import annotations -from dataclasses import dataclass - from geoalchemy2.types import Geometry from sqlalchemy import ( ARRAY, @@ -24,7 +21,7 @@ from sqlalchemy.ext.declarative import declarative_base from egon.data import db -from egon.data.datasets import Dataset, Tasks +from egon.data.datasets import Dataset from egon.data.datasets.osmtgmod.substation import EgonHvmvSubstation from egon.data.datasets.substation_voronoi import EgonHvmvSubstationVoronoi from egon.data.db import session_scope @@ -797,7 +794,6 @@ def define_mv_grid_districts(): MvGridDistrictsDissolved.__table__.drop(bind=engine, checkfirst=True) -@dataclass class mv_grid_districts_setup(Dataset): """ Maps MV grid districts to federal states and writes it to database. 
@@ -883,5 +879,11 @@ class mv_grid_districts_setup(Dataset): name: str = "MvGridDistricts" #: version: str = "0.0.2" - #: - tasks: Tasks = define_mv_grid_districts + + def __init__(self, dependencies): + super().__init__( + name=self.name, + version=self.version, + dependencies=dependencies, + tasks=define_mv_grid_districts, + ) diff --git a/src/egon/data/datasets/re_potential_areas/__init__.py b/src/egon/data/datasets/re_potential_areas/__init__.py index a25753caa..269112542 100644 --- a/src/egon/data/datasets/re_potential_areas/__init__.py +++ b/src/egon/data/datasets/re_potential_areas/__init__.py @@ -2,8 +2,6 @@ potential areas for wind onshore and ground-mounted PV. """ -from __future__ import annotations -from dataclasses import dataclass from pathlib import Path from geoalchemy2 import Geometry @@ -12,7 +10,7 @@ import geopandas as gpd from egon.data import db -from egon.data.datasets import Dataset, Tasks +from egon.data.datasets import Dataset import egon.data.config Base = declarative_base() @@ -110,11 +108,18 @@ def insert_data(): ) -@dataclass class re_potential_area_setup(Dataset): #: name: str = "RePotentialAreas" #: version: str = "0.0.1" #: - tasks: Tasks = (create_tables, insert_data) + tasks = (create_tables, insert_data) + + def __init__(self, dependencies): + super().__init__( + name=self.name, + version=self.version, + dependencies=dependencies, + tasks=self.tasks, + )