Commit

Merge remote-tracking branch 'origin/fixes/#989-mismatch-cts-bus-id' into local
nailend committed Oct 25, 2022
2 parents 2f06d48 + e302ab4 commit 1ade0cc
Showing 1 changed file with 104 additions and 127 deletions:
src/egon/data/datasets/electricity_demand_timeseries/cts_buildings.py
@@ -251,7 +251,7 @@ class CtsDemandBuildings(Dataset):
     def __init__(self, dependencies):
         super().__init__(
             name="CtsDemandBuildings",
-            version="0.0.1",
+            version="0.0.2",
             dependencies=dependencies,
             tasks=(
                 cts_buildings,
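
The only substantive change in this hunk is the version bump. In egon-data, a Dataset's version tags its results, and bumping it is what flags the dataset for re-execution in the pipeline. A toy sketch of that idea (the registry and helper below are invented for illustration, not the egon-data API):

    # Hypothetical sketch of version-gated re-execution, not the real framework:
    stored_versions = {"CtsDemandBuildings": "0.0.1"}  # version from the last run

    def needs_rerun(name: str, declared: str) -> bool:
        # Re-run whenever the declared version differs from the stored one
        return stored_versions.get(name) != declared

    assert needs_rerun("CtsDemandBuildings", "0.0.2")  # True: tasks run again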
@@ -419,15 +419,14 @@ def create_synthetic_buildings(df, points=None, crs="EPSG:3035"):

 def buildings_with_amenities():
     """
-    Amenities which are assigned to buildings are determined
-    and grouped per building and zensus cell. Buildings
-    covering multiple cells therefore exists multiple times
-    but in different zensus cells. This is necessary to cover
-    all cells with a cts demand. If buildings exist in multiple
-    substations, their amenities are summed and assigned and kept in
-    one substation only. If as a result, a census cell is uncovered,
-    a synthetic amenity is placed. The buildings are aggregated
-    afterwards during the calculation of the profile_share.
+    Amenities which are assigned to buildings are determined and grouped per
+    building and zensus cell. Buildings covering multiple cells therefore
+    exist multiple times, but in different zensus cells. This is necessary
+    to cover as many cells with a CTS demand as possible. If a building lies
+    in multiple MV grid districts (bus_id), only the amenities in the same
+    district as the building centroid are kept. If, as a result, a census
+    cell is no longer covered by any building, a synthetic amenity is placed.
+    The buildings are aggregated afterwards when the profile_share is calculated.
 
     Returns
     -------
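
The reworked docstring describes the new assignment rule: an amenity only counts for a building if its census cell lies in the same MV grid district (bus_id) as the building centroid. A self-contained pandas sketch of that rule on toy data (column names follow the diff; the values are made up):

    import pandas as pd

    # One row per amenity: the cell's mvgd vs. the mvgd of the building centroid
    df = pd.DataFrame(
        {
            "id": [1, 1, 2],                     # building ids
            "zensus_population_id": [10, 11, 12],
            "bus_id": [100, 101, 100],           # mvgd of the amenity's cell
            "building_bus_id": [100, 100, 100],  # mvgd of the building centroid
        }
    )

    # Keep only amenities whose cell lies in the same mvgd as the centroid
    mismatched = df.loc[df["bus_id"] != df["building_bus_id"]].index
    lost_cells = df.loc[mismatched, "zensus_population_id"].unique()  # -> [11]
    df = df.drop(mismatched)  # cell 11 is now uncovered -> synthetic amenity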
@@ -437,6 +436,7 @@ def buildings_with_amenities():
         Contains synthetic amenities in lost cells. Might be empty
     """
 
+    from saio.boundaries import egon_map_zensus_buildings_filtered_all
     from saio.openstreetmap import osm_amenities_in_buildings_filtered
 
     with db.session_scope() as session:
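
The added `saio.boundaries` import relies on saio's schema reflection: once a schema is registered against an engine, its tables are importable as attributes. A minimal sketch of that pattern (the connection URL is a placeholder):

    import saio
    from sqlalchemy import create_engine

    engine = create_engine("postgresql://user:pw@localhost/egon-data")  # placeholder
    saio.register_schema("boundaries", engine)  # reflects the schema's tables

    # afterwards the reflected table classes can be imported as in the diff
    from saio.boundaries import egon_map_zensus_buildings_filtered_all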
@@ -458,9 +458,9 @@
                 EgonDemandRegioZensusElectricity.scenario == "eGon2035",
             )
         )
-    df_amenities_in_buildings = pd.read_sql(
-        cells_query.statement, cells_query.session.bind, index_col=None
-    )
+        df_amenities_in_buildings = pd.read_sql(
+            cells_query.statement, con=session.connection(), index_col=None
+        )
 
     df_amenities_in_buildings["geom_building"] = df_amenities_in_buildings[
         "geom_building"
@@ -469,110 +469,84 @@
"geom_amenity"
].apply(to_shape)

df_amenities_in_buildings["n_amenities_inside"] = 1

# add identifier column for buildings in multiple substations
df_amenities_in_buildings[
"duplicate_identifier"
] = df_amenities_in_buildings.groupby(["id", "bus_id"])[
"n_amenities_inside"
].transform(
"cumsum"
)
df_amenities_in_buildings = df_amenities_in_buildings.sort_values(
["id", "duplicate_identifier"]
)
# sum amenities of buildings with multiple substations
df_amenities_in_buildings[
"n_amenities_inside"
] = df_amenities_in_buildings.groupby(["id", "duplicate_identifier"])[
"n_amenities_inside"
].transform(
"sum"
)
# retrieve building centroid bus_id
with db.session_scope() as session:

# create column to always go for bus_id with max amenities
df_amenities_in_buildings[
"max_amenities"
] = df_amenities_in_buildings.groupby(["id", "bus_id"])[
"n_amenities_inside"
].transform(
"sum"
)
# sort to go for
df_amenities_in_buildings.sort_values(
["id", "max_amenities"], ascending=False, inplace=True
)
cells_query = session.query(
egon_map_zensus_buildings_filtered_all.id,
MapZensusGridDistricts.bus_id.label("building_bus_id"),
).filter(
egon_map_zensus_buildings_filtered_all.zensus_population_id
== MapZensusGridDistricts.zensus_population_id
)

# identify lost zensus cells
df_lost_cells = df_amenities_in_buildings.loc[
df_amenities_in_buildings.duplicated(
subset=["id", "duplicate_identifier"], keep="first"
df_building_bus_id = pd.read_sql(
cells_query.statement, con=session.connection(), index_col=None
)
]
df_lost_cells.drop_duplicates(
subset=["zensus_population_id"], inplace=True
)

# drop buildings with multiple substation and lower max amenity
df_amenities_in_buildings.drop_duplicates(
subset=["id", "duplicate_identifier"], keep="first", inplace=True
df_amenities_in_buildings = pd.merge(
left=df_amenities_in_buildings, right=df_building_bus_id, on="id"
)

# identify amenities with differing bus_id as building
identified_amenities = df_amenities_in_buildings.loc[
df_amenities_in_buildings["bus_id"]
!= df_amenities_in_buildings["building_bus_id"]
].index

lost_cells = df_amenities_in_buildings.loc[
identified_amenities, "zensus_population_id"
].unique()

# check if lost zensus cells are already covered
if not df_lost_cells.empty:
if not (
df_amenities_in_buildings["zensus_population_id"]
.isin(df_lost_cells["zensus_population_id"])
.empty
):
# query geom data for cell if not
with db.session_scope() as session:
cells_query = session.query(
DestatisZensusPopulationPerHa.id,
DestatisZensusPopulationPerHa.geom,
).filter(
DestatisZensusPopulationPerHa.id.in_(
df_lost_cells["zensus_population_id"]
)
)
if not (
df_amenities_in_buildings["zensus_population_id"]
.isin(lost_cells)
.empty
):
# query geom data for cell if not
with db.session_scope() as session:
cells_query = session.query(
DestatisZensusPopulationPerHa.id,
DestatisZensusPopulationPerHa.geom,
).filter(
DestatisZensusPopulationPerHa.id.in_(pd.Index(lost_cells))
)

df_lost_cells = gpd.read_postgis(
cells_query.statement,
cells_query.session.bind,
geom_col="geom",
)

# place random amenity in cell
df_lost_cells["n_amenities_inside"] = 1
df_lost_cells.rename(
columns={
"id": "zensus_population_id",
},
inplace=True,
)
df_lost_cells = place_buildings_with_amenities(
df_lost_cells, amenities=1
)
df_lost_cells.rename(
columns={
# "id": "zensus_population_id",
"geom_point": "geom_amenity",
},
inplace=True,
)
df_lost_cells.drop(
columns=["building_count", "n_amenities_inside"], inplace=True
)
else:
df_lost_cells = None
# place random amenity in cell
df_lost_cells["n_amenities_inside"] = 1
df_lost_cells.rename(
columns={
"id": "zensus_population_id",
},
inplace=True,
)
df_lost_cells = place_buildings_with_amenities(
df_lost_cells, amenities=1
)
df_lost_cells.rename(
columns={
# "id": "zensus_population_id",
"geom_point": "geom_amenity",
},
inplace=True,
)
df_lost_cells.drop(
columns=["building_count", "n_amenities_inside"], inplace=True
)
else:
df_lost_cells = None

# drop helper columns
df_amenities_in_buildings.drop(
columns=["duplicate_identifier"], inplace=True
)
df_amenities_in_buildings.drop(identified_amenities, inplace=True)
df_amenities_in_buildings.drop(columns="building_bus_id", inplace=True)

df_amenities_in_buildings["n_amenities_inside"] = 1

# sum amenities per building and cell
df_amenities_in_buildings[
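
For the cells collected in `lost_cells`, the new code fetches the cell geometries and places one synthetic amenity in each via the project's `place_buildings_with_amenities` helper. A toy version of that placement step (the box polygon stands in for a real 100 m census cell, and `representative_point` stands in for the helper's placement logic):

    import geopandas as gpd
    from shapely.geometry import box

    # Stand-in for one lost 100 m x 100 m census cell polygon
    lost = gpd.GeoDataFrame(
        {"zensus_population_id": [42]},
        geometry=[box(0, 0, 100, 100)],
        crs="EPSG:3035",
    )

    # One synthetic amenity per cell, at a point guaranteed to lie inside it
    lost["geom_amenity"] = lost.geometry.representative_point()
    lost["n_amenities_inside"] = 1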
@@ -938,32 +912,32 @@ def calc_building_amenity_share(df_cts_buildings):
"building_amenity_share"
].multiply(df_demand_share["cell_share"])

# Fix #989
# Mismatched bus_id
# May result in failing sanity checks
from saio.boundaries import egon_map_zensus_buildings_filtered_all

with db.session_scope() as session:
query = session.query(
egon_map_zensus_buildings_filtered_all.id,
MapZensusGridDistricts.bus_id,
).filter(
egon_map_zensus_buildings_filtered_all.id.in_(
df_cts_buildings.id.values
),
MapZensusGridDistricts.zensus_population_id
== egon_map_zensus_buildings_filtered_all.zensus_population_id,
)

df_map_bus_id = pd.read_sql(
query.statement, session.connection(), index_col=None
)

df_demand_share = pd.merge(
left=df_demand_share.drop(columns="bus_id"),
right=df_map_bus_id,
on="id",
)
# # Fix #989
# # Mismatched bus_id
# # May result in failing sanity checks
# from saio.boundaries import egon_map_zensus_buildings_filtered_all
#
# with db.session_scope() as session:
# query = session.query(
# egon_map_zensus_buildings_filtered_all.id,
# MapZensusGridDistricts.bus_id,
# ).filter(
# egon_map_zensus_buildings_filtered_all.id.in_(
# df_cts_buildings.id.values
# ),
# MapZensusGridDistricts.zensus_population_id
# == egon_map_zensus_buildings_filtered_all.zensus_population_id,
# )
#
# df_map_bus_id = pd.read_sql(
# query.statement, session.connection(), index_col=None
# )
#
# df_demand_share = pd.merge(
# left=df_demand_share.drop(columns="bus_id"),
# right=df_map_bus_id,
# on="id",
# )

# only pass selected columns
df_demand_share = df_demand_share[
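
This hunk only disables the earlier #989 workaround: with bus_id now assigned from the building centroid inside `buildings_with_amenities()`, the late remap of `bus_id` in the demand shares becomes redundant. What the commented-out block did, reduced to a runnable pandas sketch on toy data:

    import pandas as pd

    df_demand_share = pd.DataFrame(
        {"id": [1, 2], "bus_id": [10, 11], "share": [0.4, 0.6]}
    )
    df_map_bus_id = pd.DataFrame({"id": [1, 2], "bus_id": [10, 12]})  # cell-based

    # Overwrite each building's bus_id with the one mapped via its zensus cell
    df_demand_share = pd.merge(
        left=df_demand_share.drop(columns="bus_id"),
        right=df_map_bus_id,
        on="id",
    )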
Expand Down Expand Up @@ -1236,7 +1210,10 @@ def adapt_numpy_int64(numpy_int64):
         df_amenities_without_buildings = df_amenities_without_buildings.append(
             df_lost_cells, ignore_index=True
         )
-        log.info("Lost cells due to substation intersection appended!")
+        log.info(
+            f"{df_lost_cells.shape[0]} lost cells due to substation "
+            f"intersection appended!"
+        )
 
     # One building per amenity
     df_amenities_without_buildings["n_amenities_inside"] = 1
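
The log message now reports how many lost cells were appended. As an aside on the unchanged context line: `DataFrame.append` was deprecated in pandas 1.4 and removed in 2.0, so on current pandas the same step would use `pd.concat`:

    import pandas as pd

    df_amenities_without_buildings = pd.DataFrame({"zensus_population_id": [1]})
    df_lost_cells = pd.DataFrame({"zensus_population_id": [2]})

    # pd.concat replaces the removed DataFrame.append
    df_amenities_without_buildings = pd.concat(
        [df_amenities_without_buildings, df_lost_cells], ignore_index=True
    )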
