Closed
Changes from all commits
55 commits
f2cbc72
New version consistent with develop
Evelyn-M Apr 21, 2021
fe1677e
new version consistent with develop
Evelyn-M Apr 21, 2021
a765270
try removing circular import
Evelyn-M Apr 21, 2021
10103b7
merge interpolation into coordinates for circular import error
Evelyn-M Apr 23, 2021
c9b18e8
adjust unit tests after merging interpolation & coordinates
Evelyn-M Apr 26, 2021
49a1846
Merge branch 'develop' of https://github.com/CLIMADA-project/climada_…
Evelyn-M Apr 26, 2021
cc7d016
rename double dist_approx function from interpolation module
Evelyn-M Apr 26, 2021
f7dce04
remove duplicate dist_approx, adjust tests
Evelyn-M Apr 27, 2021
034a312
add placeholder unittests for lp_handler and interpolation funcs
Evelyn-M Apr 27, 2021
3824e7f
change name of dist_great_circle_allgeoms
Evelyn-M Apr 30, 2021
ba48731
update interpolate_polygons
Evelyn-M Apr 30, 2021
946d4f1
update docstring to numpy format
Evelyn-M Apr 30, 2021
1ad8e47
Merge commit '67e1b9093852650186377445ac412061eea4a9d4' into feature/…
emanuel-schmid Jul 26, 2021
ead0d05
Merge branch 'feature/fix_tc_synth_decay' into feature/lines_polygons…
emanuel-schmid Jul 26, 2021
b06e205
Merge branch 'develop' of https://github.com/CLIMADA-project/climada_…
Evelyn-M Aug 2, 2021
ec19a1c
Merge branch 'develop' of https://github.com/CLIMADA-project/climada_…
Evelyn-M Aug 26, 2021
a4ceae1
refactor polygons lines interpolation
Evelyn-M Aug 30, 2021
e892d39
replace geom_on_land func
Evelyn-M Aug 31, 2021
658f9b4
move disaggregation to exposure base
Evelyn-M Aug 31, 2021
eb7a359
update tutorial
Evelyn-M Sep 2, 2021
2d49afe
demo data line exposure
Evelyn-M Sep 2, 2021
a4c7eba
Merge branch 'develop' of https://github.com/CLIMADA-project/climada_…
Evelyn-M Sep 22, 2021
77ca7ac
include litpop interpolation method for polys
Evelyn-M Sep 22, 2021
41fbaff
correct litpop disaggregation, update tutorial
Evelyn-M Sep 22, 2021
7051ca4
add m2/m value to constant valuation method
Evelyn-M Sep 22, 2021
31ed202
bug-fixing litpop disaggregation
Evelyn-M Sep 23, 2021
a4b82e5
Merge branch 'develop' of https://github.com/CLIMADA-project/climada_…
Evelyn-M Sep 24, 2021
316da1a
pushing before switching
Evelyn-M Oct 15, 2021
6ee7bcf
Update spacings
Oct 18, 2021
fad8b7b
Add notes on lines polyongs
Oct 28, 2021
2e6480e
Refactor polygon utils (part1)
Oct 28, 2021
f54387a
Merge branch 'develop' into feature/lines_polygons_exp
Nov 9, 2021
e9397a1
Allow any geo CRS for geodesics
Nov 10, 2021
128a6d5
Move interpolate lines polyongs to line_polys_handler
Nov 10, 2021
8b8ad34
Rewrite poly to points and line to points
Nov 10, 2021
9ab2c69
Remove old methods
Nov 18, 2021
c9d37a3
Add helper methods geometry impact
Nov 19, 2021
c1daf07
Remove old methods
Nov 19, 2021
94ff972
Add line impact
Nov 22, 2021
5fc09ad
Remove old set geom/line methods
Nov 23, 2021
e2c4bda
Add imp matrix sub-routines
Nov 23, 2021
8553ef1
Add small example
Nov 23, 2021
492d6ca
Make impact agg method and add geom_orig to imp
Nov 26, 2021
ed85226
Add example imp agg
Nov 26, 2021
959c580
Add impact calc for exp pnt
Nov 26, 2021
7d3d6a0
rename method
Nov 29, 2021
f420842
Add select hazard by extent
Nov 29, 2021
0832451
Allow select by reg_id and extent
Nov 29, 2021
5cf7874
Add select tight_centroids_bounds
Nov 29, 2021
53779b6
Add docstring
Nov 29, 2021
49b3274
Optimize aggregate impact
Nov 30, 2021
8c79980
Remove useless empty line
Nov 30, 2021
a591028
Add kdtree nearest neighbour search
Nov 30, 2021
f21193c
User parallel computing by default
Dec 1, 2021
20f0f5d
Change matrix multiplication to avoid an expensive todense() call
ChrisFairless Dec 2, 2021
49 changes: 46 additions & 3 deletions climada/hazard/base.py
@@ -655,7 +655,8 @@ def from_excel(cls, file_name, description='', var_names=None, haz_type=None):
raise KeyError("Variable not in Excel file: " + str(var_err)) from var_err
return haz

def select(self, event_names=None, date=None, orig=None, reg_id=None, reset_frequency=False):
def select(self, event_names=None, date=None, orig=None, reg_id=None,
extent=None, reset_frequency=False):
"""Select events matching provided criteria

The frequency of events may need to be recomputed (see `reset_frequency`)!
@@ -671,6 +672,9 @@ def select(self, event_names=None, date=None, orig=None, reg_id=None, reset_freq
Select only historical (True) or only synthetic (False) events.
reg_id : int, optional
Region identifier of the centroids' region_id attribute.
extent : tuple(float, float, float, float), optional
Extent of centroids as (min_lon, max_lon, min_lat, max_lat).
The default is None.
reset_frequency : bool, optional
Change frequency of events proportional to difference between first and last
year (old and new). Default: False.
@@ -711,6 +715,14 @@ def select(self, event_names=None, date=None, orig=None, reg_id=None, reset_freq
if not np.any(sel_cen):
LOGGER.info('No hazard centroids with region %s.', str(reg_id))
return None
if extent is not None:
cent_ext = self.centroids.select(extent=extent)
cent_ext_view = cent_ext.coord.view(dtype='float64,float64').reshape(-1)
cent_view = self.centroids.coord.view(dtype='float64,float64').reshape(-1)
sel_cen &= np.isin(cent_view, cent_ext_view)
if not np.any(sel_cen):
LOGGER.info('No hazard centroids within extent')
return None

# filter events based on name
sel_ev = np.argwhere(sel_ev).reshape(-1)
@@ -735,9 +747,12 @@ def select(self, event_names=None, date=None, orig=None, reg_id=None, reset_freq
setattr(haz, var_name, [var_val[idx] for idx in sel_ev])
elif var_name == 'centroids':
if reg_id is not None:
setattr(haz, var_name, var_val.select(reg_id))
new_cent = var_val.select(reg_id=reg_id, extent=extent)
elif extent is not None:
new_cent = cent_ext
else:
setattr(haz, var_name, var_val)
new_cent = var_val
setattr(haz, var_name, new_cent)
else:
setattr(haz, var_name, var_val)

@@ -752,6 +767,34 @@ def select(self, event_names=None, date=None, orig=None, reg_id=None, reset_freq
haz.sanitize_event_ids()
return haz
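For illustration, a minimal usage sketch of the new extent filter (not part of the diff; it assumes an existing Hazard instance haz, and the extent values are invented):

# Keep only centroids within 5W-10E / 40N-50N; order is (min_lon, max_lon, min_lat, max_lat)
haz_box = haz.select(extent=(-5.0, 10.0, 40.0, 50.0))
if haz_box is None:
    print("No hazard centroids within the requested extent.")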

def select_tight_cent(self, buffer=0.0):
"""
Reduce the hazard to the centroids within the minimal bounding box that
contains all non-zero intensity points.

Parameters
----------
buffer : float, optional
Buffer of box in degrees. The default is 0.0.

Returns
-------
Hazard
Copy of the Hazard with centroids reduced to minimal box. All other
hazard properties are carried over without changes.

See also
--------
self.select: Method to select centroids by lat/lon extent

"""

cent_nz = self.intensity.nonzero()[1]
lon_nz = self.centroids.lon[cent_nz]
lat_nz = self.centroids.lat[cent_nz]
ext = u_coord.latlon_bounds(lat=lat_nz, lon=lon_nz, buffer=buffer)
return self.select(extent = [ext[0], ext[2], ext[1], ext[3]])
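A hedged sketch of how this helper could be called (not part of the diff; haz is an assumed existing Hazard instance and the buffer value is arbitrary):

# Crop the centroids to the smallest lat/lon box holding all non-zero intensities,
# padded by 1 degree on each side.
haz_tight = haz.select_tight_cent(buffer=1.0)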

def local_exceedance_inten(self, return_periods=(25, 50, 100, 250)):
"""Compute exceedance intensity map for given return periods.

248 changes: 243 additions & 5 deletions climada/util/coordinates.py
@@ -34,28 +34,32 @@
from fiona.crs import from_epsg
import geopandas as gpd
import numpy as np
from numba import jit
import pandas as pd
import scipy as sp

import pyproj

Member:
This is a new dependency. Is it absolutely necessary? Has this been discussed with @emanuel-schmid? Please, as a general convention, before adding new dependencies in a pull request, discuss it openly to see whether a) it is really needed, b) it is compatible with the environment, c) it does not add many extra implicit dependencies.

@tovogt (Collaborator), Apr 29, 2021:
pyproj is not a new dependency for CLIMADA. It has been included in the requirements as an indirect dependency (through geopandas) for a long time and is used in several places in the code base. But maybe it's true that we should explicitly list it in the requirements file for the hypothetical case that geopandas might drop this dependency at some point in the future.

Still, it's not a big thing that @Evelyn-M imports it here and we should probably have this discussion in a different place.

@tovogt (Collaborator), Apr 29, 2021:
Here is a full list of packages that are explicitly imported in CLIMADA, but not listed in the requirements files:

  • cftime
  • ee (on purpose)
  • fiona
  • mock
  • numpy (!)
  • pyproj
  • shapefile
  • shapely

Member:
Good points! It seems to me that some of these requirements should really be explicitly listed in the requirements file.

I agree that the import in general does not necessarily need to be discussed here, but this is still exactly the goal of pull requests of this type: discuss the general architecture which includes the discussion about external dependencies.

import pycountry
import rasterio
import rasterio.crs
import rasterio.features
import rasterio.mask
import rasterio.warp
import scipy.interpolate
from shapely.geometry import Polygon, MultiPolygon, Point, box
from shapely.geometry import Polygon, MultiPolygon, MultiPoint, Point, box
import shapely.ops
import shapely.vectorized
import shapefile
from sklearn.neighbors import BallTree

from climada.util.constants import (DEF_CRS, SYSTEM_DIR, ONE_LAT_KM,
NATEARTH_CENTROIDS,
from climada.util.constants import (DEF_CRS, EARTH_RADIUS_KM, SYSTEM_DIR,
ONE_LAT_KM, NATEARTH_CENTROIDS,
ISIMIP_GPWV3_NATID_150AS,
ISIMIP_NATID_TO_ISO,
NONISO_REGIONS,
RIVER_FLOOD_REGIONS_CSV)
from climada.util.files_handler import download_file
import climada.util.hdf5_handler as u_hdf5
import climada.util.interpolation as u_interp

pd.options.mode.chained_assignment = None

@@ -76,6 +80,206 @@
MAX_DEM_TILES_DOWN = 300
"""Maximum DEM tiles to dowload"""

DIST_DEF = ['approx', 'haversine', 'euclidian']
"""Distances"""

METHOD = ['NN']
"""Interpolation methods"""

THRESHOLD = 100
"""Distance threshold in km. Nearest neighbors with greater distances are
not considered."""


@jit(nopython=True, parallel=True)
def _dist_sqr_approx(lats1, lons1, cos_lats1, lats2, lons2):
"""
Compute the squared equirectangular approximation distance. The result must
be square-rooted and multiplied by ONE_LAT_KM to obtain the distance in km.
"""
d_lon = lons1 - lons2
d_lat = lats1 - lats2
return d_lon * d_lon * cos_lats1 * cos_lats1 + d_lat * d_lat
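As a rough illustration of the docstring above (not part of the diff; the coordinates are invented), the squared value returned by _dist_sqr_approx can be converted to an approximate distance in km as follows:

import numpy as np
lats1 = np.array([47.4])                   # one centroid, roughly Zurich
lons1 = np.array([8.5])
cos_lats1 = np.cos(np.radians(lats1))
d_sqr = _dist_sqr_approx(lats1, lons1, cos_lats1, 47.0, 8.3)
d_km = np.sqrt(d_sqr) * ONE_LAT_KM         # approximately 47 km for this pair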

def interpol_index(centroids, coordinates, method=METHOD[0],
distance=DIST_DEF[1], threshold=THRESHOLD):
"""Returns for each coordinate the centroids indexes used for
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This docstring is not fully clear. Could it be made from explicit?

interpolation.

Parameters
----------
centroids : 2d array
First column contains latitude, second column contains longitude. Each
row is a geographic point
coordinates : 2d array
First column contains latitude, second column contains longitude. Each
row is a geographic point
method : str, optional
interpolation method to use. NN default.
distance : str, optional
distance to use. Haversine default
threshold : float
distance threshold in km over which no neighbor will be found. Those
are assigned with a -1 index

Returns
-------
interp : numpy array
with as many rows as coordinates, containing the centroid indexes
"""
if (method == METHOD[0]) & (distance == DIST_DEF[0]):
# Compute for each coordinate the closest centroid
interp = index_nn_aprox(centroids, coordinates, threshold)
elif (method == METHOD[0]) & (distance == DIST_DEF[1]):
# Compute the nearest centroid for each coordinate using the
# haversine formula. This is done with a Ball tree.
interp = index_nn_haversine(centroids, coordinates, threshold)
elif (method == METHOD[0]) & (distance == DIST_DEF[2]):
interp = index_nn_euclidian(centroids, coordinates, threshold)
else:
LOGGER.error('Interpolation using %s with distance %s is not '
'supported.', method, distance)
interp = np.array([])
return interp
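A short usage sketch for the dispatcher (not part of the diff; the coordinates are invented, and the column order is latitude, longitude as documented above):

import numpy as np
centroids = np.array([[47.0, 8.0], [47.5, 8.5], [48.0, 9.0]])   # rows are (lat, lon)
coords = np.array([[47.1, 8.1], [49.9, 12.0]])
idx = interpol_index(centroids, coords, method='NN', distance='haversine',
                     threshold=100)
# expected: idx[0] == 0 (nearest centroid), idx[1] == -1 (beyond the 100 km threshold)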

def index_nn_aprox(centroids, coordinates, threshold=THRESHOLD):
"""
Compute the nearest centroid for each coordinate using the squared
equirectangular approximation distance d = ((dlon)*cos(lat))^2 + (dlat)^2.
For distant points (e.g. more than 100 km apart), the haversine distance
should be used instead.

Parameters
----------
centroids : 2d array
First column contains latitude, second column contains longitude. Each
row is a geographic point
coordinates : 2d array
First column contains latitude, second column contains longitude. Each
row is a geographic point
threshold : float
distance threshold in km over which no neighbor will be found. Those
are assigned with a -1 index

Returns
-------
assigned : np.array
with as many rows as coordinates, containing the centroid indexes
"""

# Compute only for the unique coordinates. Copy the results for the
# not unique coordinates
_, idx, inv = np.unique(coordinates, axis=0, return_index=True,
return_inverse=True)
# Compute cos(lat) for all centroids
centr_cos_lat = np.cos(np.radians(centroids[:, 0]))
assigned = np.zeros(coordinates.shape[0], int)
num_warn = 0
for icoord, iidx in enumerate(idx):
dist = _dist_sqr_approx(centroids[:, 0], centroids[:, 1],
centr_cos_lat, coordinates[iidx, 0],
coordinates[iidx, 1])
min_idx = dist.argmin()
# Raise a warning if the minimum distance is greater than the
threshold and set an invalid index -1
if np.sqrt(dist.min()) * ONE_LAT_KM > threshold:
num_warn += 1
min_idx = -1

# Assign found centroid index to all the same coordinates
assigned[inv == icoord] = min_idx

if num_warn:
LOGGER.warning('Distance to closest centroid is greater than %s'
'km for %s coordinates.', threshold, num_warn)

return assigned

def index_nn_haversine(centroids, coordinates, threshold=THRESHOLD):
"""Compute the neareast centroid for each coordinate using a Ball
tree with haversine distance.

Parameters
----------
centroids : 2d array
First column contains latitude, second column contains longitude.
Each row is a geographic point
coordinates : 2d array
First column contains latitude, second column contains longitude. Each
row is a geographic point
threshold : float
distance threshold in km over which no neighbor will be found. Those
are assigned with a -1 index

Returns
-------
np.array
with as many rows as coordinates, containing the centroid indexes
"""
# Construct tree from centroids
tree = BallTree(np.radians(centroids), metric='haversine')
# Select unique exposures coordinates
_, idx, inv = np.unique(coordinates, axis=0, return_index=True,
return_inverse=True)

# query the k closest points of the n_points using dual tree
dist, assigned = tree.query(np.radians(coordinates[idx]), k=1,
return_distance=True, dualtree=True,
breadth_first=False)

# Raise a warning if the minimum distance is greater than the
threshold and set an invalid index -1
num_warn = np.sum(dist * EARTH_RADIUS_KM > threshold)
if num_warn:
LOGGER.warning('Distance to closest centroid is greater than %s'
'km for %s coordinates.', threshold, num_warn)
assigned[dist * EARTH_RADIUS_KM > threshold] = -1

# Copy result to all exposures and return value
return np.squeeze(assigned[inv])
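For context, the BallTree/haversine pattern used above can be sketched in isolation as follows (illustration only, with invented points; the tree returns distances in radians on the unit sphere, hence the multiplication by the Earth radius):

import numpy as np
from sklearn.neighbors import BallTree
pts_rad = np.radians(np.array([[47.0, 8.0], [48.0, 9.0]]))   # (lat, lon) in radians
tree = BallTree(pts_rad, metric='haversine')
dist_rad, idx = tree.query(np.radians([[47.1, 8.1]]), k=1)
dist_km = dist_rad * 6371.0   # roughly 13-14 km to the first point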


def index_nn_euclidian(centroids, coordinates, threshold=THRESHOLD):
"""Compute the neareast centroid for each coordinate using a Ball
tree with haversine distance.

Parameters
----------
centroids : 2d array
First column contains latitude, second column contains longitude.
Each row is a geographic point
coordinates : 2d array
First column contains latitude, second column contains longitude. Each
row is a geographic point
threshold : float
distance threshold in km over which no neighbor will be found. Those
are assigned with a -1 index

Returns
-------
np.array
with as many rows as coordinates, containing the centroid indexes
"""
# Construct tree from centroids
tree = sp.spatial.cKDTree(np.radians(centroids))
# Select unique exposures coordinates
_, idx, inv = np.unique(coordinates, axis=0, return_index=True,
return_inverse=True)

# query the k closest points of the n_points using dual tree
dist, assigned = tree.query(np.radians(coordinates[idx]), k=1, p=2, workers=-1)

# Raise a warning if the minimum distance is greater than the
threshold and set an invalid index -1
num_warn = np.sum(dist * EARTH_RADIUS_KM > threshold)
if num_warn:
LOGGER.warning('Distance to closest centroid is greater than %s'
'km for %s coordinates.', threshold, num_warn)
assigned[dist * EARTH_RADIUS_KM > threshold] = -1

# Copy result to all exposures and return value
return np.squeeze(assigned[inv])


def latlon_to_geosph_vector(lat, lon, rad=False, basis=False):
"""Convert lat/lon coodinates to radial vectors (on geosphere)

@@ -318,6 +522,40 @@ def dist_approx(lat1, lon1, lat2, lon2, log=False, normalize=True,
raise KeyError("Unknown distance approximation method: %s" % method)
return (dist, vtan) if log else dist

def compute_geodesic_lengths(gdf):
"""Calculate the great circle (geodesic / spherical) lengths along any
(complicated) line geometry object, based on the pyproj.Geod implementation.

Parameters
----------
gdf : gpd.GeoDataframe with geometrical shapes of which to compute the length

Returns
-------
series : a pandas series (column) with the great circle lengths of the
objects in metres.

See also
--------
* dist_approx() which also offers haversine distance calculation options
between specific points (not along any geometries however).
* interpolation.interpolate_lines()

Note
----
This implementation relies on non-projected (i.e. geographic coordinate
systems that span the entire globe) crs only, which results in
sea-level distances and hence a certain (minor) level of distortion; cf.
https://gis.stackexchange.com/questions/176442/what-is-the-real-distance-between-positions
"""
# convert to non-projected crs if needed
gdf_tmp = gdf.to_crs(DEF_CRS) if not gdf.crs.is_geographic else gdf.copy()
geod = gdf_tmp.crs.get_geod()

return gdf_tmp.apply(lambda row: geod.geometry_length(
row.geometry), axis=1)
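A hedged usage sketch (not part of the diff; the geometry and CRS below are illustrative only):

import geopandas as gpd
from shapely.geometry import LineString
line = LineString([(8.0, 47.0), (9.0, 47.0)])     # lon/lat pairs, one degree of longitude at 47N
gdf = gpd.GeoDataFrame(geometry=[line], crs='EPSG:4326')
lengths_m = compute_geodesic_lengths(gdf)         # a single value of roughly 76 000 m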


def get_gridcellarea(lat, resolution=0.5, unit='km2'):
"""The area covered by a grid cell is calculated depending on the latitude
1 degree = ONE_LAT_KM (111.12km at the equator)
@@ -923,7 +1161,7 @@ def assign_coordinates(coords, coords_to_assign, method="NN", distance="haversin
# assign remaining coordinates to their geographically nearest neighbor
if threshold > 0 and exact_assign_idx.size != coords_view.size:
not_assigned_idx_mask = (assigned_idx == -1)
assigned_idx[not_assigned_idx_mask] = u_interp.interpol_index(
assigned_idx[not_assigned_idx_mask] = interpol_index(
coords_to_assign, coords[not_assigned_idx_mask],
method=method, distance=distance, threshold=threshold)
return assigned_idx