Skip to content

Add functions to compute, plot, store the local hazard exceedence intensity and RP maps (new branch) #898

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
Jul 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions AUTHORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,4 @@
* Timo Schmid
* Luca Severino
* Samuel Juhel
* Valentin Gebhart
116 changes: 116 additions & 0 deletions climada/hazard/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from typing import Optional,List
import warnings

import geopandas as gpd
import numpy as np
from pathos.pools import ProcessPool as Pool
from scipy import sparse
Expand All @@ -44,7 +45,7 @@
LOGGER = logging.getLogger(__name__)


class Hazard(HazardIO, HazardPlot):

Check warning on line 48 in climada/hazard/base.py

View check run for this annotation

Jenkins - WCR / Pylint

too-many-public-methods

LOW: Too many public methods (21/20)
Raw output
Used when class has too many public methods, try to reduce this to get asimpler (and so easier to use) class.
"""
Contains events of some hazard type defined at centroids. Loads from
files with format defined in FILE_EXT.
Expand Down Expand Up @@ -450,6 +451,75 @@
LOGGER.debug('Resetting event_id.')
self.event_id = np.arange(1, self.event_id.size + 1)

def local_return_period(self, threshold_intensities=(5., 10., 20.)):
"""Compute local return periods for given hazard intensities. The used method
is fitting the ordered intensitites per centroid to the corresponding cummulated
frequency with a step function.

Parameters
----------
threshold_intensities : np.array
User-specified hazard intensities for which the return period should be calculated
locally (at each centroid). Defaults to (5, 10, 20)

Returns
-------
gdf : gpd.GeoDataFrame
GeoDataFrame containing return periods for given threshold intensities. Each column
corresponds to a threshold_intensity value, each row corresponds to a centroid. Values
in the gdf correspond to the return period for the given centroid and
threshold_intensity value
label : str
GeoDataFrame label, for reporting and plotting
column_label : function
Column-label-generating function, for reporting and plotting
"""
#check frequency unit
if self.frequency_unit in ['1/year', 'annual', '1/y', '1/a']:
rp_unit = 'Years'

Check warning on line 479 in climada/hazard/base.py

View check run for this annotation

Jenkins - WCR / Code Coverage

Not covered line

Line 479 is not covered by tests
elif self.frequency_unit in ['1/month', 'monthly', '1/m']:
rp_unit = 'Months'

Check warning on line 481 in climada/hazard/base.py

View check run for this annotation

Jenkins - WCR / Code Coverage

Not covered line

Line 481 is not covered by tests
elif self.frequency_unit in ['1/week', 'weekly', '1/w']:
rp_unit = 'Weeks'
else:
LOGGER.warning("Hazard's frequency unit %s is not known, "
"years will be used as return period unit.", self.frequency_unit)
rp_unit = 'Years'

Check warning on line 487 in climada/hazard/base.py

View check run for this annotation

Jenkins - WCR / Code Coverage

Not covered lines

Lines 485-487 are not covered by tests

# Ensure threshold_intensities is a numpy array
threshold_intensities = np.array(threshold_intensities)

num_cen = self.intensity.shape[1]
return_periods = np.zeros((len(threshold_intensities), num_cen))

# batch_centroids = number of centroids that are handled in parallel:

Check warning on line 495 in climada/hazard/base.py

View check run for this annotation

Jenkins - WCR / Pylint

trailing-whitespace

LOW: Trailing whitespace
Raw output
Used when there is whitespace between the end of a line and the newline.
# batch_centroids = maximal matrix size // number of events
batch_centroids = CONFIG.max_matrix_size.int() // self.intensity.shape[0]
if batch_centroids < 1:
raise ValueError('Increase max_matrix_size configuration parameter to >'

Check warning on line 499 in climada/hazard/base.py

View check run for this annotation

Jenkins - WCR / Code Coverage

Not covered line

Line 499 is not covered by tests
f'{self.intensity.shape[0]}')

# Process the intensities in chunks of centroids
for start_col in range(0, num_cen, batch_centroids):
end_col = min(start_col + batch_centroids, num_cen)
return_periods[:, start_col:end_col] = self._loc_return_period(
threshold_intensities,
self.intensity[:, start_col:end_col].toarray()
)

# create the output GeoDataFrame
gdf = gpd.GeoDataFrame(geometry = self.centroids.gdf['geometry'],
crs = self.centroids.gdf.crs)
col_names = [f'{tresh_inten}' for tresh_inten in threshold_intensities]
gdf[col_names] = return_periods.T

# create label and column_label
label = f'Return Periods ({rp_unit})'
column_label = lambda column_names: [f'Threshold Intensity: {col} {self.units}'
for col in column_names]

return gdf, label, column_label

def get_event_id(self, event_name):
"""Get an event id from its name. Several events might have the same
name.
Expand Down Expand Up @@ -614,6 +684,52 @@
inten_sort[:, cen_idx], freq_sort[:, cen_idx],
self.intensity_thres, return_periods)

def _loc_return_period(self, threshold_intensities, inten):
"""Compute local return periods for user-specified threshold intensities
for a subset of hazard centroids

Parameters
----------
threshold_intensities: np.array
User-specified hazard intensities for which the return period should be calculated
locally (at each centroid).
inten: np.array
subarray of full hazard intensities corresponding to a subset of the centroids
(rows corresponds to events, columns correspond to centroids)

Returns
-------
np.array
(rows corresponds to threshold_intensities, columns correspond to centroids)
"""
# Assuming inten is sorted and calculating cumulative frequency
sort_pos = np.argsort(inten, axis=0)[::-1, :]
inten_sort = inten[sort_pos, np.arange(inten.shape[1])]
freq_sort = self.frequency[sort_pos]
freq_sort = np.cumsum(freq_sort, axis=0)
return_periods = np.zeros((len(threshold_intensities), inten.shape[1]))

for cen_idx in range(inten.shape[1]):
sorted_inten_cen = inten_sort[:, cen_idx]
cum_freq_cen = freq_sort[:, cen_idx]

for i, intensity in enumerate(threshold_intensities):
# Find the first occurrence where the intensity is less than the sorted intensities
exceedance_index = np.searchsorted(sorted_inten_cen[::-1], intensity, side='left')

# Calculate exceedance probability
if exceedance_index < len(cum_freq_cen):
exceedance_probability = cum_freq_cen[-exceedance_index - 1]
else:
exceedance_probability = 0 # Or set a default minimal probability

# Calculate and store return period
if exceedance_probability > 0:
return_periods[i, cen_idx] = 1 / exceedance_probability
else:
return_periods[i, cen_idx] = np.nan
return return_periods

def _check_events(self):
"""Check that all attributes but centroids contain consistent data.
Put default date, event_name and orig if not provided. Check not
Expand Down
19 changes: 19 additions & 0 deletions climada/hazard/test/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1022,6 +1022,25 @@ def test_ref_all_pass(self):
self.assertAlmostEqual(inten_stats[1][66], 70.608592953031405)
self.assertAlmostEqual(inten_stats[3][33], 88.510983305123631)
self.assertAlmostEqual(inten_stats[2][99], 79.717518054203623)

def test_local_return_period(self):
"""Compare local return periods against reference."""
haz = dummy_hazard()
haz.intensity = sparse.csr_matrix([
[1., 5., 1.],
[2., 2., 0.]
])
haz.frequency = np.full(4, 1.)
threshold_intensities = np.array([1., 2., 3.])
return_stats, _, _ = haz.local_return_period(threshold_intensities)
np.testing.assert_allclose(
return_stats[return_stats.columns[1:]].values.T,
np.array([
[0.5, 0.5, 1.],
[1., 0.5, np.nan],
[np.nan, 1., np.nan]
])
)


class TestYearset(unittest.TestCase):
Expand Down
78 changes: 78 additions & 0 deletions climada/util/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
from rasterio.crs import CRS
import requests
import geopandas as gpd

from climada.util.constants import CMAP_EXPOSURES, CMAP_CAT, CMAP_RASTER
from climada.util.files_handler import to_list
Expand Down Expand Up @@ -874,3 +875,80 @@
# Draw legend if we need
if legend:
ax.legend(bars, data.keys())

def subplots_from_gdf(
gdf: gpd.GeoDataFrame,
colorbar_name: str = None,
title_subplots: callable = None,
smooth=True,
axis=None,
figsize=(9, 13),
adapt_fontsize=True,
**kwargs
):
"""Plot several subplots from different columns of a GeoDataFrame, e.g., for

Check warning on line 889 in climada/util/plot.py

View check run for this annotation

Jenkins - WCR / Pylint

trailing-whitespace

LOW: Trailing whitespace
Raw output
Used when there is whitespace between the end of a line and the newline.
plotting local return periods or local exceedance intensities.

Parameters
----------
gdf: gpd.GeoDataFrame
return periods per threshold intensity
colorbar_name: str
title of the subplots' colorbars
title_subplots: function
function that generates the titles of the different subplots using the columns' names
smooth: bool, optional
Smooth plot to plot.RESOLUTION x plot.RESOLUTION. Default is True
axis: matplotlib.axes._subplots.AxesSubplot, optional
Axis to use. Default is None
figsize: tuple, optional
Figure size for plt.subplots. Default is (9, 13)
adapt_fontsize: bool, optional
If set to true, the size of the fonts will be adapted to the size of the figure.
Otherwise the default matplotlib font size is used. Default is True.
kwargs: optional
Arguments for pcolormesh matplotlib function used in event plots.

Returns
-------
axis: matplotlib.axes._subplots.AxesSubplot
Matplotlib axis with the plot.
"""
# check if inputs are correct types
if not isinstance(gdf, gpd.GeoDataFrame):
raise ValueError("gdf is not a GeoDataFrame")

Check warning on line 919 in climada/util/plot.py

View check run for this annotation

Jenkins - WCR / Code Coverage

Not covered line

Line 919 is not covered by tests
gdf = gdf[['geometry', *[col for col in gdf.columns if col != 'geometry']]]

# read meta data for fig and axis labels
if not isinstance(colorbar_name, str):
print("Unknown colorbar name. Colorbar label will be missing.")
colorbar_name = ''

Check warning on line 925 in climada/util/plot.py

View check run for this annotation

Jenkins - WCR / Code Coverage

Not covered lines

Lines 924-925 are not covered by tests
if not callable(title_subplots):
print("Unknown subplot-title-generation function. Subplot titles will be column names.")
title_subplots = lambda cols: [f"{col}" for col in cols]

Check warning on line 928 in climada/util/plot.py

View check run for this annotation

Jenkins - WCR / Code Coverage

Not covered lines

Lines 927-928 are not covered by tests

# change default plot kwargs if plotting return periods
if colorbar_name.strip().startswith('Return Period'):
if 'cmap' not in kwargs.keys():
kwargs.update({'cmap': 'viridis_r'})
if 'norm' not in kwargs.keys():
kwargs.update(
{'norm': mpl.colors.LogNorm(
vmin=gdf.values[:,1:].min(), vmax=gdf.values[:,1:].max()
),
'vmin': None, 'vmax': None}
)

axis = geo_im_from_array(
gdf.values[:,1:].T,
gdf.geometry.get_coordinates().values[:,::-1],
colorbar_name,
title_subplots(gdf.columns[1:]),
smooth=smooth,
axes=axis,
figsize=figsize,
adapt_fontsize=adapt_fontsize,
**kwargs
)

return axis
17 changes: 17 additions & 0 deletions climada/util/test/test_plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
import matplotlib.pyplot as plt
from matplotlib import colormaps as cm
import cartopy.crs as ccrs
import geopandas as gpd
from shapely import Point

import climada.util.plot as u_plot

Expand Down Expand Up @@ -150,6 +152,21 @@ def test_geo_im_from_array(self):
self.assertEqual(cmap, ax.collections[0].cmap.name)
plt.close()

def test_subplots_from_gdf(self):
return_periods = gpd.GeoDataFrame(
data = ((2., 5.), (3., 6.), (None, 2.), (1., 7.)),
columns = ('10.0', '20.0')
)
return_periods['geometry'] = (Point(45., 26.), Point(46., 26.), Point(45., 27.), Point(46., 27.))
colorbar_name = 'Return Periods (Years)'
title_subplots = lambda cols: [f'Threshold Intensity: {col} m/s' for col in cols]
(axis1, axis2) = u_plot.subplots_from_gdf(
return_periods,
colorbar_name=colorbar_name,
title_subplots=title_subplots)
self.assertEqual('Threshold Intensity: 10.0 m/s', axis1.get_title())
self.assertEqual('Threshold Intensity: 20.0 m/s', axis2.get_title())
plt.close()

# Execute Tests
if __name__ == "__main__":
Expand Down
243 changes: 78 additions & 165 deletions doc/tutorial/climada_hazard_Hazard.ipynb

Large diffs are not rendered by default.

Loading