|
27 | 27 | from typing import Optional,List |
28 | 28 | import warnings |
29 | 29 |
|
| 30 | +import geopandas as gpd |
30 | 31 | import numpy as np |
31 | 32 | from pathos.pools import ProcessPool as Pool |
32 | 33 | from scipy import sparse |
@@ -450,6 +451,75 @@ def sanitize_event_ids(self): |
450 | 451 | LOGGER.debug('Resetting event_id.') |
451 | 452 | self.event_id = np.arange(1, self.event_id.size + 1) |
452 | 453 |
|
| 454 | + def local_return_period(self, threshold_intensities=(5., 10., 20.)): |
| 455 | + """Compute local return periods for given hazard intensities. The used method |
| 456 | + is fitting the ordered intensitites per centroid to the corresponding cummulated |
| 457 | + frequency with a step function. |
| 458 | +
|
| 459 | + Parameters |
| 460 | + ---------- |
| 461 | + threshold_intensities : tuple or np.array
| 462 | + User-specified hazard intensities for which the return period should be calculated
| 463 | + locally (at each centroid). Defaults to (5., 10., 20.).
| 464 | +
|
| 465 | + Returns |
| 466 | + ------- |
| 467 | + gdf : gpd.GeoDataFrame |
| 468 | + GeoDataFrame containing return periods for given threshold intensities. Each column |
| 469 | + corresponds to a threshold_intensity value, each row corresponds to a centroid. Values |
| 470 | + in the gdf correspond to the return period for the given centroid and |
| 471 | + threshold_intensity value |
| 472 | + label : str |
| 473 | + GeoDataFrame label, for reporting and plotting |
| 474 | + column_label : function |
| 475 | + Column-label-generating function, for reporting and plotting |
| 476 | + """ |
| 477 | + # check frequency unit
| 478 | + if self.frequency_unit in ['1/year', 'annual', '1/y', '1/a']: |
| 479 | + rp_unit = 'Years' |
| 480 | + elif self.frequency_unit in ['1/month', 'monthly', '1/m']: |
| 481 | + rp_unit = 'Months' |
| 482 | + elif self.frequency_unit in ['1/week', 'weekly', '1/w']: |
| 483 | + rp_unit = 'Weeks' |
| 484 | + else: |
| 485 | + LOGGER.warning("Hazard's frequency unit %s is not known, " |
| 486 | + "years will be used as return period unit.", self.frequency_unit) |
| 487 | + rp_unit = 'Years' |
| 488 | + |
| 489 | + # Ensure threshold_intensities is a numpy array |
| 490 | + threshold_intensities = np.array(threshold_intensities) |
| 491 | + |
| 492 | + num_cen = self.intensity.shape[1] |
| 493 | + return_periods = np.zeros((len(threshold_intensities), num_cen)) |
| 494 | + |
| 495 | + # batch_centroids = number of centroids that are handled in parallel: |
| 496 | + # batch_centroids = maximal matrix size // number of events |
| 497 | + batch_centroids = CONFIG.max_matrix_size.int() // self.intensity.shape[0] |
| 498 | + if batch_centroids < 1: |
| 499 | + raise ValueError('Increase max_matrix_size configuration parameter to >' |
| 500 | + f'{self.intensity.shape[0]}') |
| 501 | + |
| 502 | + # Process the intensities in chunks of centroids |
| 503 | + for start_col in range(0, num_cen, batch_centroids): |
| 504 | + end_col = min(start_col + batch_centroids, num_cen) |
| 505 | + return_periods[:, start_col:end_col] = self._loc_return_period( |
| 506 | + threshold_intensities, |
| 507 | + self.intensity[:, start_col:end_col].toarray() |
| 508 | + ) |
| 509 | + |
| 510 | + # create the output GeoDataFrame |
| 511 | + gdf = gpd.GeoDataFrame(geometry=self.centroids.gdf['geometry'],
| 512 | + crs=self.centroids.gdf.crs)
| 513 | + col_names = [f'{thresh_inten}' for thresh_inten in threshold_intensities]
| 514 | + gdf[col_names] = return_periods.T |
| 515 | + |
| 516 | + # create label and column_label |
| 517 | + label = f'Return Periods ({rp_unit})' |
| 518 | + column_label = lambda column_names: [f'Threshold Intensity: {col} {self.units}' |
| 519 | + for col in column_names] |
| 520 | + |
| 521 | + return gdf, label, column_label |
| 522 | + |
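As a usage note for reviewers: a minimal sketch of how the new public method could be called. The HDF5 file name and the chosen thresholds are hypothetical placeholders, and `Hazard.from_hdf5` is just one possible way to obtain a hazard object.

```python
from climada.hazard import Hazard

# Load any hazard set; the file path is a placeholder.
haz = Hazard.from_hdf5('my_hazard.h5')

# Local return periods at three intensity thresholds (in the hazard's intensity units).
gdf, label, column_label = haz.local_return_period(threshold_intensities=(10., 20., 30.))

value_cols = [col for col in gdf.columns if col != 'geometry']
print(label)                      # e.g. 'Return Periods (Years)'
print(column_label(value_cols))   # e.g. ['Threshold Intensity: 10.0 m/s', ...]
print(gdf.head())                 # one row per centroid, one column per threshold
```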
453 | 523 | def get_event_id(self, event_name): |
454 | 524 | """Get an event id from its name. Several events might have the same |
455 | 525 | name. |
@@ -614,6 +684,52 @@ def _loc_return_inten(self, return_periods, inten, exc_inten): |
614 | 684 | inten_sort[:, cen_idx], freq_sort[:, cen_idx], |
615 | 685 | self.intensity_thres, return_periods) |
616 | 686 |
|
| 687 | + def _loc_return_period(self, threshold_intensities, inten): |
| 688 | + """Compute local return periods for user-specified threshold intensities |
| 689 | + for a subset of hazard centroids.
| 690 | +
|
| 691 | + Parameters |
| 692 | + ---------- |
| 693 | + threshold_intensities: np.array |
| 694 | + User-specified hazard intensities for which the return period should be calculated |
| 695 | + locally (at each centroid). |
| 696 | + inten: np.array |
| 697 | + Subarray of the full hazard intensities corresponding to a subset of the centroids
| 698 | + (rows correspond to events, columns correspond to centroids)
| 699 | +
|
| 700 | + Returns |
| 701 | + ------- |
| 702 | + np.array |
| 703 | + (rows correspond to threshold_intensities, columns correspond to centroids)
| 704 | + """ |
| 705 | + # Sort intensities per centroid in descending order and accumulate their frequencies
| 706 | + sort_pos = np.argsort(inten, axis=0)[::-1, :] |
| 707 | + inten_sort = inten[sort_pos, np.arange(inten.shape[1])] |
| 708 | + freq_sort = self.frequency[sort_pos] |
| 709 | + freq_sort = np.cumsum(freq_sort, axis=0) |
| 710 | + return_periods = np.zeros((len(threshold_intensities), inten.shape[1])) |
| 711 | + |
| 712 | + for cen_idx in range(inten.shape[1]): |
| 713 | + sorted_inten_cen = inten_sort[:, cen_idx] |
| 714 | + cum_freq_cen = freq_sort[:, cen_idx] |
| 715 | + |
| 716 | + for i, intensity in enumerate(threshold_intensities): |
| 717 | + # Count how many sorted intensities fall strictly below the threshold intensity
| 718 | + exceedance_index = np.searchsorted(sorted_inten_cen[::-1], intensity, side='left') |
| 719 | + |
| 720 | + # Calculate exceedance probability |
| 721 | + if exceedance_index < len(cum_freq_cen): |
| 722 | + exceedance_probability = cum_freq_cen[-exceedance_index - 1] |
| 723 | + else: |
| 724 | + exceedance_probability = 0 # Or set a default minimal probability |
| 725 | + |
| 726 | + # Calculate and store return period |
| 727 | + if exceedance_probability > 0: |
| 728 | + return_periods[i, cen_idx] = 1 / exceedance_probability |
| 729 | + else: |
| 730 | + return_periods[i, cen_idx] = np.nan |
| 731 | + return return_periods |
| 732 | + |
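For intuition on the per-centroid step-function lookup that `_loc_return_period` performs, here is a self-contained toy example with made-up numbers (not taken from this diff):

```python
import numpy as np

# Toy data for one centroid: three events with annual occurrence frequencies.
intensities = np.array([12.0, 30.0, 18.0])   # event intensities at the centroid
frequency = np.array([0.10, 0.01, 0.05])     # events per year

# Sort by intensity (descending) and accumulate the frequencies (the step function).
order = np.argsort(intensities)[::-1]
inten_sort = intensities[order]              # [30., 18., 12.]
cum_freq = np.cumsum(frequency[order])       # [0.01, 0.06, 0.16]

threshold = 15.0
# Number of sorted intensities strictly below the threshold (ascending view).
idx = np.searchsorted(inten_sort[::-1], threshold, side='left')
if idx < len(cum_freq):
    exceed_freq = cum_freq[-idx - 1]         # frequency of events >= threshold: 0.06
    print(1.0 / exceed_freq)                 # return period, here ~16.7 years
else:
    print(np.nan)                            # threshold exceeds all observed intensities
```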
617 | 733 | def _check_events(self): |
618 | 734 | """Check that all attributes but centroids contain consistent data. |
619 | 735 | Put default date, event_name and orig if not provided. Check not |
|