-
Notifications
You must be signed in to change notification settings - Fork 132
Add functions to compute, plot, store the local hazard exceedence intensity and RP maps (new branch) #898
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add functions to compute, plot, store the local hazard exceedence intensity and RP maps (new branch) #898
Changes from 10 commits
1e91def
e7a5913
81dc151
f5c3d4e
c588124
0711526
1b29f1f
24ba039
d79b670
f228918
5198646
fe99121
8dc544f
8b0d482
acddaa3
0e50ebe
7f61c52
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -27,6 +27,7 @@ | |
from typing import Optional,List | ||
import warnings | ||
|
||
import geopandas as gpd | ||
import numpy as np | ||
from pathos.pools import ProcessPool as Pool | ||
from scipy import sparse | ||
|
@@ -39,6 +40,7 @@ | |
import climada.util.constants as u_const | ||
import climada.util.coordinates as u_coord | ||
import climada.util.dates_times as u_dt | ||
from climada.util.plot import GdfMeta | ||
|
||
|
||
LOGGER = logging.getLogger(__name__) | ||
|
@@ -450,6 +452,60 @@ def sanitize_event_ids(self): | |
LOGGER.debug('Resetting event_id.') | ||
self.event_id = np.arange(1, self.event_id.size + 1) | ||
|
||
def local_return_period(self, threshold_intensities): | ||
"""Compute local return periods for given hazard intensities. | ||
|
||
Parameters | ||
---------- | ||
threshold_intensities : np.array | ||
Hazard intensities to consider. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I find the name There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what about "test_intensities"? I agree, I'll update the docstrings There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. hmmm I think the word |
||
|
||
Returns | ||
------- | ||
gdf : gpd.GeoDataFrame | ||
GeoDataFrame containing return periods for given threshold intensity | ||
description of units of values and threshold intensities in gdf.columns.name | ||
ValentinGebhart marked this conversation as resolved.
Show resolved
Hide resolved
|
||
""" | ||
#check frequency unit | ||
if self.frequency_unit not in ['1/year', 'annual', '1/y', '1/a']: | ||
LOGGER.warning("The Hazard's frequency unit is %s and not %s which " | ||
"most likely leads to incorrect results", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree, I could just use the inverse of the frequency attribute. I now added using a if elif statement that can read years, months, and weeks. I don't know how to make this more generic and easier because the frequency attribute is just a string and not a "physical" frequency unit that can be converted eg to years. |
||
self.frequency_unit, u_const.DEF_FREQ_UNIT) | ||
|
||
# Ensure threshold_intensities is a numpy array | ||
threshold_intensities = np.array(threshold_intensities) | ||
|
||
num_cen = self.intensity.shape[1] | ||
return_periods = np.zeros((len(threshold_intensities), num_cen)) | ||
|
||
# Process each centroid in chunks as in local_exceedance_inten | ||
block_size = CONFIG.max_matrix_size.int() // self.intensity.shape[0] | ||
if not block_size: | ||
raise ValueError('Increase max_matrix_size configuration parameter to >' | ||
f'{self.intensity.shape[0]}') | ||
|
||
for start_col in range(0, num_cen, block_size): | ||
end_col = min(start_col + block_size, num_cen) | ||
return_periods[:, start_col:end_col] = self._loc_return_period( | ||
threshold_intensities, | ||
self.intensity[:, start_col:end_col].toarray() | ||
) | ||
ValentinGebhart marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
# create the output GeoDataFrame | ||
gdf = gpd.GeoDataFrame(geometry = self.centroids.gdf['geometry'], crs = self.centroids.gdf.crs) | ||
col_names = [f'{tresh_inten}' for tresh_inten in threshold_intensities] | ||
gdf[col_names] = return_periods.T | ||
|
||
#create gdf meta data | ||
gdf_meta = GdfMeta( | ||
name = 'Return Period', | ||
unit = 'Years', | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why not use the units of the frequency (or rather its inverse)? |
||
col_name = 'Threshold Intensity', | ||
col_unit = self.units | ||
) | ||
|
||
return gdf, gdf_meta | ||
|
||
def get_event_id(self, event_name): | ||
"""Get an event id from its name. Several events might have the same | ||
name. | ||
|
@@ -613,6 +669,48 @@ def _loc_return_inten(self, return_periods, inten, exc_inten): | |
exc_inten[:, cen_idx] = self._cen_return_inten( | ||
inten_sort[:, cen_idx], freq_sort[:, cen_idx], | ||
self.intensity_thres, return_periods) | ||
|
||
def _loc_return_period(self, threshold_intensities, inten): | ||
"""Compute local return periods for given hazard intensities for a specific chunk of data. | ||
ValentinGebhart marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
Parameters | ||
---------- | ||
threshold_intensities: np.array | ||
Given hazard intensities for which to calculate return periods. | ||
inten: np.array | ||
The intensity array for a specific chunk of data. | ||
ValentinGebhart marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
Returns | ||
------- | ||
np.array | ||
""" | ||
# Assuming inten is sorted and calculating cumulative frequency | ||
sort_pos = np.argsort(inten, axis=0)[::-1, :] | ||
ValentinGebhart marked this conversation as resolved.
Show resolved
Hide resolved
|
||
inten_sort = inten[sort_pos, np.arange(inten.shape[1])] | ||
freq_sort = self.frequency[sort_pos] | ||
freq_sort = np.cumsum(freq_sort, axis=0) | ||
return_periods = np.zeros((len(threshold_intensities), inten.shape[1])) | ||
|
||
for cen_idx in range(inten.shape[1]): | ||
sorted_inten_cen = inten_sort[:, cen_idx] | ||
cum_freq_cen = freq_sort[:, cen_idx] | ||
|
||
for i, intensity in enumerate(threshold_intensities): | ||
# Find the first occurrence where the intensity is less than the sorted intensities | ||
exceedance_index = np.searchsorted(sorted_inten_cen[::-1], intensity, side='left') | ||
|
||
# Calculate exceedance probability | ||
if exceedance_index < len(cum_freq_cen): | ||
exceedance_probability = cum_freq_cen[-exceedance_index - 1] | ||
else: | ||
exceedance_probability = 0 # Or set a default minimal probability | ||
|
||
# Calculate and store return period | ||
if exceedance_probability > 0: | ||
return_periods[i, cen_idx] = 1 / exceedance_probability | ||
else: | ||
return_periods[i, cen_idx] = np.nan | ||
return return_periods | ||
|
||
def _check_events(self): | ||
"""Check that all attributes but centroids contain consistent data. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,9 +19,11 @@ | |
Define Hazard Plotting Methods. | ||
""" | ||
|
||
import geopandas as gpd | ||
import numpy as np | ||
import matplotlib.pyplot as plt | ||
|
||
import climada.util.coordinates as u_coord | ||
import climada.util.plot as u_plot | ||
|
||
|
||
|
@@ -65,6 +67,37 @@ def plot_rp_intensity(self, return_periods=(25, 50, 100, 250), | |
colbar_name, title, smooth=smooth, axes=axis, | ||
figsize=figsize, adapt_fontsize=adapt_fontsize, **kwargs) | ||
return axis, inten_stats | ||
|
||
def plot_local_rp(self, threshold_intensities, smooth=True, axis=None, figsize=(9, 13), adapt_fontsize=True, cmap = 'viridis_r', **kwargs): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Formatting. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So, if a user wants to adapt the say color map, they must recompute the return periods every time? This seems rather inefficient. Would it not make sense to be able to pass the return periods to this function? Something like
Then, if return_periods is none, compute them. Else, use the return periods to make the graph. Maybe throw a warning if the threshold_itnensities are not in the gdf. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. sorry, I forgot to remove this function. The functionality is now in |
||
"""Plot hazard local return periods for given hazard intensities. | ||
|
||
Parameters | ||
---------- | ||
threshold_intensities: np.array | ||
Hazard intensities to consider for calculating return periods. | ||
smooth: bool, optional | ||
Smooth plot to plot.RESOLUTION x plot.RESOLUTION. | ||
axis: matplotlib.axes._subplots.AxesSubplot, optional | ||
Axis to use. | ||
figsize: tuple, optional | ||
Figure size for plt.subplots. | ||
kwargs: optional | ||
Arguments for pcolormesh matplotlib function used in event plots. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Some parameters are missing. |
||
|
||
Returns | ||
------- | ||
axis: matplotlib.axes._subplots.AxesSubplot | ||
Matplotlib axis with the plot. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Incomplete. |
||
""" | ||
return_periods = self.local_return_period(threshold_intensities) | ||
colbar_name = 'Return Period (years)' | ||
title = list() | ||
for haz_int in threshold_intensities: | ||
title.append('Intensity: ' + f'{haz_int} {self.units}') | ||
axis = u_plot.geo_im_from_array(return_periods, self.centroids.coord, | ||
colbar_name, title, smooth=smooth, axes=axis, | ||
figsize=figsize, adapt_fontsize=adapt_fontsize, cmap=cmap, **kwargs) | ||
return axis, return_periods | ||
|
||
def plot_intensity(self, event=None, centr=None, smooth=True, axis=None, adapt_fontsize=True, | ||
**kwargs): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,8 +21,10 @@ | |
import unittest | ||
from unittest.mock import patch | ||
import datetime as dt | ||
import os | ||
ValentinGebhart marked this conversation as resolved.
Show resolved
Hide resolved
|
||
from pathlib import Path | ||
from tempfile import TemporaryDirectory | ||
import rasterio | ||
ValentinGebhart marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
from pyproj import CRS | ||
import numpy as np | ||
|
@@ -32,7 +34,9 @@ | |
from climada.hazard.base import Hazard | ||
from climada.util.constants import DEF_FREQ_UNIT, HAZ_TEMPLATE_XLS, HAZ_DEMO_FL, DEF_CRS | ||
from climada.hazard.test.test_base import DATA_DIR, dummy_hazard | ||
from climada.test import get_test_file | ||
|
||
HAZ_TEST_TC :Path = get_test_file('test_tc_florida') | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this is not part of this PR. |
||
|
||
class TestReadDefaultNetCDF(unittest.TestCase): | ||
"""Test reading a NetCDF file where the coordinates to read match the dimensions""" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -44,6 +44,8 @@ | |
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER | ||
from rasterio.crs import CRS | ||
import requests | ||
from geopandas import GeoDataFrame | ||
from collections import namedtuple | ||
|
||
from climada.util.constants import CMAP_EXPOSURES, CMAP_CAT, CMAP_RASTER | ||
from climada.util.files_handler import to_list | ||
|
@@ -874,3 +876,73 @@ def multibar_plot(ax, data, colors=None, total_width=0.8, single_width=1, | |
# Draw legend if we need | ||
if legend: | ||
ax.legend(bars, data.keys()) | ||
|
||
# Create GdfMeta class (as a named tuple) for GeoDataFrame meta data | ||
GdfMeta = namedtuple('GdfMeta', ['name', 'unit', 'col_name', 'col_unit']) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. hmmm I am not sure whether this is a good idea. Maybe it is, maybe not. @emanuel-schmid What do you think? Should we add some nametuple in the utility plot for a specific case of geopandas plotting? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To add more details: this is called very generically
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I suggested this to pass information to the plot that is not explicitly contained in the GeoDataFrame. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I like the idea of passing some meta data for the plotting. I am not sure that this is the perfect way this way :D At first I thought that And, There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @ValentinGebhart, could we maybe find another way to put this information into the GeoDataFrame? @chahank and I had the following ideas when discussing this in person:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree that the naming is not optimal. The included information should be: what quantity (+ units) is represented by the values in the gdf, what quantity (+ units) is represented by the values in the column labels. The rows always correspond to centroids. Maybe instead of Otherwise, the plotting function should be "generic", in the sense that it produces one subplot per column of a generic gdf, using the values to color the different points, and using the gdf_meta varibale for figure and axis label. Of course, it is still using the geo_im_from_array util function, and I'm sure there are other ways to plot a gdf with several columns. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @peanutfun sorry I didn't see your suggestion in time. We can definitely add the labels and units as parameter(s) to the plot function. Then the user has to know about what their gdf represents without us helping by providing the information, which I guess is ok but is a bit more work to the user. If we put the as defaults to the plot function, the plot function is not generic anymore. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Basically along the same lines as @peanutfun To get the same result as implemented now when calling subplots_from_gdf(
gdf=gdf,
gdf_meta=GdfMeta(name='Return Period', unit='Years', col_name='Threshold Intensity', col_unit='m/s')
) you would subplots_from_gdf(
gdf=gdf,
colbar_name="Return Period (Years)",
title_subplots=lambda column: f"Threshold Intensity: {column} m/s",
) Seems better readable and more flexible There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. consequently, I'd let the Returns
-------
gdf : GeoDataFrame
label : str
GeoDataFrame label, for reporting and plotting
column_label : lambda
Column label, for reporting and plotting in case the user is not 100% sure what they did and is happy to get this kind of conformation. (Why not?) |
||
|
||
def subplots_from_gdf(gdf: GeoDataFrame, gdf_meta: GdfMeta = None, smooth=True, axis=None, figsize=(9, 13), adapt_fontsize=True, **kwargs): | ||
"""Plot hazard local return periods for given hazard intensities. | ||
|
||
Parameters | ||
---------- | ||
gdf: gpd.GeoDataFrame | ||
return periods per threshold intensity | ||
gdf_meta: | ||
climada.util.plot.GdfMeta | ||
gdf meta data in named tuple with attributes 'name' (quantity in gdf), 'unit', (unit thereof) | ||
'col_name' (quantity in column labels), 'col_unit' (unit thereof) | ||
smooth: bool, optional | ||
Smooth plot to plot.RESOLUTION x plot.RESOLUTION. Default is True | ||
axis: matplotlib.axes._subplots.AxesSubplot, optional | ||
Axis to use. Default is None | ||
figsize: tuple, optional | ||
Figure size for plt.subplots. Default is (9, 13) | ||
adapt_fontsize: bool, optional | ||
If set to true, the size of the fonts will be adapted to the size of the figure. | ||
Otherwise the default matplotlib font size is used. Default is True. | ||
kwargs: optional | ||
Arguments for pcolormesh matplotlib function used in event plots. | ||
|
||
Returns | ||
------- | ||
axis: matplotlib.axes._subplots.AxesSubplot | ||
Matplotlib axis with the plot. | ||
""" | ||
# check if inputs are correct types | ||
if not isinstance(gdf, GeoDataFrame): | ||
raise ValueError("gdf is not a GeoDataFrame") | ||
gdf = gdf[['geometry', *[col for col in gdf.columns if col != 'geometry']]] | ||
|
||
# read meta data for fig and axis labels | ||
if not isinstance(gdf_meta, GdfMeta): | ||
#warnings.warn("gdf_meta variable is not of type climada.util.plot.GdfMeta. Figure and axis labels will be missing.") | ||
print("gdf_meta variable is not of type climada.util.plot.GdfMeta. Figure and axis labels will be missing.") | ||
colbar_name, title_subplots = None, [f"{col}" for col in gdf.columns[1:]] | ||
else: | ||
colbar_name = f"{gdf_meta.name} ({gdf_meta.unit})" | ||
title_subplots = [f"{gdf_meta.col_name}: {thres_inten} {gdf_meta.col_unit}" | ||
for thres_inten in gdf.columns[1:]] | ||
|
||
# change default plot kwargs if plotting return periods | ||
if gdf_meta.name == 'Return Period': | ||
if 'cmap' not in kwargs.keys(): | ||
kwargs.update({'cmap': 'viridis_r'}) | ||
if 'norm' not in kwargs.keys(): | ||
kwargs.update( | ||
{'norm': mpl.colors.LogNorm(vmin=gdf.values[:,1:].min(), vmax=gdf.values[:,1:].max()), | ||
'vmin': None, 'vmax': None} | ||
) | ||
|
||
axis = geo_im_from_array( | ||
gdf.values[:,1:].T, | ||
gdf.geometry.get_coordinates().values[:,::-1], | ||
colbar_name, | ||
title_subplots, | ||
smooth=smooth, | ||
axes=axis, | ||
figsize=figsize, | ||
adapt_fontsize=adapt_fontsize, | ||
**kwargs | ||
) | ||
|
||
return axis |
Large diffs are not rendered by default.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It would be good to have a clear explanation here of what this method is doing. How are the local return periods computed? Interpolation? Fitting?