diff --git a/examples/KWK_process.py b/examples/KWK_process.py index 69c4624..d48df49 100644 --- a/examples/KWK_process.py +++ b/examples/KWK_process.py @@ -229,7 +229,8 @@ def initiate_dist_with_hydra_nl(station): # get Hydra-NL and KWK-RMM validation data (only available for selection of stations) # TODO: this data is not reproducible yet: https://github.com/Deltares-research/kenmerkendewaarden/issues/107 - # TODO: HOEKVHLD Hydra values are different than old ones in p:\archivedprojects\11205258-005-kpp2020_rmm-g5\C_Work\00_KenmerkendeWaarden\Onder_overschrijdingslijnen_Boyan\Data\Processed_HydraNL\Without_model_uncertainty\Hoek_van_Holland.csv + # TODO: HOEKVHLD Hydra values are different than old ones in p:\archivedprojects\11205258-005-kpp2020_rmm-g5\C_Work\00_KenmerkendeWaarden\Onder_overschrijdingslijnen_Boyan\Data\Processed_HydraNL + dist_dict = {} dir_overschr_hydra = os.path.join(dir_base,'data_hydraNL') file_hydra_nl = os.path.join(dir_overschr_hydra, f'{station}.xls') @@ -238,16 +239,19 @@ def initiate_dist_with_hydra_nl(station): df_hydra_nl['values_Tfreq'] = 1/ df_hydra_nl['Terugkeertijd [jaar]'] df_hydra_nl['values'] = df_hydra_nl['Belastingniveau [m+NAP]/Golfparameter [m]/[s]/Sterkte bekleding [-]'] df_hydra_nl = df_hydra_nl.loc[:, ['values_Tfreq','values']] + df_hydra_nl.attrs['station'] = station dist_dict['Hydra-NL'] = df_hydra_nl return dist_dict - def add_validation_dist(dist_dict, dist_type): + def add_validation_dist(dist_dict, dist_type, station): dir_overschr_vali = os.path.join(dir_base,'data_overschrijding','Tables') - file_validation = os.path.join(dir_overschr_vali, f'{dist_type}_lines', f'{dist_type}_lines_{current_station}.csv') + file_validation = os.path.join(dir_overschr_vali, f'{dist_type}_lines', f'{dist_type}_lines_{station}.csv') if not os.path.exists(file_validation): return - dist_dict['validation'] = pd.read_csv(file_validation, sep=';') - dist_dict['validation']['values'] /= 100 + df_validation = pd.read_csv(file_validation, sep=';') + df_validation['values'] /= 100 + df_validation.attrs['station'] = station + dist_dict['validation'] = df_validation Tfreqs_interested = [5, 2, 1, 1/2, 1/5, 1/10, 1/20, 1/50, 1/100, 1/200, 1/500, 1/1000, 1/2000, 1/4000, 1/5000, 1/10000] @@ -258,14 +262,12 @@ def add_validation_dist(dist_dict, dist_type): # only include data up to year_slotgem data_pd_measext = data_pd_HWLW_all_12.loc[:tstop_dt] - - dist_exc_hydra = initiate_dist_with_hydra_nl(station=current_station) - # 1. Exceedance + dist_exc_hydra = initiate_dist_with_hydra_nl(station=current_station) dist_exc = kw.calc_overschrijding(df_ext=data_pd_measext, rule_type=None, rule_value=None, clip_physical_break=True, dist=dist_exc_hydra, interp_freqs=Tfreqs_interested) - add_validation_dist(dist_exc, dist_type='exceedance') + add_validation_dist(dist_exc, dist_type='exceedance', station=current_station) df_interp = dist_exc['Geinterpoleerd'] df_interp.to_csv(os.path.join(dir_overschrijding, f'Exceedance_{current_station}.csv'), index=False, sep=';') @@ -277,7 +279,7 @@ def add_validation_dist(dist_dict, dist_type): dist_dec = kw.calc_overschrijding(df_ext=data_pd_measext, rule_type=None, rule_value=None, clip_physical_break=True, inverse=True, interp_freqs=Tfreqs_interested) - add_validation_dist(dist_dec, dist_type='deceedance') + add_validation_dist(dist_dec, dist_type='deceedance', station=current_station) df_interp = dist_dec['Geinterpoleerd'] df_interp.to_csv(os.path.join(dir_overschrijding, f'Deceedance_{current_station}.csv'), index=False, sep=';') diff --git a/kenmerkendewaarden/overschrijding.py b/kenmerkendewaarden/overschrijding.py index 736de82..468855d 100644 --- a/kenmerkendewaarden/overschrijding.py +++ b/kenmerkendewaarden/overschrijding.py @@ -246,7 +246,7 @@ def distribution(df: pd.DataFrame, col: str = None, def get_weibull(df: pd.DataFrame, threshold: float, Tfreqs: np.ndarray, col: str = None, inverse: bool = False) -> pd.DataFrame: col = df.columns[0] if col is None else col - + values = df[col].values if inverse: values = -values @@ -281,6 +281,8 @@ def cost_func(params, *args): new_values = -new_values pd_return = pd.DataFrame(data={f'{col}_Tfreq': Tfreqs,col: new_values}).sort_values(by=f'{col}_Tfreq', ascending=False) + # copy attributes + pd_return.attrs = df.attrs return pd_return @@ -341,6 +343,12 @@ def apply_trendanalysis(df: pd.DataFrame, rule_type: str, rule_value: Union[pd.T def blend_distributions(df_trend: pd.DataFrame, df_weibull: pd.DataFrame, df_hydra: pd.DataFrame = None) -> pd.DataFrame: + + # get and compare station attributes + df_list = [df_trend, df_weibull, df_hydra] + station_attrs = [df.attrs["station"] for df in df_list if df is not None] + assert all(x == station_attrs[0] for x in station_attrs) + df_trend = df_trend.sort_values(by='values_Tfreq', ascending=False) df_weibull = df_weibull.sort_values(by='values_Tfreq', ascending=False) @@ -382,16 +390,28 @@ def blend_distributions(df_trend: pd.DataFrame, df_weibull: pd.DataFrame, df_hyd (df_weibull['values_Tfreq'] < df_blended1['values_Tfreq'].iloc[-1])], df_blended2, df_hydra.loc[df_hydra['values_Tfreq'] < df_blended2['values_Tfreq'].iloc[-1]]], axis=0) - df_blended = df_blended.drop_duplicates(subset='values_Tfreq').sort_values(by='values_Tfreq', ascending=False) - else: + else: df_blended = pd.concat([df_blended1, df_weibull.loc[(df_weibull['values_Tfreq'] < df_blended1['values_Tfreq'].iloc[-1])]], - axis=0).drop_duplicates(subset='values_Tfreq').sort_values(by='values_Tfreq', - ascending=False) - + axis=0) + + df_blended = df_blended.drop_duplicates(subset='values_Tfreq').sort_values(by='values_Tfreq', ascending=False) + + # copy attrs + df_blended.attrs = df_trend.attrs return df_blended +def interpolate_interested_Tfreqs(df: pd.DataFrame, Tfreqs: List[float]) -> pd.DataFrame: + df_interp = pd.DataFrame(data={'values': np.interp(Tfreqs, + np.flip(df['values_Tfreq'].values), + np.flip(df['values'].values)), + 'values_Tfreq': Tfreqs}).sort_values(by='values_Tfreq', ascending=False) + # copy attrs + df_interp.attrs = df.attrs + return df_interp + + def plot_overschrijding(dist: dict): """ plot overschrijding/onderschrijding @@ -409,7 +429,10 @@ def plot_overschrijding(dist: dict): Figure axis handle. """ - station = dist["Ongefilterd"].attrs["station"] + # get and compare station attributes + station_attrs = [v.attrs["station"] for k,v in dist.items()] + assert all(x == station_attrs[0] for x in station_attrs) + station = station_attrs[0] color_map = {'Ongefilterd': 'b', 'Gefilterd': 'orange', 'Trendanalyse': 'g', 'Weibull': 'r', 'Hydra-NL': 'm', 'Hydra-NL met modelonzekerheid': 'cyan', @@ -445,11 +468,3 @@ def plot_overschrijding(dist: dict): ax.set_axisbelow(True) fig.tight_layout() return fig,ax - - -def interpolate_interested_Tfreqs(df: pd.DataFrame, Tfreqs: List[float]) -> pd.DataFrame: - df_interp = pd.DataFrame(data={'values': np.interp(Tfreqs, - np.flip(df['values_Tfreq'].values), - np.flip(df['values'].values)), - 'values_Tfreq': Tfreqs}).sort_values(by='values_Tfreq', ascending=False) - return df_interp \ No newline at end of file diff --git a/tests/test_overschrijding.py b/tests/test_overschrijding.py index 35ee7bb..3180fa4 100644 --- a/tests/test_overschrijding.py +++ b/tests/test_overschrijding.py @@ -32,6 +32,7 @@ def test_calc_overschrijding_with_hydra(df_ext_12_2010_2014): 'values_Tfreq': np.array([1.00000000e+00, 1.00000000e-01, 2.00000000e-02, 1.00000000e-02, 3.33333333e-03, 1.00000000e-03, 3.33333333e-04, 1.00000000e-04, 3.33333333e-05, 1.00000000e-05, 1.00000000e-06])}) + df_hydra.attrs = df_ext_12_2010_2014.attrs dist_hydra = {"Hydra-NL": df_hydra} dist = kw.calc_overschrijding(df_ext=df_ext_12_2010_2014, interp_freqs=Tfreqs_interested, dist=dist_hydra)