From 41199cabc5afa423a6d46754dfe96dd14236b44f Mon Sep 17 00:00:00 2001 From: veenstrajelmer Date: Tue, 2 Jul 2024 16:50:19 +0200 Subject: [PATCH] reverted --- examples/KWK_process.py | 20 ++++++------ kenmerkendewaarden/overschrijding.py | 46 +++++++++------------------- tests/test_overschrijding.py | 1 - 3 files changed, 24 insertions(+), 43 deletions(-) diff --git a/examples/KWK_process.py b/examples/KWK_process.py index 063b1d1..69c4624 100644 --- a/examples/KWK_process.py +++ b/examples/KWK_process.py @@ -229,7 +229,7 @@ def initiate_dist_with_hydra_nl(station): # get Hydra-NL and KWK-RMM validation data (only available for selection of stations) # TODO: this data is not reproducible yet: https://github.com/Deltares-research/kenmerkendewaarden/issues/107 - # TODO: HOEKVHLD Hydra values are different than old ones in p:\archivedprojects\11205258-005-kpp2020_rmm-g5\C_Work\00_KenmerkendeWaarden\Onder_overschrijdingslijnen_Boyan\Data\Processed_HydraNL + # TODO: HOEKVHLD Hydra values are different than old ones in p:\archivedprojects\11205258-005-kpp2020_rmm-g5\C_Work\00_KenmerkendeWaarden\Onder_overschrijdingslijnen_Boyan\Data\Processed_HydraNL\Without_model_uncertainty\Hoek_van_Holland.csv dist_dict = {} dir_overschr_hydra = os.path.join(dir_base,'data_hydraNL') file_hydra_nl = os.path.join(dir_overschr_hydra, f'{station}.xls') @@ -238,19 +238,16 @@ def initiate_dist_with_hydra_nl(station): df_hydra_nl['values_Tfreq'] = 1/ df_hydra_nl['Terugkeertijd [jaar]'] df_hydra_nl['values'] = df_hydra_nl['Belastingniveau [m+NAP]/Golfparameter [m]/[s]/Sterkte bekleding [-]'] df_hydra_nl = df_hydra_nl.loc[:, ['values_Tfreq','values']] - df_hydra_nl.attrs['station'] = station dist_dict['Hydra-NL'] = df_hydra_nl return dist_dict - def add_validation_dist(dist_dict, dist_type, station): + def add_validation_dist(dist_dict, dist_type): dir_overschr_vali = os.path.join(dir_base,'data_overschrijding','Tables') - file_validation = os.path.join(dir_overschr_vali, f'{dist_type}_lines', f'{dist_type}_lines_{station}.csv') + file_validation = os.path.join(dir_overschr_vali, f'{dist_type}_lines', f'{dist_type}_lines_{current_station}.csv') if not os.path.exists(file_validation): return - df_validation = pd.read_csv(file_validation, sep=';') - df_validation['values'] /= 100 - df_validation.attrs['station'] = station - dist_dict['validation'] = df_validation + dist_dict['validation'] = pd.read_csv(file_validation, sep=';') + dist_dict['validation']['values'] /= 100 Tfreqs_interested = [5, 2, 1, 1/2, 1/5, 1/10, 1/20, 1/50, 1/100, 1/200, 1/500, 1/1000, 1/2000, 1/4000, 1/5000, 1/10000] @@ -261,13 +258,14 @@ def add_validation_dist(dist_dict, dist_type, station): # only include data up to year_slotgem data_pd_measext = data_pd_HWLW_all_12.loc[:tstop_dt] + + dist_exc_hydra = initiate_dist_with_hydra_nl(station=current_station) # 1. Exceedance - dist_exc_hydra = initiate_dist_with_hydra_nl(station=current_station) dist_exc = kw.calc_overschrijding(df_ext=data_pd_measext, rule_type=None, rule_value=None, clip_physical_break=True, dist=dist_exc_hydra, interp_freqs=Tfreqs_interested) - add_validation_dist(dist_exc, dist_type='exceedance', station=current_station) + add_validation_dist(dist_exc, dist_type='exceedance') df_interp = dist_exc['Geinterpoleerd'] df_interp.to_csv(os.path.join(dir_overschrijding, f'Exceedance_{current_station}.csv'), index=False, sep=';') @@ -279,7 +277,7 @@ def add_validation_dist(dist_dict, dist_type, station): dist_dec = kw.calc_overschrijding(df_ext=data_pd_measext, rule_type=None, rule_value=None, clip_physical_break=True, inverse=True, interp_freqs=Tfreqs_interested) - add_validation_dist(dist_dec, dist_type='deceedance', station=current_station) + add_validation_dist(dist_dec, dist_type='deceedance') df_interp = dist_dec['Geinterpoleerd'] df_interp.to_csv(os.path.join(dir_overschrijding, f'Deceedance_{current_station}.csv'), index=False, sep=';') diff --git a/kenmerkendewaarden/overschrijding.py b/kenmerkendewaarden/overschrijding.py index ed7523f..736de82 100644 --- a/kenmerkendewaarden/overschrijding.py +++ b/kenmerkendewaarden/overschrijding.py @@ -246,7 +246,7 @@ def distribution(df: pd.DataFrame, col: str = None, def get_weibull(df: pd.DataFrame, threshold: float, Tfreqs: np.ndarray, col: str = None, inverse: bool = False) -> pd.DataFrame: col = df.columns[0] if col is None else col - + values = df[col].values if inverse: values = -values @@ -281,8 +281,6 @@ def cost_func(params, *args): new_values = -new_values pd_return = pd.DataFrame(data={f'{col}_Tfreq': Tfreqs,col: new_values}).sort_values(by=f'{col}_Tfreq', ascending=False) - # copy attributes - pd_return.attrs = df.attrs return pd_return @@ -343,12 +341,6 @@ def apply_trendanalysis(df: pd.DataFrame, rule_type: str, rule_value: Union[pd.T def blend_distributions(df_trend: pd.DataFrame, df_weibull: pd.DataFrame, df_hydra: pd.DataFrame = None) -> pd.DataFrame: - - # get and compare station attributes - df_list = [df_trend, df_weibull, df_hydra] - station_attrs = [df.attrs["station"] for df in df_list if df is not None] - assert all(x == station_attrs[0] for x in station_attrs) - df_trend = df_trend.sort_values(by='values_Tfreq', ascending=False) df_weibull = df_weibull.sort_values(by='values_Tfreq', ascending=False) @@ -359,7 +351,6 @@ def blend_distributions(df_trend: pd.DataFrame, df_weibull: pd.DataFrame, df_hyd # Weibull to Hydra if df_hydra is not None: - df_hydra = df_hydra.sort_values(by='values_Tfreq', ascending=False) Tfreqs_combined = np.unique(np.concatenate((df_weibull['values_Tfreq'].values, df_hydra['values_Tfreq'].values))) @@ -391,26 +382,14 @@ def blend_distributions(df_trend: pd.DataFrame, df_weibull: pd.DataFrame, df_hyd (df_weibull['values_Tfreq'] < df_blended1['values_Tfreq'].iloc[-1])], df_blended2, df_hydra.loc[df_hydra['values_Tfreq'] < df_blended2['values_Tfreq'].iloc[-1]]], axis=0) - else: + df_blended = df_blended.drop_duplicates(subset='values_Tfreq').sort_values(by='values_Tfreq', ascending=False) + else: df_blended = pd.concat([df_blended1, df_weibull.loc[(df_weibull['values_Tfreq'] < df_blended1['values_Tfreq'].iloc[-1])]], - axis=0) - - df_blended = df_blended.drop_duplicates(subset='values_Tfreq').sort_values(by='values_Tfreq', ascending=False) - - # copy attrs - df_blended.attrs = df_trend.attrs - return df_blended - + axis=0).drop_duplicates(subset='values_Tfreq').sort_values(by='values_Tfreq', + ascending=False) -def interpolate_interested_Tfreqs(df: pd.DataFrame, Tfreqs: List[float]) -> pd.DataFrame: - df_interp = pd.DataFrame(data={'values': np.interp(Tfreqs, - np.flip(df['values_Tfreq'].values), - np.flip(df['values'].values)), - 'values_Tfreq': Tfreqs}).sort_values(by='values_Tfreq', ascending=False) - # copy attrs - df_interp.attrs = df.attrs - return df_interp + return df_blended def plot_overschrijding(dist: dict): @@ -430,10 +409,7 @@ def plot_overschrijding(dist: dict): Figure axis handle. """ - # get and compare station attributes - station_attrs = [v.attrs["station"] for k,v in dist.items()] - assert all(x == station_attrs[0] for x in station_attrs) - station = station_attrs[0] + station = dist["Ongefilterd"].attrs["station"] color_map = {'Ongefilterd': 'b', 'Gefilterd': 'orange', 'Trendanalyse': 'g', 'Weibull': 'r', 'Hydra-NL': 'm', 'Hydra-NL met modelonzekerheid': 'cyan', @@ -469,3 +445,11 @@ def plot_overschrijding(dist: dict): ax.set_axisbelow(True) fig.tight_layout() return fig,ax + + +def interpolate_interested_Tfreqs(df: pd.DataFrame, Tfreqs: List[float]) -> pd.DataFrame: + df_interp = pd.DataFrame(data={'values': np.interp(Tfreqs, + np.flip(df['values_Tfreq'].values), + np.flip(df['values'].values)), + 'values_Tfreq': Tfreqs}).sort_values(by='values_Tfreq', ascending=False) + return df_interp \ No newline at end of file diff --git a/tests/test_overschrijding.py b/tests/test_overschrijding.py index 3180fa4..35ee7bb 100644 --- a/tests/test_overschrijding.py +++ b/tests/test_overschrijding.py @@ -32,7 +32,6 @@ def test_calc_overschrijding_with_hydra(df_ext_12_2010_2014): 'values_Tfreq': np.array([1.00000000e+00, 1.00000000e-01, 2.00000000e-02, 1.00000000e-02, 3.33333333e-03, 1.00000000e-03, 3.33333333e-04, 1.00000000e-04, 3.33333333e-05, 1.00000000e-05, 1.00000000e-06])}) - df_hydra.attrs = df_ext_12_2010_2014.attrs dist_hydra = {"Hydra-NL": df_hydra} dist = kw.calc_overschrijding(df_ext=df_ext_12_2010_2014, interp_freqs=Tfreqs_interested, dist=dist_hydra)