Skip to content

Commit

Permalink
95 compare station attribute in plot overschrijding (#108)
Browse files Browse the repository at this point in the history
* added extra station checks in blend_distributions and plot_overschrijding
  • Loading branch information
veenstrajelmer authored Jul 2, 2024
1 parent 41199ca commit ff47a2c
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 25 deletions.
22 changes: 12 additions & 10 deletions examples/KWK_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,8 @@
def initiate_dist_with_hydra_nl(station):
# get Hydra-NL and KWK-RMM validation data (only available for selection of stations)
# TODO: this data is not reproducible yet: https://github.com/Deltares-research/kenmerkendewaarden/issues/107
# TODO: HOEKVHLD Hydra values are different than old ones in p:\archivedprojects\11205258-005-kpp2020_rmm-g5\C_Work\00_KenmerkendeWaarden\Onder_overschrijdingslijnen_Boyan\Data\Processed_HydraNL\Without_model_uncertainty\Hoek_van_Holland.csv
# TODO: HOEKVHLD Hydra values are different than old ones in p:\archivedprojects\11205258-005-kpp2020_rmm-g5\C_Work\00_KenmerkendeWaarden\Onder_overschrijdingslijnen_Boyan\Data\Processed_HydraNL

dist_dict = {}
dir_overschr_hydra = os.path.join(dir_base,'data_hydraNL')
file_hydra_nl = os.path.join(dir_overschr_hydra, f'{station}.xls')
Expand All @@ -238,16 +239,19 @@ def initiate_dist_with_hydra_nl(station):
df_hydra_nl['values_Tfreq'] = 1/ df_hydra_nl['Terugkeertijd [jaar]']
df_hydra_nl['values'] = df_hydra_nl['Belastingniveau [m+NAP]/Golfparameter [m]/[s]/Sterkte bekleding [-]']
df_hydra_nl = df_hydra_nl.loc[:, ['values_Tfreq','values']]
df_hydra_nl.attrs['station'] = station
dist_dict['Hydra-NL'] = df_hydra_nl
return dist_dict

def add_validation_dist(dist_dict, dist_type, station):
    """
    Add the validation line for a station to a distribution dictionary, if available.

    The csv file is looked up as
    ``<dir_base>/data_overschrijding/Tables/<dist_type>_lines/<dist_type>_lines_<station>.csv``.
    When that file does not exist, ``dist_dict`` is left untouched.

    Parameters
    ----------
    dist_dict : dict
        Dictionary of distributions, updated in place with a 'validation' entry.
    dist_type : str
        Name used in the folder and file name, e.g. 'exceedance' or 'deceedance'.
    station : str
        Station name used in the file name and stored in the dataframe attrs.

    Returns
    -------
    None
    """
    dir_overschr_vali = os.path.join(dir_base, 'data_overschrijding', 'Tables')
    file_validation = os.path.join(dir_overschr_vali, f'{dist_type}_lines', f'{dist_type}_lines_{station}.csv')
    if not os.path.exists(file_validation):
        # validation data is only available for a selection of stations
        return
    df_validation = pd.read_csv(file_validation, sep=';')
    # NOTE(review): presumably converts centimeters to meters - confirm against the source files
    df_validation['values'] /= 100
    # tag the dataframe with its station so downstream station checks can compare it
    df_validation.attrs['station'] = station
    dist_dict['validation'] = df_validation

Tfreqs_interested = [5, 2, 1, 1/2, 1/5, 1/10, 1/20, 1/50, 1/100, 1/200,
1/500, 1/1000, 1/2000, 1/4000, 1/5000, 1/10000]
Expand All @@ -258,14 +262,12 @@ def add_validation_dist(dist_dict, dist_type):
# only include data up to year_slotgem
data_pd_measext = data_pd_HWLW_all_12.loc[:tstop_dt]


dist_exc_hydra = initiate_dist_with_hydra_nl(station=current_station)

# 1. Exceedance
dist_exc_hydra = initiate_dist_with_hydra_nl(station=current_station)
dist_exc = kw.calc_overschrijding(df_ext=data_pd_measext, rule_type=None, rule_value=None,
clip_physical_break=True, dist=dist_exc_hydra,
interp_freqs=Tfreqs_interested)
add_validation_dist(dist_exc, dist_type='exceedance')
add_validation_dist(dist_exc, dist_type='exceedance', station=current_station)
df_interp = dist_exc['Geinterpoleerd']
df_interp.to_csv(os.path.join(dir_overschrijding, f'Exceedance_{current_station}.csv'), index=False, sep=';')

Expand All @@ -277,7 +279,7 @@ def add_validation_dist(dist_dict, dist_type):
dist_dec = kw.calc_overschrijding(df_ext=data_pd_measext, rule_type=None, rule_value=None,
clip_physical_break=True, inverse=True,
interp_freqs=Tfreqs_interested)
add_validation_dist(dist_dec, dist_type='deceedance')
add_validation_dist(dist_dec, dist_type='deceedance', station=current_station)
df_interp = dist_dec['Geinterpoleerd']
df_interp.to_csv(os.path.join(dir_overschrijding, f'Deceedance_{current_station}.csv'), index=False, sep=';')

Expand Down
45 changes: 30 additions & 15 deletions kenmerkendewaarden/overschrijding.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ def distribution(df: pd.DataFrame, col: str = None,
def get_weibull(df: pd.DataFrame, threshold: float, Tfreqs: np.ndarray, col: str = None,
inverse: bool = False) -> pd.DataFrame:
col = df.columns[0] if col is None else col

values = df[col].values
if inverse:
values = -values
Expand Down Expand Up @@ -281,6 +281,8 @@ def cost_func(params, *args):
new_values = -new_values
pd_return = pd.DataFrame(data={f'{col}_Tfreq': Tfreqs,col: new_values}).sort_values(by=f'{col}_Tfreq', ascending=False)

# copy attributes
pd_return.attrs = df.attrs
return pd_return


Expand Down Expand Up @@ -341,6 +343,12 @@ def apply_trendanalysis(df: pd.DataFrame, rule_type: str, rule_value: Union[pd.T


def blend_distributions(df_trend: pd.DataFrame, df_weibull: pd.DataFrame, df_hydra: pd.DataFrame = None) -> pd.DataFrame:

# get and compare station attributes
df_list = [df_trend, df_weibull, df_hydra]
station_attrs = [df.attrs["station"] for df in df_list if df is not None]
assert all(x == station_attrs[0] for x in station_attrs)

df_trend = df_trend.sort_values(by='values_Tfreq', ascending=False)
df_weibull = df_weibull.sort_values(by='values_Tfreq', ascending=False)

Expand Down Expand Up @@ -382,16 +390,28 @@ def blend_distributions(df_trend: pd.DataFrame, df_weibull: pd.DataFrame, df_hyd
(df_weibull['values_Tfreq'] < df_blended1['values_Tfreq'].iloc[-1])],
df_blended2,
df_hydra.loc[df_hydra['values_Tfreq'] < df_blended2['values_Tfreq'].iloc[-1]]], axis=0)
df_blended = df_blended.drop_duplicates(subset='values_Tfreq').sort_values(by='values_Tfreq', ascending=False)
else:
else:
df_blended = pd.concat([df_blended1,
df_weibull.loc[(df_weibull['values_Tfreq'] < df_blended1['values_Tfreq'].iloc[-1])]],
axis=0).drop_duplicates(subset='values_Tfreq').sort_values(by='values_Tfreq',
ascending=False)

axis=0)

df_blended = df_blended.drop_duplicates(subset='values_Tfreq').sort_values(by='values_Tfreq', ascending=False)

# copy attrs
df_blended.attrs = df_trend.attrs
return df_blended


def interpolate_interested_Tfreqs(df: pd.DataFrame, Tfreqs: List[float]) -> pd.DataFrame:
    """
    Interpolate a distribution onto a list of frequencies of interest.

    Parameters
    ----------
    df : pd.DataFrame
        Distribution with a 'values_Tfreq' column (x-coordinates) and a
        'values' column (y-coordinates). The rows may be in any order.
    Tfreqs : List[float]
        Frequencies at which the distribution is evaluated.

    Returns
    -------
    df_interp : pd.DataFrame
        DataFrame with columns 'values' and 'values_Tfreq', sorted by
        descending 'values_Tfreq' and carrying the attrs of ``df``.
    """
    # np.interp requires increasing x-coordinates. Reversing first and then
    # applying a stable sort reproduces exactly the flipped order for input
    # that is already sorted descending, while also handling any other order.
    df_ascending = df.iloc[::-1].sort_values(by='values_Tfreq', ascending=True, kind='stable')
    values_interp = np.interp(Tfreqs,
                              df_ascending['values_Tfreq'].values,
                              df_ascending['values'].values)
    df_interp = pd.DataFrame(data={'values': values_interp,
                                   'values_Tfreq': Tfreqs})
    df_interp = df_interp.sort_values(by='values_Tfreq', ascending=False)
    # copy attrs
    df_interp.attrs = df.attrs
    return df_interp


def plot_overschrijding(dist: dict):
"""
plot overschrijding/onderschrijding
Expand All @@ -409,7 +429,10 @@ def plot_overschrijding(dist: dict):
Figure axis handle.
"""

station = dist["Ongefilterd"].attrs["station"]
# get and compare station attributes
station_attrs = [v.attrs["station"] for k,v in dist.items()]
assert all(x == station_attrs[0] for x in station_attrs)
station = station_attrs[0]

color_map = {'Ongefilterd': 'b', 'Gefilterd': 'orange', 'Trendanalyse': 'g',
'Weibull': 'r', 'Hydra-NL': 'm', 'Hydra-NL met modelonzekerheid': 'cyan',
Expand Down Expand Up @@ -445,11 +468,3 @@ def plot_overschrijding(dist: dict):
ax.set_axisbelow(True)
fig.tight_layout()
return fig,ax


def interpolate_interested_Tfreqs(df: pd.DataFrame, Tfreqs: List[float]) -> pd.DataFrame:
# Interpolate the 'values' column of df onto the requested frequencies Tfreqs.
# Both columns are reversed with np.flip before being passed to np.interp;
# NOTE(review): this presumably relies on df being sorted by descending
# 'values_Tfreq' so that the flipped x-coordinates are increasing - confirm against callers.
# The result is returned sorted by descending 'values_Tfreq'.
df_interp = pd.DataFrame(data={'values': np.interp(Tfreqs,
np.flip(df['values_Tfreq'].values),
np.flip(df['values'].values)),
'values_Tfreq': Tfreqs}).sort_values(by='values_Tfreq', ascending=False)
return df_interp
1 change: 1 addition & 0 deletions tests/test_overschrijding.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def test_calc_overschrijding_with_hydra(df_ext_12_2010_2014):
'values_Tfreq': np.array([1.00000000e+00, 1.00000000e-01, 2.00000000e-02, 1.00000000e-02,
3.33333333e-03, 1.00000000e-03, 3.33333333e-04, 1.00000000e-04,
3.33333333e-05, 1.00000000e-05, 1.00000000e-06])})
df_hydra.attrs = df_ext_12_2010_2014.attrs
dist_hydra = {"Hydra-NL": df_hydra}
dist = kw.calc_overschrijding(df_ext=df_ext_12_2010_2014, interp_freqs=Tfreqs_interested, dist=dist_hydra)

Expand Down

0 comments on commit ff47a2c

Please sign in to comment.