From 36ed77a4ce7345da5f96c76d6c161d24b8e0c7f4 Mon Sep 17 00:00:00 2001 From: veenstrajelmer <60435591+veenstrajelmer@users.noreply.github.com> Date: Mon, 24 Jun 2024 14:46:34 +0200 Subject: [PATCH] 94 attach station attribute (#96) * pass on attrs and removed station argument from `plot_gemiddeldgetij()` * add comparison of station attrs in several functions * simplified plot_tidalindicators and add compare for station attrs * fixed logging --- examples/KWK_getcheckdata.py | 2 +- examples/KWK_process.py | 43 ++++++++++++++------------- kenmerkendewaarden/gemiddeldgetij.py | 32 ++++++++++++-------- kenmerkendewaarden/havengetallen.py | 2 +- kenmerkendewaarden/overschrijding.py | 2 +- kenmerkendewaarden/slotgemiddelden.py | 13 +++++--- kenmerkendewaarden/tidalindicators.py | 24 ++++++--------- tests/test_tidalindicators.py | 3 +- 8 files changed, 65 insertions(+), 56 deletions(-) diff --git a/examples/KWK_getcheckdata.py b/examples/KWK_getcheckdata.py index e74dc75..a109a45 100644 --- a/examples/KWK_getcheckdata.py +++ b/examples/KWK_getcheckdata.py @@ -9,9 +9,9 @@ # set logging level to INFO to get log messages import logging +logging.basicConfig() # calling basicConfig is essential to set logging level for sub-modules logging.getLogger("kenmerkendewaarden").setLevel(level="INFO") - # TODO: overview of data improvements: https://github.com/Deltares-research/kenmerkendewaarden/issues/29 # TODO: overview of data issues in https://github.com/Deltares-research/kenmerkendewaarden/issues/4 # TODO: missings/duplicates reported in https://github.com/Rijkswaterstaat/wm-ws-dl/issues/39. Some of the duplicates are not retrieved since we use clean_df in ddlpy diff --git a/examples/KWK_process.py b/examples/KWK_process.py index 620698d..f829c95 100644 --- a/examples/KWK_process.py +++ b/examples/KWK_process.py @@ -10,6 +10,7 @@ # set logging level to INFO to get log messages import logging +logging.basicConfig() # calling basicConfig is essential to set logging level for sub-modules logging.getLogger("kenmerkendewaarden").setLevel(level="INFO") # TODO: HW/LW numbers not always increasing (at havengetallen): ['HANSWT','BROUWHVSGT08','PETTZD','DORDT'] @@ -48,9 +49,9 @@ 'MARLGT','NES','NIEUWSTZL','NORTHCMRT','DENOVBTN','OOSTSDE04','OOSTSDE11','OOSTSDE14','OUDSD','OVLVHWT','Q1', 'ROOMPBNN','ROOMPBTN','SCHAARVDND','SCHEVNGN','SCHIERMNOG','SINTANLHVSGR','STAVNSE','STELLDBTN','TERNZN','TERSLNZE','TEXNZE', 'VLAKTVDRN','VLIELHVN','VLISSGN','WALSODN','WESTKPLE','WESTTSLG','WIERMGDN','YERSKE'] +station_list = ["VLISSGN","HOEKVHLD","IJMDBTHVN","HARLGN","DENHDR","DELFZL","SCHIERMNOG","VLIELHVN","STELLDBTN","SCHEVNGN","ROOMPBTN"] # subset of 11 stations along the coast # TODO: maybe add from Dillingh 2013: DORDT, MAASMSMPL, PETTZD, ROTTDM -stat_list = ['HOEKVHLD']#,'HARVT10','VLISSGN'] - +station_list = ['HOEKVHLD']#,'HARVT10','VLISSGN'] nap_correction = False @@ -62,7 +63,7 @@ compute_overschrijding = True -for current_station in stat_list: +for current_station in station_list: print(f'starting process for {current_station}') plt.close('all') @@ -80,8 +81,9 @@ #crop timeseries to 10y data_pd_HWLW_10y_12 = hatyan.crop_timeseries(data_pd_HWLW_all_12, times=slice(tstart_dt,tstop_dt),onlyfull=False) - - + + + #### TIDAL INDICATORS if compute_indicators and data_pd_meas_all is not None and data_pd_HWLW_all is not None: print(f'tidal indicators for {current_station}') @@ -95,12 +97,16 @@ dict_HWLWtidalindicators["hat"] = hat dict_HWLWtidalindicators["lat"] = lat + # merge dictionaries + 
dict_wltidalindicators.update(dict_HWLWtidalindicators) + # plot - fig, ax = kw.plot_tidalindicators(dict_wltidalindicators, dict_HWLWtidalindicators) + fig, ax = kw.plot_tidalindicators(dict_wltidalindicators) fig.savefig(os.path.join(dir_indicators,f'tidal_indicators_{current_station}')) - - - + + + + #### SLOTGEMIDDELDEN # TODO: nodal cycle is not in same phase for all stations, this is not physically correct. # TODO: more data is needed for proper working of fitting for some stations (2011: BAALHK, BRESKVHVN, GATVBSLE, SCHAARVDND) @@ -153,11 +159,12 @@ df_havengetallen, data_pd_HWLW = kw.calc_havengetallen(df_ext=data_pd_HWLW_10y_12, return_df_ext=True) - print('HWLW FIGUREN PER TIJDSKLASSE, INCLUSIEF MEDIAN LINE') + print(f'havengetallen for {current_station}') + # plot hwlw per timeclass including median fig, axs = kw.plot_HWLW_pertimeclass(data_pd_HWLW, df_havengetallen) fig.savefig(os.path.join(dir_havget,f'HWLW_pertijdsklasse_inclmedianline_{current_station}')) - print('AARDAPPELGRAFIEK') + # plot aardappelgrafiek fig, (ax1,ax2) = kw.plot_aardappelgrafiek(df_havengetallen) fig.savefig(os.path.join(dir_havget, f'aardappelgrafiek_{year_slotgem}_{current_station}')) @@ -169,11 +176,10 @@ - ##### GEMIDDELDE GETIJKROMMEN if compute_gemgetij and data_pd_meas_all is not None and data_pd_HWLW_all is not None: - print(f'gem getijkrommen for {current_station}') + print(f'gemiddelde getijkrommen for {current_station}') pred_freq = "10s" # frequency influences the accuracy of havengetallen-scaling and is writing frequency of BOI timeseries # derive getijkrommes: raw, scaled to havengetallen, scaled to havengetallen and 12h25min period @@ -192,11 +198,11 @@ file_boi_csv = os.path.join(dir_gemgetij, f'Getijkromme_BOI_{key}_{current_station}_slotgem{year_slotgem}.csv') gemgetij_corr_boi[key].to_csv(file_boi_csv, float_format='%.3f') - fig_sum, ax_sum = kw.plot_gemiddeldgetij(gemgetij_dict=gemgetij_corr, gemgetij_dict_raw=gemgetij_raw, station=current_station, tick_hours=6) + fig_sum, ax_sum = kw.plot_gemiddeldgetij(gemgetij_dict=gemgetij_corr, gemgetij_dict_raw=gemgetij_raw, tick_hours=6) fig_sum.savefig(os.path.join(dir_gemgetij,f'gemgetij_trefHW_{current_station}')) - print(f'plot BOI figure and compare to KW2020: {current_station}') - fig_boi, ax1_boi = kw.plot_gemiddeldgetij(gemgetij_dict=gemgetij_corr_boi, station=current_station, tick_hours=12) + # plot BOI figure and compare to KW2020 + fig_boi, ax1_boi = kw.plot_gemiddeldgetij(gemgetij_dict=gemgetij_corr_boi, tick_hours=12) # plot validation lines if available # TODO: these index of this line is converted from datetimes to timedeltas to get it in the same plot @@ -225,7 +231,6 @@ - ###OVERSCHRIJDINGSFREQUENTIES # TODO: SLR trend correctie voor overschrijdingsfrequenties en evt ook voor andere KW? # TODO: resulting freqs seem to be shifted w.r.t. getijtafelboekje (mail PH 9-3-2022) @@ -246,7 +251,6 @@ if current_station =='HOEKVHLD': dir_vali_overschr = os.path.join(dir_base,'data_overschrijding') # TODO: this data is not reproducible yet stat_name = 'Hoek_van_Holland' - print('Load Hydra-NL distribution data and other validation data') dist_vali_exc = {} dist_vali_exc['Hydra-NL'] = pd.read_csv(os.path.join(dir_vali_overschr,'Processed_HydraNL','Without_model_uncertainty',f'{stat_name}.csv'), sep=';', header=[0]) dist_vali_exc['Hydra-NL']['values'] /= 100 # cm to m @@ -262,7 +266,6 @@ dist_vali_dec['validation']['values'] /= 100 # 1. 
Exceedance - print('Exceedance') dist_exc = kw.calc_overschrijding(df_ext=data_pd_measext, rule_type=None, rule_value=None, clip_physical_break=True, dist=dist_vali_exc, interp_freqs=Tfreqs_interested) @@ -274,7 +277,6 @@ fig.savefig(os.path.join(dir_overschrijding, f'Exceedance_lines_{current_station}.png')) # 2. Deceedance - print('Deceedance') dist_dec = kw.calc_overschrijding(df_ext=data_pd_measext, rule_type=None, rule_value=None, clip_physical_break=True, dist=dist_vali_dec, inverse=True, interp_freqs=Tfreqs_interested) @@ -283,4 +285,3 @@ fig, ax = kw.plot_overschrijding(dist_dec) fig.savefig(os.path.join(dir_overschrijding, f'Deceedance_lines_{current_station}.png')) - diff --git a/kenmerkendewaarden/gemiddeldgetij.py b/kenmerkendewaarden/gemiddeldgetij.py index a5da5b9..127453c 100644 --- a/kenmerkendewaarden/gemiddeldgetij.py +++ b/kenmerkendewaarden/gemiddeldgetij.py @@ -76,6 +76,10 @@ def calc_gemiddeldgetij(df_meas: pd.DataFrame, df_ext: pd.DataFrame = None, min_ if scale_extremes: if df_ext is None: raise ValueError("df_ext should be provided if scale_extremes=True") + # compare station attributes + station_attrs = [df.attrs["station"] for df in [df_meas, df_ext]] + assert all(x == station_attrs[0] for x in station_attrs) + df_havengetallen = calc_havengetallen(df_ext=df_ext, min_coverage=min_coverage) HW_sp, LW_sp = df_havengetallen.loc[0,['HW_values_median','LW_values_median']] # spring HW_np, LW_np = df_havengetallen.loc[6,['HW_values_median','LW_values_median']] # neap @@ -90,14 +94,14 @@ def calc_gemiddeldgetij(df_meas: pd.DataFrame, df_ext: pd.DataFrame = None, min_ times_pred_1mnth = pd.date_range(start=pd.Timestamp(tstop_dt.year,1,1,0,0)-pd.Timedelta(hours=12), end=pd.Timestamp(tstop_dt.year,2,1,0,0), freq=freq) #start 12 hours in advance, to assure also corrected values on desired tstart comp_av.attrs['nodalfactors'] = False #nodalfactors=False to guarantee repetative signal - prediction_av = hatyan.prediction(comp_av, times=times_pred_1mnth) - prediction_av_ext = hatyan.calc_HWLW(ts=prediction_av, calc_HWLW345=False) + prediction_avg = hatyan.prediction(comp_av, times=times_pred_1mnth) + prediction_avg_ext = hatyan.calc_HWLW(ts=prediction_avg, calc_HWLW345=False) - time_firstHW = prediction_av_ext.loc[prediction_av_ext['HWLWcode']==1].index[0] #time of first HW - ia1 = prediction_av_ext.loc[time_firstHW:].index[0] #time of first HW - ia2 = prediction_av_ext.loc[time_firstHW:].index[2] #time of second HW - prediction_av_one = prediction_av.loc[ia1:ia2] - prediction_av_ext_one = prediction_av_ext.loc[ia1:ia2] + time_firstHW = prediction_avg_ext.loc[prediction_avg_ext['HWLWcode']==1].index[0] #time of first HW + ia1 = prediction_avg_ext.loc[time_firstHW:].index[0] #time of first HW + ia2 = prediction_avg_ext.loc[time_firstHW:].index[2] #time of second HW + prediction_avg_one = prediction_avg.loc[ia1:ia2] + prediction_avg_ext_one = prediction_avg_ext.loc[ia1:ia2] # ============================================================================= # Hatyan predictie voor 1 jaar met gemiddelde helling maansbaan (voor afleiden spring-doodtijcyclus) >> predictie zonder nodalfactors instead @@ -159,7 +163,7 @@ def calc_gemiddeldgetij(df_meas: pd.DataFrame, df_ext: pd.DataFrame = None, min_ #timeseries for gele boekje (av/sp/np have different lengths, time is relative to HW of av and HW of sp/np are shifted there) logger.info(f'reshape_signal GEMGETIJ: {current_station}') - prediction_av_corr_one = reshape_signal(prediction_av_one, prediction_av_ext_one, HW_goal=HW_av, 
LW_goal=LW_av, tP_goal=tP_goal) + prediction_av_corr_one = reshape_signal(prediction_avg_one, prediction_avg_ext_one, HW_goal=HW_av, LW_goal=LW_av, tP_goal=tP_goal) prediction_av_corr_one.index = prediction_av_corr_one.index - prediction_av_corr_one.index[0] # make relative to first timestamp (=HW) if scale_period: # resampling required because of scaling prediction_av_corr_one = prediction_av_corr_one.resample(freq).nearest() @@ -188,7 +192,7 @@ def calc_gemiddeldgetij(df_meas: pd.DataFrame, df_ext: pd.DataFrame = None, min_ return gemgetij_dict -def plot_gemiddeldgetij(gemgetij_dict:dict, gemgetij_dict_raw:dict = None, station:str = None, tick_hours:int = None): +def plot_gemiddeldgetij(gemgetij_dict:dict, gemgetij_dict_raw:dict = None, tick_hours:int = None): """ Default plotting function for gemiddeldgetij dictionaries. @@ -198,8 +202,6 @@ def plot_gemiddeldgetij(gemgetij_dict:dict, gemgetij_dict_raw:dict = None, stati dictionary as returned from `kw.calc_gemiddeldgetij()`. gemgetij_raw_dict : dict, optional dictionary as returned from `kw.calc_gemiddeldgetij()` e.g. with uncorrected values. The default is None. - station : str, optional - station name, used in figure title. The default is None. ticks_12h : bool, optional whether to use xaxis ticks of 12 hours, otherwise automatic but less nice values @@ -211,7 +213,11 @@ def plot_gemiddeldgetij(gemgetij_dict:dict, gemgetij_dict_raw:dict = None, stati Figure axis handle. """ - # TODO: prevent station argument + # get and compare station attributes + station_attrs = [v.attrs["station"] for k,v in gemgetij_dict.items()] + assert all(x == station_attrs[0] for x in station_attrs) + station = station_attrs[0] + logger.info(f'plot getijkromme trefHW: {station}') fig, ax = plt.subplots(figsize=(14,7)) cmap = plt.get_cmap("tab10") @@ -376,4 +382,6 @@ def repeat_signal(ts_one_HWtoHW, nb, nf): index=ts_one_HWtoHW.index + iAdd*tidalperiod) ts_rep = pd.concat([ts_rep,ts_add]) ts_rep = ts_rep.loc[~ts_rep.index.duplicated()] + # pass on attributes + ts_rep.attrs = ts_one_HWtoHW.attrs return ts_rep diff --git a/kenmerkendewaarden/havengetallen.py b/kenmerkendewaarden/havengetallen.py index 852cd6c..f3e492b 100644 --- a/kenmerkendewaarden/havengetallen.py +++ b/kenmerkendewaarden/havengetallen.py @@ -128,7 +128,7 @@ def calc_HWLW_moonculm_combi(data_pd_HWLW_12,culm_addtime=None): def calc_HWLW_culmhr_summary(data_pd_HWLW): - logger.info('calculate medians per hour group for LW and HW (instead of 1991 method: average of subgroups with removal of outliers)') + logger.info('calculate medians per hour group for LW and HW') data_pd_HW = data_pd_HWLW.loc[data_pd_HWLW['HWLWcode']==1] data_pd_LW = data_pd_HWLW.loc[data_pd_HWLW['HWLWcode']==2] diff --git a/kenmerkendewaarden/overschrijding.py b/kenmerkendewaarden/overschrijding.py index e55e0a9..f9b5dbb 100644 --- a/kenmerkendewaarden/overschrijding.py +++ b/kenmerkendewaarden/overschrijding.py @@ -75,7 +75,7 @@ def calc_overschrijding(df_ext:pd.DataFrame, dist:dict = None, if dist is None: dist = {} - logger.info('Calculate unfiltered distribution') + logger.info(f'Calculate unfiltered distribution (inverse={inverse})') dist['Ongefilterd'] = distribution(df_extrema_clean, inverse=inverse) # TODO: re-enable filter for river discharge peaks diff --git a/kenmerkendewaarden/slotgemiddelden.py b/kenmerkendewaarden/slotgemiddelden.py index e4e8f93..f95d6e3 100644 --- a/kenmerkendewaarden/slotgemiddelden.py +++ b/kenmerkendewaarden/slotgemiddelden.py @@ -43,8 +43,6 @@ def calc_slotgemiddelden(df_meas: pd.DataFrame, 
df_ext: pd.DataFrame=None, dictionary with yearly means and model fits, optionally also for extremes. """ - # TODO: assert if station of both timeseries is the same (if df_ext is present) - # initialize dict slotgemiddelden_dict = {} @@ -69,6 +67,10 @@ def calc_slotgemiddelden(df_meas: pd.DataFrame, df_ext: pd.DataFrame=None, slotgemiddelden_dict["wl_model_fit"] = pred_pd_wl if df_ext is not None: + # compare station attributes + station_attrs = [df.attrs["station"] for df in [df_meas, df_ext]] + assert all(x == station_attrs[0] for x in station_attrs) + # clip last value of the timeseries if this is exactly newyearsday if df_ext.index[-1] == pd.Timestamp(df_ext.index[-1].year,1,1, tz=df_ext.index.tz): df_ext = df_ext.iloc[:-1] @@ -106,7 +108,7 @@ def plot_slotgemiddelden(slotgemiddelden_dict:dict, slotgemiddelden_dict_all:dic Output from `kw.calc_slotgemiddelden` containing timeseries of yearly mean waterlevels and corresponding model fits. slotgemiddelden_dict_all : dict, optional - Optionally provide another dictionary with unfiltered mean waterlevls. + Optionally provide another dictionary with unfiltered mean waterlevels. Only used to plot the mean waterlevels (in grey). The default is None. Returns @@ -117,7 +119,6 @@ def plot_slotgemiddelden(slotgemiddelden_dict:dict, slotgemiddelden_dict_all:dic Figure axis handle. """ - # TODO: maybe add an escape for if the station attr is not present station = slotgemiddelden_dict['wl_mean_peryear'].attrs["station"] fig, ax = plt.subplots(figsize=(12,6)) @@ -136,6 +137,10 @@ def plot_slotgemiddelden(slotgemiddelden_dict:dict, slotgemiddelden_dict_all:dic # plot timeseries of average extremes if slotgemiddelden_dict_all is not None: + # compare station attributes + station_attrs = [dic['wl_mean_peryear'].attrs["station"] for dic in [slotgemiddelden_dict, slotgemiddelden_dict_all]] + assert all(x == station_attrs[0] for x in station_attrs) + if "HW_mean_peryear" in slotgemiddelden_dict_all.keys(): HW_mean_peryear_all = slotgemiddelden_dict_all["HW_mean_peryear"] LW_mean_peryear_all = slotgemiddelden_dict_all["LW_mean_peryear"] diff --git a/kenmerkendewaarden/tidalindicators.py b/kenmerkendewaarden/tidalindicators.py index 9dbce88..e1373f2 100644 --- a/kenmerkendewaarden/tidalindicators.py +++ b/kenmerkendewaarden/tidalindicators.py @@ -227,16 +227,14 @@ def plot_pd_series(indicators_dict, ax): ax.hlines(value, xmin, xmax, linestyle="--", color="k", label=key, zorder=1) -def plot_tidalindicators(indicators_wl:dict = None, indicators_ext = None): +def plot_tidalindicators(dict_indicators:dict): """ plot tidalindicators Parameters ---------- - indicators_wl : dict, optional - Dictionary as returned from `kw.calc_wltidalindicators()`. The default is None. - indicators_ext : TYPE, optional - Dictionary as returned from `kw.calc_HWLWtidalindicators()`. The default is None. + dict_indicators : dict, optional + Dictionary as returned from `kw.calc_wltidalindicators()` and/or `kw.calc_HWLWtidalindicators()`. The default is None. 
Returns ------- @@ -247,17 +245,13 @@ def plot_tidalindicators(indicators_wl:dict = None, indicators_ext = None): """ + # get and compare station attributes + station_attrs = [v.attrs["station"] for k,v in dict_indicators.items() if hasattr(v, "attrs")] + assert all(x == station_attrs[0] for x in station_attrs) + station = station_attrs[0] + fig, ax = plt.subplots(figsize=(12,6)) - - if indicators_wl is not None: - # TODO: maybe add an escape for if the station attr is not present - station = indicators_wl['wl_mean_peryear'].attrs["station"] - plot_pd_series(indicators_wl, ax) - if indicators_ext is not None: - # TODO: maybe add an escape for if the station attr is not present - station = indicators_ext['HW_mean_peryear'].attrs["station"] - plot_pd_series(indicators_ext, ax) - + plot_pd_series(dict_indicators, ax) ax.grid() ax.legend(loc=1) ax.set_ylabel("water level [m]") diff --git a/tests/test_tidalindicators.py b/tests/test_tidalindicators.py index 5eb5af4..42e2ed9 100644 --- a/tests/test_tidalindicators.py +++ b/tests/test_tidalindicators.py @@ -161,7 +161,8 @@ def test_calc_wltidalindicators(df_ext_12_2010_2014): def test_plot_wltidalindicators(df_meas_2010_2014, df_ext_12_2010_2014): wl_stats = kw.calc_wltidalindicators(df_meas_2010_2014) ext_stats = kw.calc_HWLWtidalindicators(df_ext_12_2010_2014) - kw.plot_tidalindicators(wl_stats, ext_stats) + wl_stats.update(ext_stats) + kw.plot_tidalindicators(wl_stats) @pytest.mark.unittest
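
The station-attribute checks added throughout this patch all follow the same pattern: collect the "station" entry from each input's pandas attrs and assert they are identical before combining data, then use that single value for plot titles instead of a separate station argument. Below is a minimal standalone sketch of that pattern, assuming pandas' DataFrame.attrs (available since pandas 1.0); the helper name assert_same_station is illustrative only and is not part of the kenmerkendewaarden package or of this patch.

    import pandas as pd

    def assert_same_station(*datasets):
        # collect the station attribute of every provided DataFrame
        stations = [ds.attrs["station"] for ds in datasets]
        # all inputs must originate from the same measurement station
        assert all(s == stations[0] for s in stations), f"station attrs differ: {stations}"
        return stations[0]

    # example usage with two toy DataFrames carrying a station attribute
    df_meas = pd.DataFrame({"values": [0.1, -0.2]})
    df_meas.attrs["station"] = "HOEKVHLD"
    df_ext = pd.DataFrame({"values": [1.1, -0.9]})
    df_ext.attrs["station"] = "HOEKVHLD"
    station = assert_same_station(df_meas, df_ext)  # returns "HOEKVHLD"

Note that pandas does not propagate attrs through every operation (e.g. pd.concat may drop them, which is why repeat_signal in this patch copies attrs explicitly), so functions that build new objects from inputs need to reattach the attribute themselves.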