Skip to content

Commit

Permalink
94 attach station attribute (#96)
Browse files Browse the repository at this point in the history
* pass on attrs and removed station argument from `plot_gemiddeldgetij()`

* add comparison of station attrs in several functions

* simplified plot_tidalindicators and add compare for station attrs

* fixed logging
  • Loading branch information
veenstrajelmer authored Jun 24, 2024
1 parent e43eaa0 commit 36ed77a
Show file tree
Hide file tree
Showing 8 changed files with 65 additions and 56 deletions.
2 changes: 1 addition & 1 deletion examples/KWK_getcheckdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@

# set logging level to INFO to get log messages
import logging
logging.basicConfig() # calling basicConfig is essential to set logging level for sub-modules
logging.getLogger("kenmerkendewaarden").setLevel(level="INFO")


# TODO: overview of data improvements: https://github.com/Deltares-research/kenmerkendewaarden/issues/29
# TODO: overview of data issues in https://github.com/Deltares-research/kenmerkendewaarden/issues/4
# TODO: missings/duplicates reported in https://github.com/Rijkswaterstaat/wm-ws-dl/issues/39. Some of the duplicates are not retrieved since we use clean_df in ddlpy
Expand Down
43 changes: 22 additions & 21 deletions examples/KWK_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

# set logging level to INFO to get log messages
import logging
logging.basicConfig() # calling basicConfig is essential to set logging level for sub-modules
logging.getLogger("kenmerkendewaarden").setLevel(level="INFO")

# TODO: HW/LW numbers not always increasing (at havengetallen): ['HANSWT','BROUWHVSGT08','PETTZD','DORDT']
Expand Down Expand Up @@ -48,9 +49,9 @@
'MARLGT','NES','NIEUWSTZL','NORTHCMRT','DENOVBTN','OOSTSDE04','OOSTSDE11','OOSTSDE14','OUDSD','OVLVHWT','Q1',
'ROOMPBNN','ROOMPBTN','SCHAARVDND','SCHEVNGN','SCHIERMNOG','SINTANLHVSGR','STAVNSE','STELLDBTN','TERNZN','TERSLNZE','TEXNZE',
'VLAKTVDRN','VLIELHVN','VLISSGN','WALSODN','WESTKPLE','WESTTSLG','WIERMGDN','YERSKE']
station_list = ["VLISSGN","HOEKVHLD","IJMDBTHVN","HARLGN","DENHDR","DELFZL","SCHIERMNOG","VLIELHVN","STELLDBTN","SCHEVNGN","ROOMPBTN"] # subset of 11 stations along the coast
# TODO: maybe add from Dillingh 2013: DORDT, MAASMSMPL, PETTZD, ROTTDM
stat_list = ['HOEKVHLD']#,'HARVT10','VLISSGN']

station_list = ['HOEKVHLD']#,'HARVT10','VLISSGN']


nap_correction = False
Expand All @@ -62,7 +63,7 @@
compute_overschrijding = True


for current_station in stat_list:
for current_station in station_list:
print(f'starting process for {current_station}')
plt.close('all')

Expand All @@ -80,8 +81,9 @@
#crop timeseries to 10y
data_pd_HWLW_10y_12 = hatyan.crop_timeseries(data_pd_HWLW_all_12, times=slice(tstart_dt,tstop_dt),onlyfull=False)






#### TIDAL INDICATORS
if compute_indicators and data_pd_meas_all is not None and data_pd_HWLW_all is not None:
print(f'tidal indicators for {current_station}')
Expand All @@ -95,12 +97,16 @@
dict_HWLWtidalindicators["hat"] = hat
dict_HWLWtidalindicators["lat"] = lat

# merge dictionaries
dict_wltidalindicators.update(dict_HWLWtidalindicators)

# plot
fig, ax = kw.plot_tidalindicators(dict_wltidalindicators, dict_HWLWtidalindicators)
fig, ax = kw.plot_tidalindicators(dict_wltidalindicators)
fig.savefig(os.path.join(dir_indicators,f'tidal_indicators_{current_station}'))







#### SLOTGEMIDDELDEN
# TODO: nodal cycle is not in same phase for all stations, this is not physically correct.
# TODO: more data is needed for proper working of fitting for some stations (2011: BAALHK, BRESKVHVN, GATVBSLE, SCHAARVDND)
Expand Down Expand Up @@ -153,11 +159,12 @@

df_havengetallen, data_pd_HWLW = kw.calc_havengetallen(df_ext=data_pd_HWLW_10y_12, return_df_ext=True)

print('HWLW FIGUREN PER TIJDSKLASSE, INCLUSIEF MEDIAN LINE')
print(f'havengetallen for {current_station}')
# plot hwlw per timeclass including median
fig, axs = kw.plot_HWLW_pertimeclass(data_pd_HWLW, df_havengetallen)
fig.savefig(os.path.join(dir_havget,f'HWLW_pertijdsklasse_inclmedianline_{current_station}'))

print('AARDAPPELGRAFIEK')
# plot aardappelgrafiek
fig, (ax1,ax2) = kw.plot_aardappelgrafiek(df_havengetallen)
fig.savefig(os.path.join(dir_havget, f'aardappelgrafiek_{year_slotgem}_{current_station}'))

Expand All @@ -169,11 +176,10 @@




##### GEMIDDELDE GETIJKROMMEN
if compute_gemgetij and data_pd_meas_all is not None and data_pd_HWLW_all is not None:

print(f'gem getijkrommen for {current_station}')
print(f'gemiddelde getijkrommen for {current_station}')
pred_freq = "10s" # frequency influences the accuracy of havengetallen-scaling and is writing frequency of BOI timeseries

# derive getijkrommes: raw, scaled to havengetallen, scaled to havengetallen and 12h25min period
Expand All @@ -192,11 +198,11 @@
file_boi_csv = os.path.join(dir_gemgetij, f'Getijkromme_BOI_{key}_{current_station}_slotgem{year_slotgem}.csv')
gemgetij_corr_boi[key].to_csv(file_boi_csv, float_format='%.3f')

fig_sum, ax_sum = kw.plot_gemiddeldgetij(gemgetij_dict=gemgetij_corr, gemgetij_dict_raw=gemgetij_raw, station=current_station, tick_hours=6)
fig_sum, ax_sum = kw.plot_gemiddeldgetij(gemgetij_dict=gemgetij_corr, gemgetij_dict_raw=gemgetij_raw, tick_hours=6)
fig_sum.savefig(os.path.join(dir_gemgetij,f'gemgetij_trefHW_{current_station}'))

print(f'plot BOI figure and compare to KW2020: {current_station}')
fig_boi, ax1_boi = kw.plot_gemiddeldgetij(gemgetij_dict=gemgetij_corr_boi, station=current_station, tick_hours=12)
# plot BOI figure and compare to KW2020
fig_boi, ax1_boi = kw.plot_gemiddeldgetij(gemgetij_dict=gemgetij_corr_boi, tick_hours=12)

# plot validation lines if available
# TODO: these index of this line is converted from datetimes to timedeltas to get it in the same plot
Expand Down Expand Up @@ -225,7 +231,6 @@




###OVERSCHRIJDINGSFREQUENTIES
# TODO: SLR trend correctie voor overschrijdingsfrequenties en evt ook voor andere KW?
# TODO: resulting freqs seem to be shifted w.r.t. getijtafelboekje (mail PH 9-3-2022)
Expand All @@ -246,7 +251,6 @@
if current_station =='HOEKVHLD':
dir_vali_overschr = os.path.join(dir_base,'data_overschrijding') # TODO: this data is not reproducible yet
stat_name = 'Hoek_van_Holland'
print('Load Hydra-NL distribution data and other validation data')
dist_vali_exc = {}
dist_vali_exc['Hydra-NL'] = pd.read_csv(os.path.join(dir_vali_overschr,'Processed_HydraNL','Without_model_uncertainty',f'{stat_name}.csv'), sep=';', header=[0])
dist_vali_exc['Hydra-NL']['values'] /= 100 # cm to m
Expand All @@ -262,7 +266,6 @@
dist_vali_dec['validation']['values'] /= 100

# 1. Exceedance
print('Exceedance')
dist_exc = kw.calc_overschrijding(df_ext=data_pd_measext, rule_type=None, rule_value=None,
clip_physical_break=True, dist=dist_vali_exc,
interp_freqs=Tfreqs_interested)
Expand All @@ -274,7 +277,6 @@
fig.savefig(os.path.join(dir_overschrijding, f'Exceedance_lines_{current_station}.png'))

# 2. Deceedance
print('Deceedance')
dist_dec = kw.calc_overschrijding(df_ext=data_pd_measext, rule_type=None, rule_value=None,
clip_physical_break=True, dist=dist_vali_dec, inverse=True,
interp_freqs=Tfreqs_interested)
Expand All @@ -283,4 +285,3 @@

fig, ax = kw.plot_overschrijding(dist_dec)
fig.savefig(os.path.join(dir_overschrijding, f'Deceedance_lines_{current_station}.png'))

32 changes: 20 additions & 12 deletions kenmerkendewaarden/gemiddeldgetij.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,10 @@ def calc_gemiddeldgetij(df_meas: pd.DataFrame, df_ext: pd.DataFrame = None, min_
if scale_extremes:
if df_ext is None:
raise ValueError("df_ext should be provided if scale_extremes=True")
# compare station attributes
station_attrs = [df.attrs["station"] for df in [df_meas, df_ext]]
assert all(x == station_attrs[0] for x in station_attrs)

df_havengetallen = calc_havengetallen(df_ext=df_ext, min_coverage=min_coverage)
HW_sp, LW_sp = df_havengetallen.loc[0,['HW_values_median','LW_values_median']] # spring
HW_np, LW_np = df_havengetallen.loc[6,['HW_values_median','LW_values_median']] # neap
Expand All @@ -90,14 +94,14 @@ def calc_gemiddeldgetij(df_meas: pd.DataFrame, df_ext: pd.DataFrame = None, min_

times_pred_1mnth = pd.date_range(start=pd.Timestamp(tstop_dt.year,1,1,0,0)-pd.Timedelta(hours=12), end=pd.Timestamp(tstop_dt.year,2,1,0,0), freq=freq) #start 12 hours in advance, to assure also corrected values on desired tstart
comp_av.attrs['nodalfactors'] = False #nodalfactors=False to guarantee repetative signal
prediction_av = hatyan.prediction(comp_av, times=times_pred_1mnth)
prediction_av_ext = hatyan.calc_HWLW(ts=prediction_av, calc_HWLW345=False)
prediction_avg = hatyan.prediction(comp_av, times=times_pred_1mnth)
prediction_avg_ext = hatyan.calc_HWLW(ts=prediction_avg, calc_HWLW345=False)

time_firstHW = prediction_av_ext.loc[prediction_av_ext['HWLWcode']==1].index[0] #time of first HW
ia1 = prediction_av_ext.loc[time_firstHW:].index[0] #time of first HW
ia2 = prediction_av_ext.loc[time_firstHW:].index[2] #time of second HW
prediction_av_one = prediction_av.loc[ia1:ia2]
prediction_av_ext_one = prediction_av_ext.loc[ia1:ia2]
time_firstHW = prediction_avg_ext.loc[prediction_avg_ext['HWLWcode']==1].index[0] #time of first HW
ia1 = prediction_avg_ext.loc[time_firstHW:].index[0] #time of first HW
ia2 = prediction_avg_ext.loc[time_firstHW:].index[2] #time of second HW
prediction_avg_one = prediction_avg.loc[ia1:ia2]
prediction_avg_ext_one = prediction_avg_ext.loc[ia1:ia2]

# =============================================================================
# Hatyan predictie voor 1 jaar met gemiddelde helling maansbaan (voor afleiden spring-doodtijcyclus) >> predictie zonder nodalfactors instead
Expand Down Expand Up @@ -159,7 +163,7 @@ def calc_gemiddeldgetij(df_meas: pd.DataFrame, df_ext: pd.DataFrame = None, min_

#timeseries for gele boekje (av/sp/np have different lengths, time is relative to HW of av and HW of sp/np are shifted there)
logger.info(f'reshape_signal GEMGETIJ: {current_station}')
prediction_av_corr_one = reshape_signal(prediction_av_one, prediction_av_ext_one, HW_goal=HW_av, LW_goal=LW_av, tP_goal=tP_goal)
prediction_av_corr_one = reshape_signal(prediction_avg_one, prediction_avg_ext_one, HW_goal=HW_av, LW_goal=LW_av, tP_goal=tP_goal)
prediction_av_corr_one.index = prediction_av_corr_one.index - prediction_av_corr_one.index[0] # make relative to first timestamp (=HW)
if scale_period: # resampling required because of scaling
prediction_av_corr_one = prediction_av_corr_one.resample(freq).nearest()
Expand Down Expand Up @@ -188,7 +192,7 @@ def calc_gemiddeldgetij(df_meas: pd.DataFrame, df_ext: pd.DataFrame = None, min_
return gemgetij_dict


def plot_gemiddeldgetij(gemgetij_dict:dict, gemgetij_dict_raw:dict = None, station:str = None, tick_hours:int = None):
def plot_gemiddeldgetij(gemgetij_dict:dict, gemgetij_dict_raw:dict = None, tick_hours:int = None):
"""
Default plotting function for gemiddeldgetij dictionaries.
Expand All @@ -198,8 +202,6 @@ def plot_gemiddeldgetij(gemgetij_dict:dict, gemgetij_dict_raw:dict = None, stati
dictionary as returned from `kw.calc_gemiddeldgetij()`.
gemgetij_raw_dict : dict, optional
dictionary as returned from `kw.calc_gemiddeldgetij()` e.g. with uncorrected values. The default is None.
station : str, optional
station name, used in figure title. The default is None.
ticks_12h : bool, optional
whether to use xaxis ticks of 12 hours, otherwise automatic but less nice values
Expand All @@ -211,7 +213,11 @@ def plot_gemiddeldgetij(gemgetij_dict:dict, gemgetij_dict_raw:dict = None, stati
Figure axis handle.
"""
# TODO: prevent station argument
# get and compare station attributes
station_attrs = [v.attrs["station"] for k,v in gemgetij_dict.items()]
assert all(x == station_attrs[0] for x in station_attrs)
station = station_attrs[0]

logger.info(f'plot getijkromme trefHW: {station}')
fig, ax = plt.subplots(figsize=(14,7))
cmap = plt.get_cmap("tab10")
Expand Down Expand Up @@ -376,4 +382,6 @@ def repeat_signal(ts_one_HWtoHW, nb, nf):
index=ts_one_HWtoHW.index + iAdd*tidalperiod)
ts_rep = pd.concat([ts_rep,ts_add])
ts_rep = ts_rep.loc[~ts_rep.index.duplicated()]
# pass on attributes
ts_rep.attrs = ts_one_HWtoHW.attrs
return ts_rep
2 changes: 1 addition & 1 deletion kenmerkendewaarden/havengetallen.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ def calc_HWLW_moonculm_combi(data_pd_HWLW_12,culm_addtime=None):


def calc_HWLW_culmhr_summary(data_pd_HWLW):
logger.info('calculate medians per hour group for LW and HW (instead of 1991 method: average of subgroups with removal of outliers)')
logger.info('calculate medians per hour group for LW and HW')
data_pd_HW = data_pd_HWLW.loc[data_pd_HWLW['HWLWcode']==1]
data_pd_LW = data_pd_HWLW.loc[data_pd_HWLW['HWLWcode']==2]

Expand Down
2 changes: 1 addition & 1 deletion kenmerkendewaarden/overschrijding.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def calc_overschrijding(df_ext:pd.DataFrame, dist:dict = None,
if dist is None:
dist = {}

logger.info('Calculate unfiltered distribution')
logger.info(f'Calculate unfiltered distribution (inverse={inverse})')
dist['Ongefilterd'] = distribution(df_extrema_clean, inverse=inverse)

# TODO: re-enable filter for river discharge peaks
Expand Down
13 changes: 9 additions & 4 deletions kenmerkendewaarden/slotgemiddelden.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,6 @@ def calc_slotgemiddelden(df_meas: pd.DataFrame, df_ext: pd.DataFrame=None,
dictionary with yearly means and model fits, optionally also for extremes.
"""
# TODO: assert if station of both timeseries is the same (if df_ext is present)

# initialize dict
slotgemiddelden_dict = {}

Expand All @@ -69,6 +67,10 @@ def calc_slotgemiddelden(df_meas: pd.DataFrame, df_ext: pd.DataFrame=None,
slotgemiddelden_dict["wl_model_fit"] = pred_pd_wl

if df_ext is not None:
# compare station attributes
station_attrs = [df.attrs["station"] for df in [df_meas, df_ext]]
assert all(x == station_attrs[0] for x in station_attrs)

# clip last value of the timeseries if this is exactly newyearsday
if df_ext.index[-1] == pd.Timestamp(df_ext.index[-1].year,1,1, tz=df_ext.index.tz):
df_ext = df_ext.iloc[:-1]
Expand Down Expand Up @@ -106,7 +108,7 @@ def plot_slotgemiddelden(slotgemiddelden_dict:dict, slotgemiddelden_dict_all:dic
Output from `kw.calc_slotgemiddelden` containing timeseries
of yearly mean waterlevels and corresponding model fits.
slotgemiddelden_dict_all : dict, optional
Optionally provide another dictionary with unfiltered mean waterlevls.
Optionally provide another dictionary with unfiltered mean waterlevels.
Only used to plot the mean waterlevels (in grey). The default is None.
Returns
Expand All @@ -117,7 +119,6 @@ def plot_slotgemiddelden(slotgemiddelden_dict:dict, slotgemiddelden_dict_all:dic
Figure axis handle.
"""
# TODO: maybe add an escape for if the station attr is not present
station = slotgemiddelden_dict['wl_mean_peryear'].attrs["station"]

fig, ax = plt.subplots(figsize=(12,6))
Expand All @@ -136,6 +137,10 @@ def plot_slotgemiddelden(slotgemiddelden_dict:dict, slotgemiddelden_dict_all:dic

# plot timeseries of average extremes
if slotgemiddelden_dict_all is not None:
# compare station attributes
station_attrs = [dic['wl_mean_peryear'].attrs["station"] for dic in [slotgemiddelden_dict, slotgemiddelden_dict_all]]
assert all(x == station_attrs[0] for x in station_attrs)

if "HW_mean_peryear" in slotgemiddelden_dict_all.keys():
HW_mean_peryear_all = slotgemiddelden_dict_all["HW_mean_peryear"]
LW_mean_peryear_all = slotgemiddelden_dict_all["LW_mean_peryear"]
Expand Down
24 changes: 9 additions & 15 deletions kenmerkendewaarden/tidalindicators.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,16 +227,14 @@ def plot_pd_series(indicators_dict, ax):
ax.hlines(value, xmin, xmax, linestyle="--", color="k", label=key, zorder=1)


def plot_tidalindicators(indicators_wl:dict = None, indicators_ext = None):
def plot_tidalindicators(dict_indicators:dict):
"""
plot tidalindicators
Parameters
----------
indicators_wl : dict, optional
Dictionary as returned from `kw.calc_wltidalindicators()`. The default is None.
indicators_ext : TYPE, optional
Dictionary as returned from `kw.calc_HWLWtidalindicators()`. The default is None.
dict_indicators : dict, optional
Dictionary as returned from `kw.calc_wltidalindicators()` and/or `kw.calc_HWLWtidalindicators()`. The default is None.
Returns
-------
Expand All @@ -247,17 +245,13 @@ def plot_tidalindicators(indicators_wl:dict = None, indicators_ext = None):
"""

# get and compare station attributes
station_attrs = [v.attrs["station"] for k,v in dict_indicators.items() if hasattr(v, "attrs")]
assert all(x == station_attrs[0] for x in station_attrs)
station = station_attrs[0]

fig, ax = plt.subplots(figsize=(12,6))

if indicators_wl is not None:
# TODO: maybe add an escape for if the station attr is not present
station = indicators_wl['wl_mean_peryear'].attrs["station"]
plot_pd_series(indicators_wl, ax)
if indicators_ext is not None:
# TODO: maybe add an escape for if the station attr is not present
station = indicators_ext['HW_mean_peryear'].attrs["station"]
plot_pd_series(indicators_ext, ax)

plot_pd_series(dict_indicators, ax)
ax.grid()
ax.legend(loc=1)
ax.set_ylabel("water level [m]")
Expand Down
3 changes: 2 additions & 1 deletion tests/test_tidalindicators.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,8 @@ def test_calc_wltidalindicators(df_ext_12_2010_2014):
def test_plot_wltidalindicators(df_meas_2010_2014, df_ext_12_2010_2014):
wl_stats = kw.calc_wltidalindicators(df_meas_2010_2014)
ext_stats = kw.calc_HWLWtidalindicators(df_ext_12_2010_2014)
kw.plot_tidalindicators(wl_stats, ext_stats)
wl_stats.update(ext_stats)
kw.plot_tidalindicators(wl_stats)


@pytest.mark.unittest
Expand Down

0 comments on commit 36ed77a

Please sign in to comment.