Commit 291f3ac

17 improvements in output csvs for dashboards (#109)
* fixed semicolon in overschrijdingsfreqs csvs, included yearlymeans and modelfit in slotgemiddelden csv files
* include other culmination hours for havengetallen csv
* added gemiddeldgetij output
* add min_coverage as global setting
* aligned output of tidalindicators and slotgemiddelden by supporting periodindex in kw.slotgemiddelden.model_fit
* periodindex in slotgem, conversion to datetimeindex for plotting
* converted overschrijdings frequency column to dataframe index
* add monthly mean wl csv files
* first step to align output dtypes
* assert series index names
* expanded plot_slotgemiddelde test coverage
1 parent ff47a2c commit 291f3ac

11 files changed: 198 additions (+) and 101 deletions (-)
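
For orientation (not part of the commit): the bullets above say the overschrijdings frequency column became the dataframe index and the csv files are now written with pandas defaults (comma-separated, index included). Below is a minimal sketch of how a dashboard could read one of the reworked files back; the station name, file location and the 'values' column name are assumptions for illustration only.

import pandas as pd

# read an exceedance csv written by the updated script; the exceedance
# frequency [1/year] is the index column, the water level is the data column
station = "HOEKVHLD"  # example station used in examples/KWK_process.py
df_exc = pd.read_csv(f"Exceedance_{station}.csv", index_col=0)
print(df_exc.head())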

examples/KWK_process.py

Lines changed: 37 additions & 26 deletions
@@ -57,6 +57,7 @@
 station_list = ["HOEKVHLD"]
 
 nap_correction = False
+min_coverage = 0.9 # for tidalindicators and slotgemiddelde #TODO: can also be used for havengetallen and gemgetij
 
 compute_indicators = True
 compute_slotgem = True
@@ -90,18 +91,21 @@
 if compute_indicators and data_pd_meas_all is not None and data_pd_HWLW_all is not None:
     print(f'tidal indicators for {current_station}')
     # compute and plot tidal indicators
-    dict_wltidalindicators = kw.calc_wltidalindicators(data_pd_meas_all, min_coverage=1)
-    dict_HWLWtidalindicators = kw.calc_HWLWtidalindicators(data_pd_HWLW_all_12, min_coverage=1)
+    dict_wltidalindicators = kw.calc_wltidalindicators(data_pd_meas_all, min_coverage=min_coverage)
+    dict_HWLWtidalindicators = kw.calc_HWLWtidalindicators(data_pd_HWLW_all_12, min_coverage=min_coverage)
 
     # add hat/lat
     df_meas_19y = data_pd_meas_all.loc["2001":"2019"]
     hat, lat = kw.calc_hat_lat_frommeasurements(df_meas_19y)
     dict_HWLWtidalindicators["hat"] = hat
     dict_HWLWtidalindicators["lat"] = lat
-
+
     # merge dictionaries
     dict_wltidalindicators.update(dict_HWLWtidalindicators)
 
+    # csv for monthly indicators
+    dict_wltidalindicators['wl_mean_permonth'].to_csv(os.path.join(dir_indicators,f'meanwl_permonth_{current_station}.txt'))
+
     # plot
     fig, ax = kw.plot_tidalindicators(dict_wltidalindicators)
     fig.savefig(os.path.join(dir_indicators,f'tidal_indicators_{current_station}'))
@@ -123,17 +127,17 @@
     # only years with enough values and after potential physical break
     slotgemiddelden_valid = kw.calc_slotgemiddelden(df_meas=data_pd_meas_all.loc[:tstop_dt],
                                                     df_ext=data_pd_HWLW_all_12.loc[:tstop_dt],
-                                                    min_coverage=1, clip_physical_break=True)
+                                                    min_coverage=min_coverage, clip_physical_break=True)
 
     # plot slotgemiddelden
     fig1, ax1 = kw.plot_slotgemiddelden(slotgemiddelden_valid, slotgemiddelden_all)
     ax1.set_xlim(fig_alltimes_ext)
 
-    # plot and write slotgemiddelde value (for waterlevels only)
-    slotgem_time_value = slotgemiddelden_valid["wl_model_fit"].iloc[[-1]]
-    ax1.plot(slotgem_time_value, ".k", label=f'slotgemiddelde for {year_slotgem}')
-    # TODO: is upcasted to dataframe before csv writing which results in 0-column, avoid this
-    slotgem_time_value.to_csv(os.path.join(dir_slotgem,f'slotgem_value_{current_station}.txt'))
+    # plot and write slotgemiddelde value (for waterlevels only), the slotgemiddelde is the last value of the model fit
+    slotgemiddelden_valid['HW_mean_peryear'].to_csv(os.path.join(dir_slotgem,f'meanHW_{current_station}.txt'))
+    slotgemiddelden_valid['LW_mean_peryear'].to_csv(os.path.join(dir_slotgem,f'meanLW_{current_station}.txt'))
+    slotgemiddelden_valid['wl_mean_peryear'].to_csv(os.path.join(dir_slotgem,f'meanwl_{current_station}.txt'))
+    slotgemiddelden_valid['wl_model_fit'].to_csv(os.path.join(dir_slotgem,f'modelfit_{current_station}.txt'))
 
     # get and plot validation timeseries (yearly mean wl/HW/LW)
     station_name_dict = {'HOEKVHLD':'hoek',
@@ -169,10 +173,8 @@
     fig, (ax1,ax2) = kw.plot_aardappelgrafiek(df_havengetallen)
     fig.savefig(os.path.join(dir_havget, f'aardappelgrafiek_{year_slotgem}_{current_station}'))
 
-    #write to csv # TODO: do we need this in this format?
-    HWLW_culmhr_summary_exp = df_havengetallen.loc[[6,'mean',0]] #select neap/mean/springtide
-    HWLW_culmhr_summary_exp.index = ['neap','mean','spring']
-    HWLW_culmhr_summary_exp.to_csv(os.path.join(dir_havget, f'havengetallen_{year_slotgem}_{current_station}.csv'),float_format='%.3f')
+    #write to csv
+    df_havengetallen.to_csv(os.path.join(dir_havget, f'havengetallen_{year_slotgem}_{current_station}.csv'),float_format='%.3f')
 
 
 
@@ -196,24 +198,30 @@
     fig, ax = kw.plot_gemiddeldgetij(gemgetij_dict=gemgetij_corr, gemgetij_dict_raw=gemgetij_raw, tick_hours=6)
 
     # plot validation lines if available
-    # TODO: these index of this line is converted from datetimes to timedeltas to get it in the same plot
-    # TODO: the shape is different, so compare to gele boekje instead
+    # TODO: the shape is different, so compare krommes to gele boekje instead of validation data
     dir_vali_krommen = r'p:\archivedprojects\11205258-005-kpp2020_rmm-g5\C_Work\00_KenmerkendeWaarden\07_Figuren\figures_ppSCL_2\final20201211'
     for tidaltype in ["gemgetij","springtij","doodtij"]:
         file_vali_getijkromme = os.path.join(dir_vali_krommen,f'{tidaltype}kromme_{current_station}_havengetallen{year_slotgem}.csv')
         if not os.path.exists(file_vali_getijkromme):
             continue
         df_vali_getij = pd.read_csv(file_vali_getijkromme, index_col=0, parse_dates=True)
+        # convert from datetimes to timedeltas to get it in the same plot (we used datetimes before)
         df_vali_getij.index = df_vali_getij.index - df_vali_getij.index[0]
         ax.plot(df_vali_getij['Water Level [m]'], color='grey', zorder=0, label=f'validation KW2020 {tidaltype}')
     ax.legend(loc=4)
     fig.savefig(os.path.join(dir_gemgetij,f'gemgetij_trefHW_{current_station}'))
 
+    # write corrected timeseries to csv files
+    # TODO: better representation of negative timedeltas requested in https://github.com/pandas-dev/pandas/issues/17232#issuecomment-2205579156, maybe convert timedeltaIndex to minutes instead?
+    for key in gemgetij_corr.keys():
+        file_csv = os.path.join(dir_gemgetij, f'Getijkromme_{key}_{current_station}_slotgem{year_slotgem}.csv')
+        gemgetij_corr[key].to_csv(file_csv, float_format='%.3f')
+
     # plot BOI figure and compare to KW2020
     fig_boi, ax1_boi = kw.plot_gemiddeldgetij(gemgetij_dict=gemgetij_corr_boi, tick_hours=12)
     fig_boi.savefig(os.path.join(dir_gemgetij,f'gemspringdoodtijkromme_BOI_{current_station}_slotgem{year_slotgem}.png'))
 
-    # write boi timeseries to csv files # TODO: maybe convert timedeltaIndex to minutes instead?
+    # write BOI timeseries to csv files
     for key in gemgetij_corr_boi.keys():
         file_boi_csv = os.path.join(dir_gemgetij, f'Getijkromme_BOI_{key}_{current_station}_slotgem{year_slotgem}.csv')
         gemgetij_corr_boi[key].to_csv(file_boi_csv, float_format='%.3f')
@@ -229,27 +237,30 @@
 def initiate_dist_with_hydra_nl(station):
     # get Hydra-NL and KWK-RMM validation data (only available for selection of stations)
     # TODO: this data is not reproducible yet: https://github.com/Deltares-research/kenmerkendewaarden/issues/107
-    # TODO: HOEKVHLD Hydra values are different than old ones in p:\archivedprojects\11205258-005-kpp2020_rmm-g5\C_Work\00_KenmerkendeWaarden\Onder_overschrijdingslijnen_Boyan\Data\Processed_HydraNL
+    # TODO: HOEKVHLD Hydra values are different than old ones in validation line and p:\archivedprojects\11205258-005-kpp2020_rmm-g5\C_Work\00_KenmerkendeWaarden\Onder_overschrijdingslijnen_Boyan\Data\Processed_HydraNL
 
     dist_dict = {}
     dir_overschr_hydra = os.path.join(dir_base,'data_hydraNL')
     file_hydra_nl = os.path.join(dir_overschr_hydra, f'{station}.xls')
     if os.path.exists(file_hydra_nl):
         df_hydra_nl = pd.read_table(file_hydra_nl, encoding='latin-1', decimal=',', header=0)
-        df_hydra_nl['values_Tfreq'] = 1/ df_hydra_nl['Terugkeertijd [jaar]']
+        df_hydra_nl.index = 1/df_hydra_nl['Terugkeertijd [jaar]']
         df_hydra_nl['values'] = df_hydra_nl['Belastingniveau [m+NAP]/Golfparameter [m]/[s]/Sterkte bekleding [-]']
-        df_hydra_nl = df_hydra_nl.loc[:, ['values_Tfreq','values']]
+        df_hydra_nl = df_hydra_nl[['values']]
         df_hydra_nl.attrs['station'] = station
         dist_dict['Hydra-NL'] = df_hydra_nl
     return dist_dict
 
 def add_validation_dist(dist_dict, dist_type, station):
-    dir_overschr_vali = os.path.join(dir_base,'data_overschrijding','Tables')
-    file_validation = os.path.join(dir_overschr_vali, f'{dist_type}_lines', f'{dist_type}_lines_{station}.csv')
-    if not os.path.exists(file_validation):
+    station_names_vali_dict = {"HOEKVHLD":"Hoek_van_Holland"}
+    if station not in station_names_vali_dict.keys():
         return
+    dir_overschr_vali = r"p:\archivedprojects\11205258-005-kpp2020_rmm-g5\C_Work\00_KenmerkendeWaarden\Onder_overschrijdingslijnen_Boyan\Tables"
+    file_validation = os.path.join(dir_overschr_vali, f'{dist_type}_lines', f'{dist_type}_lines_{station_names_vali_dict[station]}.csv')
     df_validation = pd.read_csv(file_validation, sep=';')
+    df_validation = df_validation.rename({"value":"values"},axis=1)
     df_validation['values'] /= 100
+    df_validation = df_validation.set_index("value_Tfreq", drop=True)
     df_validation.attrs['station'] = station
     dist_dict['validation'] = df_validation
 
@@ -268,8 +279,8 @@ def add_validation_dist(dist_dict, dist_type, station):
                                       clip_physical_break=True, dist=dist_exc_hydra,
                                       interp_freqs=Tfreqs_interested)
     add_validation_dist(dist_exc, dist_type='exceedance', station=current_station)
-    df_interp = dist_exc['Geinterpoleerd']
-    df_interp.to_csv(os.path.join(dir_overschrijding, f'Exceedance_{current_station}.csv'), index=False, sep=';')
+    dist_exc['Geinterpoleerd'].to_csv(os.path.join(dir_overschrijding, f'Exceedance_{current_station}.csv'))
+    # dist_exc['Gecombineerd'].to_csv(os.path.join(dir_overschrijding, f'Exceedance_{current_station}_gecombineerd.csv'))
 
     fig, ax = kw.plot_overschrijding(dist_exc)
     ax.set_ylim(0,5.5)
@@ -280,8 +291,8 @@ def add_validation_dist(dist_dict, dist_type, station):
                                       clip_physical_break=True, inverse=True,
                                       interp_freqs=Tfreqs_interested)
     add_validation_dist(dist_dec, dist_type='deceedance', station=current_station)
-    df_interp = dist_dec['Geinterpoleerd']
-    df_interp.to_csv(os.path.join(dir_overschrijding, f'Deceedance_{current_station}.csv'), index=False, sep=';')
+    dist_dec['Geinterpoleerd'].to_csv(os.path.join(dir_overschrijding, f'Deceedance_{current_station}.csv'))
+    # dist_dec['Gecombineerd'].to_csv(os.path.join(dir_overschrijding, f'Deceedance_{current_station}_gecombineerd.csv'))
 
     fig, ax = kw.plot_overschrijding(dist_dec)
     fig.savefig(os.path.join(dir_overschrijding, f'Deceedance_lines_{current_station}.png'))
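
Side note on the TODO about negative timedeltas above: the linked pandas issue concerns the '-1 days +23:00:00' string form, and the TODO suggests writing the TimedeltaIndex as minutes instead. A minimal sketch of that idea, using a hypothetical stand-in series (gemgetij_series is not code from this commit):

import pandas as pd

# hypothetical stand-in for one entry of gemgetij_corr: water levels on a TimedeltaIndex
gemgetij_series = pd.Series(
    [0.85, 1.10, -0.40],
    index=pd.to_timedelta(["-1h", "0h", "1h"]),
    name="values",
)
# signed minutes avoid the awkward '-1 days +23:00:00' representation in the csv
out = gemgetij_series.copy()
out.index = gemgetij_series.index.total_seconds() / 60
out.index.name = "minutes"
out.to_csv("Getijkromme_minutes_example.csv", float_format="%.3f")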

kenmerkendewaarden/gemiddeldgetij.py

Lines changed: 4 additions & 3 deletions
@@ -329,6 +329,7 @@ def reshape_signal(ts, ts_ext, HW_goal, LW_goal, tP_goal=None):
     """
     # early escape # TODO: should also be possible to only scale tP_goal
     if HW_goal is None and LW_goal is None:
+        ts.index.name = 'timedelta'
         return ts
 
     # TODO: consider removing the need for ts_ext, it should be possible with min/max, although the HW of the raw timeseries are not exactly equal
@@ -346,7 +347,7 @@ def reshape_signal(ts, ts_ext, HW_goal, LW_goal, tP_goal=None):
     ts_time_lastHW = ts_ext[bool_HW].index[-1]
     ts_corr = ts.copy().loc[ts_time_firstHW:ts_time_lastHW]
 
-    ts_corr['times'] = ts_corr.index #this is necessary since datetimeindex with freq is not editable, and Series is editable
+    ts_corr['timedelta'] = ts_corr.index #this is necessary since datetimeindex with freq is not editable, and Series is editable
     for i in np.arange(0,len(timesHW)-1):
         HW1_val = ts_corr.loc[timesHW[i],'values']
         HW2_val = ts_corr.loc[timesHW[i+1],'values']
@@ -363,9 +364,9 @@ def reshape_signal(ts, ts_ext, HW_goal, LW_goal, tP_goal=None):
         ts_corr['values_new'] = temp
 
         tide_HWtoHW = ts_corr.loc[timesHW[i]:timesHW[i+1]]
-        ts_corr['times'] = pd.date_range(start=ts_corr.loc[timesHW[i],'times'],end=ts_corr.loc[timesHW[i],'times']+tP_goal,periods=len(tide_HWtoHW))
+        ts_corr['timedelta'] = pd.date_range(start=ts_corr.loc[timesHW[i],'timedelta'],end=ts_corr.loc[timesHW[i],'timedelta']+tP_goal,periods=len(tide_HWtoHW))
 
-    ts_corr = ts_corr.set_index('times',drop=True)
+    ts_corr = ts_corr.set_index('timedelta',drop=True)
     ts_corr['values'] = ts_corr['values_new']
     ts_corr = ts_corr.drop(['values_new'],axis=1)
     return ts_corr

kenmerkendewaarden/havengetallen.py

Lines changed: 1 addition & 1 deletion
@@ -154,7 +154,7 @@ def calc_HWLW_moonculm_combi(data_pd_HWLW_12:pd.DataFrame, moonculm_offset:int =
 
 
 def calc_HWLW_culmhr_summary(data_pd_HWLW):
-    logger.info('calculate medians per hour group for LW and HW')
+    logger.info('calculate median per hour group for LW and HW')
     data_pd_HW = data_pd_HWLW.loc[data_pd_HWLW['HWLWcode']==1]
     data_pd_LW = data_pd_HWLW.loc[data_pd_HWLW['HWLWcode']==2]
 
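
Aside, not from the commit: the log message above describes medians per culmination-hour group. A minimal pandas sketch of that kind of aggregation, with illustrative column names that are assumptions rather than the actual havengetallen columns:

import pandas as pd

# toy extremes table: culmination hour group and water level values
df_hw = pd.DataFrame({"culm_hr": [0, 0, 1, 1, 6, 6],
                      "values": [1.10, 1.20, 1.05, 0.95, 0.80, 0.90]})
print(df_hw.groupby("culm_hr")["values"].median())  # one median per hour group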