Skip to content

Commit

Permalink
alignments of argument names in public functions (#129)
Browse files Browse the repository at this point in the history
* alignments of argument names in public functions and variable names in example scripts

* fixed testcase

* cleanup old todo comments
  • Loading branch information
veenstrajelmer authored Aug 30, 2024
1 parent e70132c commit 20958fb
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 49 deletions.
6 changes: 3 additions & 3 deletions examples/KWK_getcheckdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,11 +113,11 @@
print(f'plotting timeseries data for {current_station}')

# load data
df_ts_meas = kw.read_measurements(dir_output=dir_meas, station=current_station, extremes=False)
df_ext_meas = kw.read_measurements(dir_output=dir_meas, station=current_station, extremes=True)
df_meas = kw.read_measurements(dir_output=dir_meas, station=current_station, extremes=False)
df_ext = kw.read_measurements(dir_output=dir_meas, station=current_station, extremes=True)

# create and save figure
fig,(ax1, ax2) = kw.plot_measurements(df=df_ts_meas, df_ext=df_ext_meas)
fig,(ax1, ax2) = kw.plot_measurements(df_meas=df_meas, df_ext=df_ext)
file_wl_png = os.path.join(dir_meas,f'ts_{current_station}.png')
ax1.set_xlim(pd.Timestamp(start_date), pd.Timestamp(end_date)) # entire period
fig.savefig(file_wl_png.replace('.png','_alldata.png'))
Expand Down
75 changes: 36 additions & 39 deletions examples/KWK_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
logging.basicConfig() # calling basicConfig is essential to set logging level for sub-modules
logging.getLogger("kenmerkendewaarden").setLevel(level="INFO")

# TODO: HW/LW numbers not always increasing (at havengetallen): ['HANSWT','BROUWHVSGT08','PETTZD','DORDT'], overview in https://github.com/Deltares-research/kenmerkendewaarden/issues/101 and the linked wm-ws-dl issue.
# TODO: if persists with updated ddl data, consider catching this ValueError: https://github.com/Deltares/hatyan/issues/311 and https://github.com/Deltares/hatyan/issues/327
# TODO: HW/LW numbers not always increasing (at havengetallen): ['HANSWT','BROUWHVSGT08','PETTZD','DORDT']
# overview in https://github.com/Deltares-research/kenmerkendewaarden/issues/101 and the linked wm-ws-dl issue

tstart_dt = pd.Timestamp(2011,1,1, tz="UTC+01:00")
tstop_dt = pd.Timestamp(2021,1,1, tz="UTC+01:00")
Expand All @@ -27,8 +27,6 @@
# dir_base = r'p:\11208031-010-kenmerkende-waarden-k\work'
dir_base = r'p:\11210325-005-kenmerkende-waarden\work'
dir_meas = os.path.join(dir_base,'measurements_wl_18700101_20240101')
# TODO: move to full data folder (otherwise overschrijding and slotgemiddelde is completely wrong)
# dir_meas = os.path.join(dir_base,'measurements_wl_20101201_20220201')

dir_indicators = os.path.join(dir_base,f'out_tidalindicators_{year_slotgem}')
dir_slotgem = os.path.join(dir_base,f'out_slotgem_{year_slotgem}')
Expand Down Expand Up @@ -81,37 +79,37 @@
plt.close('all')

# timeseries are used for slotgemiddelden, gemgetijkrommen (needs slotgem+havget)
data_pd_meas_all = kw.read_measurements(dir_output=dir_meas, station=current_station, extremes=False,
nap_correction=nap_correction, drop_duplicates=drop_duplicates)
if data_pd_meas_all is not None:
df_meas_all = kw.read_measurements(dir_output=dir_meas, station=current_station, extremes=False,
nap_correction=nap_correction, drop_duplicates=drop_duplicates)
if df_meas_all is not None:
#crop measurement data
data_pd_meas_10y = hatyan.crop_timeseries(data_pd_meas_all, times=slice(tstart_dt,tstop_dt-dt.timedelta(minutes=10)))#,onlyfull=False)
df_meas_10y = hatyan.crop_timeseries(df_meas_all, times=slice(tstart_dt,tstop_dt-dt.timedelta(minutes=10)))#,onlyfull=False)

# extremes are used for slotgemiddelden, havengetallen, overschrijding
data_pd_HWLW_all = kw.read_measurements(dir_output=dir_meas, station=current_station, extremes=True,
nap_correction=nap_correction, drop_duplicates=drop_duplicates)
if data_pd_HWLW_all is not None:
df_ext_all = kw.read_measurements(dir_output=dir_meas, station=current_station, extremes=True,
nap_correction=nap_correction, drop_duplicates=drop_duplicates)
if df_ext_all is not None:
# TODO: make calc_HWLW12345to12() faster: https://github.com/Deltares/hatyan/issues/311
data_pd_HWLW_all_12 = hatyan.calc_HWLW12345to12(data_pd_HWLW_all) #convert 12345 to 12 by taking minimum of 345 as 2 (laagste laagwater)
df_ext_all_12 = hatyan.calc_HWLW12345to12(df_ext_all) #convert 12345 to 12 by taking minimum of 345 as 2 (laagste laagwater)
#crop timeseries to 10y
data_pd_HWLW_10y_12 = hatyan.crop_timeseries(data_pd_HWLW_all_12, times=slice(tstart_dt,tstop_dt),onlyfull=False)
df_ext_10y_12 = hatyan.crop_timeseries(df_ext_all_12, times=slice(tstart_dt,tstop_dt),onlyfull=False)




#### TIDAL INDICATORS
if compute_indicators and data_pd_meas_all is not None and data_pd_HWLW_all is not None:
if compute_indicators and df_meas_all is not None and df_ext_all is not None:
print(f'tidal indicators for {current_station}')
# compute and plot tidal indicators
dict_wltidalindicators = kw.calc_wltidalindicators(data_pd_meas_all, min_coverage=min_coverage)
dict_HWLWtidalindicators = kw.calc_HWLWtidalindicators(data_pd_HWLW_all_12, min_coverage=min_coverage)
dict_wltidalindicators = kw.calc_wltidalindicators(df_meas=df_meas_all, min_coverage=min_coverage)
dict_HWLWtidalindicators = kw.calc_HWLWtidalindicators(df_ext=df_ext_all_12, min_coverage=min_coverage)

# add hat/lat
df_meas_19y = data_pd_meas_all.loc["2001":"2019"]
df_meas_19y = df_meas_all.loc["2001":"2019"]
hat, lat = kw.calc_hat_lat_frommeasurements(df_meas_19y)
dict_HWLWtidalindicators["hat"] = hat
dict_HWLWtidalindicators["lat"] = lat

# merge dictionaries
dict_wltidalindicators.update(dict_HWLWtidalindicators)

Expand All @@ -128,17 +126,17 @@
#### SLOTGEMIDDELDEN
# TODO: the nodal cycle is not in the same phase for all stations, which is not physically correct.
# TODO: more data is needed for proper working of fitting for some stations (2011: BAALHK, BRESKVHVN, GATVBSLE, SCHAARVDND)
if compute_slotgem and data_pd_meas_all is not None and data_pd_HWLW_all is not None:
if compute_slotgem and df_meas_all is not None and df_ext_all is not None:
print(f'slotgemiddelden for {current_station}')

# compute slotgemiddelden, excluding all values after tstop_dt (i.e. year_slotgem)
# including years with too few values and years before physical break
slotgemiddelden_all = kw.calc_slotgemiddelden(df_meas=data_pd_meas_all.loc[:tstop_dt],
df_ext=data_pd_HWLW_all_12.loc[:tstop_dt],
slotgemiddelden_all = kw.calc_slotgemiddelden(df_meas=df_meas_all.loc[:tstop_dt],
df_ext=df_ext_all_12.loc[:tstop_dt],
min_coverage=0, clip_physical_break=True)
# only years with enough values and after potential physical break
slotgemiddelden_valid = kw.calc_slotgemiddelden(df_meas=data_pd_meas_all.loc[:tstop_dt],
df_ext=data_pd_HWLW_all_12.loc[:tstop_dt],
slotgemiddelden_valid = kw.calc_slotgemiddelden(df_meas=df_meas_all.loc[:tstop_dt],
df_ext=df_ext_all_12.loc[:tstop_dt],
min_coverage=min_coverage, clip_physical_break=True)

# plot slotgemiddelden
Expand Down Expand Up @@ -173,16 +171,16 @@


### HAVENGETALLEN
if compute_havengetallen and data_pd_HWLW_all is not None:
if compute_havengetallen and df_ext_all is not None:
print(f'havengetallen for {current_station}')
df_havengetallen, data_pd_HWLW = kw.calc_havengetallen(df_ext=data_pd_HWLW_10y_12, return_df_ext=True)
df_havengetallen, df_HWLW = kw.calc_havengetallen(df_ext=df_ext_10y_12, return_df_ext=True)

# plot hwlw per timeclass including median
fig, axs = kw.plot_HWLW_pertimeclass(data_pd_HWLW, df_havengetallen)
fig, axs = kw.plot_HWLW_pertimeclass(df_ext=df_HWLW, df_havengetallen=df_havengetallen)
fig.savefig(os.path.join(dir_havget,f'HWLW_pertijdsklasse_inclmedianline_{current_station}'))

# plot aardappelgrafiek
fig, (ax1,ax2) = kw.plot_aardappelgrafiek(df_havengetallen)
fig, (ax1,ax2) = kw.plot_aardappelgrafiek(df_havengetallen=df_havengetallen)
fig.savefig(os.path.join(dir_havget, f'aardappelgrafiek_{year_slotgem}_{current_station}'))

#write to csv
Expand All @@ -192,18 +190,18 @@


##### GEMIDDELDE GETIJKROMMEN
if compute_gemgetij and data_pd_meas_all is not None and data_pd_HWLW_all is not None:
if compute_gemgetij and df_meas_all is not None and df_ext_all is not None:
print(f'gemiddelde getijkrommen for {current_station}')
pred_freq = "10s" # frequency influences the accuracy of havengetallen-scaling and is writing frequency of BOI timeseries

# derive getijkrommes: raw, scaled to havengetallen, scaled to havengetallen and 12h25min period
gemgetij_raw = kw.calc_gemiddeldgetij(df_meas=data_pd_meas_10y, df_ext=None,
gemgetij_raw = kw.calc_gemiddeldgetij(df_meas=df_meas_10y, df_ext=None,
freq=pred_freq, nb=0, nf=0,
scale_extremes=False, scale_period=False)
gemgetij_corr = kw.calc_gemiddeldgetij(df_meas=data_pd_meas_10y, df_ext=data_pd_HWLW_10y_12,
gemgetij_corr = kw.calc_gemiddeldgetij(df_meas=df_meas_10y, df_ext=df_ext_10y_12,
freq=pred_freq, nb=1, nf=1,
scale_extremes=True, scale_period=False)
gemgetij_corr_boi = kw.calc_gemiddeldgetij(df_meas=data_pd_meas_10y, df_ext=data_pd_HWLW_10y_12,
gemgetij_corr_boi = kw.calc_gemiddeldgetij(df_meas=df_meas_10y, df_ext=df_ext_10y_12,
freq=pred_freq, nb=0, nf=4,
scale_extremes=True, scale_period=True)

Expand Down Expand Up @@ -241,10 +239,11 @@



###OVERSCHRIJDINGSFREQUENTIES
#### OVERSCHRIJDINGSFREQUENTIES
# TODO: SLR trend correction for exceedance frequencies (overschrijdingsfrequenties) and possibly also for other characteristic values (KW)?
# TODO: resulting freqs seem to be shifted w.r.t. getijtafelboekje (mail PH 9-3-2022)
# assessing the plots: the red line should roughly be an extension of the green line; if it suddenly peaks upwards, that is caused by very extreme values that you presumably already see in the green line
# assessing the plots: the red line should roughly be an extension of the green line; if it suddenly
# peaks upwards, that is caused by very extreme values that you presumably already see in the green line

def initiate_dist_with_hydra_nl(station):
"""
Expand Down Expand Up @@ -283,32 +282,30 @@ def add_validation_dist(dist_dict, dist_type, station):
freqs_interested = [5, 2, 1, 1/2, 1/5, 1/10, 1/20, 1/50, 1/100, 1/200,
1/500, 1/1000, 1/2000, 1/4000, 1/5000, 1/10000]

if compute_overschrijding and data_pd_HWLW_all is not None:
if compute_overschrijding and df_ext_all is not None:
print(f'overschrijdingsfrequenties for {current_station}')

# only include data up to year_slotgem
data_pd_measext = data_pd_HWLW_all_12.loc[:tstop_dt]
df_measext = df_ext_all_12.loc[:tstop_dt]

# 1. Exceedance
dist_exc_hydra = initiate_dist_with_hydra_nl(station=current_station)
dist_exc = kw.calc_overschrijding(df_ext=data_pd_measext, rule_type=None, rule_value=None,
dist_exc = kw.calc_overschrijding(df_ext=df_measext, rule_type=None, rule_value=None,
clip_physical_break=True, dist=dist_exc_hydra,
interp_freqs=freqs_interested)
add_validation_dist(dist_exc, dist_type='exceedance', station=current_station)
dist_exc['Geinterpoleerd'].to_csv(os.path.join(dir_overschrijding, f'Exceedance_{current_station}.csv'))
# dist_exc['Gecombineerd'].to_csv(os.path.join(dir_overschrijding, f'Exceedance_{current_station}_gecombineerd.csv'))

fig, ax = kw.plot_overschrijding(dist_exc)
ax.set_ylim(0,5.5)
fig.savefig(os.path.join(dir_overschrijding, f'Exceedance_lines_{current_station}.png'))

# 2. Deceedance
dist_dec = kw.calc_overschrijding(df_ext=data_pd_measext, rule_type=None, rule_value=None,
dist_dec = kw.calc_overschrijding(df_ext=df_measext, rule_type=None, rule_value=None,
clip_physical_break=True, inverse=True,
interp_freqs=freqs_interested)
add_validation_dist(dist_dec, dist_type='deceedance', station=current_station)
dist_dec['Geinterpoleerd'].to_csv(os.path.join(dir_overschrijding, f'Deceedance_{current_station}.csv'))
# dist_dec['Gecombineerd'].to_csv(os.path.join(dir_overschrijding, f'Deceedance_{current_station}_gecombineerd.csv'))

fig, ax = kw.plot_overschrijding(dist_dec)
fig.savefig(os.path.join(dir_overschrijding, f'Deceedance_lines_{current_station}.png'))
12 changes: 6 additions & 6 deletions kenmerkendewaarden/data_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,13 +66,13 @@ def plot_measurements_amount(df: pd.DataFrame, relative: bool = False):
return fig, ax


def plot_measurements(df: pd.DataFrame, df_ext: pd.DataFrame = None):
def plot_measurements(df_meas: pd.DataFrame, df_ext: pd.DataFrame = None):
"""
Generate a timeseries figure for the measurement timeseries (and extremes) of this station.
Parameters
----------
df : pd.DataFrame
df_meas : pd.DataFrame
Dataframe with the measurement timeseries for a particular station.
df_ext : pd.DataFrame, optional
Dataframe with the measurement extremes for a particular station.
Expand All @@ -85,17 +85,17 @@ def plot_measurements(df: pd.DataFrame, df_ext: pd.DataFrame = None):
Figure axis handle.
"""
station_df = df.attrs["station"]
station_df = df_meas.attrs["station"]
if df_ext is not None:
station_df_ext = df_ext.attrs["station"]
assert station_df == station_df_ext
fig, (ax1, ax2) = hatyan.plot_timeseries(ts=df, ts_ext=df_ext)
fig, (ax1, ax2) = hatyan.plot_timeseries(ts=df_meas, ts_ext=df_ext)
else:
fig, (ax1, ax2) = hatyan.plot_timeseries(ts=df)
fig, (ax1, ax2) = hatyan.plot_timeseries(ts=df_meas)
ax1.set_title(f"timeseries for {station_df}")

# calculate monthly/yearly mean for meas wl data
df_meas_values = df["values"]
df_meas_values = df_meas["values"]
mean_peryearmonth_long = df_meas_values.groupby(
pd.PeriodIndex(df_meas_values.index, freq="M")
).mean()
Expand Down
2 changes: 1 addition & 1 deletion tests/test_data_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def test_plot_measurements_amount(dir_meas_amount, extremes):

@pytest.mark.unittest
def test_plot_measurements(df_meas_2010, df_ext_2010):
kw.plot_measurements(df=df_meas_2010, df_ext=df_ext_2010)
kw.plot_measurements(df_meas=df_meas_2010, df_ext=df_ext_2010)


@pytest.mark.unittest
Expand Down

0 comments on commit 20958fb

Please sign in to comment.