alignments of argument names in public functions (#129)

* alignments of argument names in public functions and variable names in example scripts
* fixed testcase
* cleanup old todo comments
Deltares-research · Aug 30, 2024 · 20958fb · 20958fb
1 parent e70132c
commit 20958fb
Show file tree

Hide file tree

Showing 4 changed files with 46 additions and 49 deletions.
diff --git a/examples/KWK_getcheckdata.py b/examples/KWK_getcheckdata.py
@@ -113,11 +113,11 @@
     print(f'plotting timeseries data for {current_station}')
 
     # load data
-    df_ts_meas = kw.read_measurements(dir_output=dir_meas, station=current_station, extremes=False)
-    df_ext_meas = kw.read_measurements(dir_output=dir_meas, station=current_station, extremes=True)
+    df_meas = kw.read_measurements(dir_output=dir_meas, station=current_station, extremes=False)
+    df_ext = kw.read_measurements(dir_output=dir_meas, station=current_station, extremes=True)
 
     # create and save figure
-    fig,(ax1, ax2) = kw.plot_measurements(df=df_ts_meas, df_ext=df_ext_meas)
+    fig,(ax1, ax2) = kw.plot_measurements(df_meas=df_meas, df_ext=df_ext)
     file_wl_png = os.path.join(dir_meas,f'ts_{current_station}.png')
     ax1.set_xlim(pd.Timestamp(start_date), pd.Timestamp(end_date)) # entire period
     fig.savefig(file_wl_png.replace('.png','_alldata.png'))

diff --git a/examples/KWK_process.py b/examples/KWK_process.py
@@ -13,8 +13,8 @@
 logging.basicConfig() # calling basicConfig is essential to set logging level for sub-modules
 logging.getLogger("kenmerkendewaarden").setLevel(level="INFO")
 
-# TODO: HW/LW numbers not always increasing (at havengetallen): ['HANSWT','BROUWHVSGT08','PETTZD','DORDT'], overview in https://github.com/Deltares-research/kenmerkendewaarden/issues/101 and the linked wm-ws-dl issue.
-# TODO: if persists with updated ddl data, consider catching this ValueError: https://github.com/Deltares/hatyan/issues/311 and https://github.com/Deltares/hatyan/issues/327
+# TODO: HW/LW numbers not always increasing (at havengetallen): ['HANSWT','BROUWHVSGT08','PETTZD','DORDT']
+# overview in https://github.com/Deltares-research/kenmerkendewaarden/issues/101 and the linked wm-ws-dl issue
 
 tstart_dt = pd.Timestamp(2011,1,1, tz="UTC+01:00")
 tstop_dt = pd.Timestamp(2021,1,1, tz="UTC+01:00")
@@ -27,8 +27,6 @@
 # dir_base = r'p:\11208031-010-kenmerkende-waarden-k\work'
 dir_base = r'p:\11210325-005-kenmerkende-waarden\work'
 dir_meas = os.path.join(dir_base,'measurements_wl_18700101_20240101')
-# TODO: move to full data folder (otherwise overschrijding and slotgemiddelde is completely wrong)
-# dir_meas = os.path.join(dir_base,'measurements_wl_20101201_20220201')
 
 dir_indicators = os.path.join(dir_base,f'out_tidalindicators_{year_slotgem}')
 dir_slotgem = os.path.join(dir_base,f'out_slotgem_{year_slotgem}')
@@ -81,37 +79,37 @@
     plt.close('all')
 
     # timeseries are used for slotgemiddelden, gemgetijkrommen (needs slotgem+havget)
-    data_pd_meas_all = kw.read_measurements(dir_output=dir_meas, station=current_station, extremes=False, 
-                                            nap_correction=nap_correction, drop_duplicates=drop_duplicates)
-    if data_pd_meas_all is not None:
+    df_meas_all = kw.read_measurements(dir_output=dir_meas, station=current_station, extremes=False, 
+                                       nap_correction=nap_correction, drop_duplicates=drop_duplicates)
+    if df_meas_all is not None:
         #crop measurement data
-        data_pd_meas_10y = hatyan.crop_timeseries(data_pd_meas_all, times=slice(tstart_dt,tstop_dt-dt.timedelta(minutes=10)))#,onlyfull=False)
+        df_meas_10y = hatyan.crop_timeseries(df_meas_all, times=slice(tstart_dt,tstop_dt-dt.timedelta(minutes=10)))#,onlyfull=False)
 
     # extremes are used for slotgemiddelden, havengetallen, overschrijding
-    data_pd_HWLW_all = kw.read_measurements(dir_output=dir_meas, station=current_station, extremes=True,
-                                            nap_correction=nap_correction, drop_duplicates=drop_duplicates)
-    if data_pd_HWLW_all is not None:
+    df_ext_all = kw.read_measurements(dir_output=dir_meas, station=current_station, extremes=True,
+                                      nap_correction=nap_correction, drop_duplicates=drop_duplicates)
+    if df_ext_all is not None:
         # TODO: make calc_HWLW12345to12() faster: https://github.com/Deltares/hatyan/issues/311
-        data_pd_HWLW_all_12 = hatyan.calc_HWLW12345to12(data_pd_HWLW_all) #convert 12345 to 12 by taking minimum of 345 as 2 (laagste laagwater)
+        df_ext_all_12 = hatyan.calc_HWLW12345to12(df_ext_all) #convert 12345 to 12 by taking minimum of 345 as 2 (laagste laagwater)
         #crop timeseries to 10y
-        data_pd_HWLW_10y_12 = hatyan.crop_timeseries(data_pd_HWLW_all_12, times=slice(tstart_dt,tstop_dt),onlyfull=False)
+        df_ext_10y_12 = hatyan.crop_timeseries(df_ext_all_12, times=slice(tstart_dt,tstop_dt),onlyfull=False)
 
 
 
 
     #### TIDAL INDICATORS
-    if compute_indicators and data_pd_meas_all is not None and data_pd_HWLW_all is not None:
+    if compute_indicators and df_meas_all is not None and df_ext_all is not None:
         print(f'tidal indicators for {current_station}')
         # compute and plot tidal indicators
-        dict_wltidalindicators = kw.calc_wltidalindicators(data_pd_meas_all, min_coverage=min_coverage)
-        dict_HWLWtidalindicators = kw.calc_HWLWtidalindicators(data_pd_HWLW_all_12, min_coverage=min_coverage)
+        dict_wltidalindicators = kw.calc_wltidalindicators(df_meas=df_meas_all, min_coverage=min_coverage)
+        dict_HWLWtidalindicators = kw.calc_HWLWtidalindicators(df_ext=df_ext_all_12, min_coverage=min_coverage)
 
         # add hat/lat
-        df_meas_19y = data_pd_meas_all.loc["2001":"2019"]
+        df_meas_19y = df_meas_all.loc["2001":"2019"]
         hat, lat = kw.calc_hat_lat_frommeasurements(df_meas_19y)
         dict_HWLWtidalindicators["hat"] = hat
         dict_HWLWtidalindicators["lat"] = lat
-                
+
         # merge dictionaries
         dict_wltidalindicators.update(dict_HWLWtidalindicators)
 
@@ -128,17 +126,17 @@
     #### SLOTGEMIDDELDEN
     # TODO: nodal cycle is not in same phase for all stations, this is not physically correct.
     # TODO: more data is needed for proper working of fitting for some stations (2011: BAALHK, BRESKVHVN, GATVBSLE, SCHAARVDND)
-    if compute_slotgem and data_pd_meas_all is not None and data_pd_HWLW_all is not None:
+    if compute_slotgem and df_meas_all is not None and df_ext_all is not None:
         print(f'slotgemiddelden for {current_station}')
 
         # compute slotgemiddelden, exclude all values after tstop_dt (is year_slotgem)
         # including years with too little values and years before physical break
-        slotgemiddelden_all = kw.calc_slotgemiddelden(df_meas=data_pd_meas_all.loc[:tstop_dt], 
-                                                      df_ext=data_pd_HWLW_all_12.loc[:tstop_dt], 
+        slotgemiddelden_all = kw.calc_slotgemiddelden(df_meas=df_meas_all.loc[:tstop_dt], 
+                                                      df_ext=df_ext_all_12.loc[:tstop_dt], 
                                                       min_coverage=0, clip_physical_break=True)
         # only years with enough values and after potential physical break
-        slotgemiddelden_valid = kw.calc_slotgemiddelden(df_meas=data_pd_meas_all.loc[:tstop_dt], 
-                                                        df_ext=data_pd_HWLW_all_12.loc[:tstop_dt], 
+        slotgemiddelden_valid = kw.calc_slotgemiddelden(df_meas=df_meas_all.loc[:tstop_dt], 
+                                                        df_ext=df_ext_all_12.loc[:tstop_dt], 
                                                         min_coverage=min_coverage, clip_physical_break=True)
 
         # plot slotgemiddelden
@@ -173,16 +171,16 @@
 
 
     ### HAVENGETALLEN 
-    if compute_havengetallen and data_pd_HWLW_all is not None:
+    if compute_havengetallen and df_ext_all is not None:
         print(f'havengetallen for {current_station}')
-        df_havengetallen, data_pd_HWLW = kw.calc_havengetallen(df_ext=data_pd_HWLW_10y_12, return_df_ext=True)
+        df_havengetallen, df_HWLW = kw.calc_havengetallen(df_ext=df_ext_10y_12, return_df_ext=True)
 
         # plot hwlw per timeclass including median
-        fig, axs = kw.plot_HWLW_pertimeclass(data_pd_HWLW, df_havengetallen)
+        fig, axs = kw.plot_HWLW_pertimeclass(df_ext=df_HWLW, df_havengetallen=df_havengetallen)
         fig.savefig(os.path.join(dir_havget,f'HWLW_pertijdsklasse_inclmedianline_{current_station}'))
 
         # plot aardappelgrafiek
-        fig, (ax1,ax2) = kw.plot_aardappelgrafiek(df_havengetallen)
+        fig, (ax1,ax2) = kw.plot_aardappelgrafiek(df_havengetallen=df_havengetallen)
         fig.savefig(os.path.join(dir_havget, f'aardappelgrafiek_{year_slotgem}_{current_station}'))
 
         #write to csv
@@ -192,18 +190,18 @@
 
 
     ##### GEMIDDELDE GETIJKROMMEN
-    if compute_gemgetij and data_pd_meas_all is not None and data_pd_HWLW_all is not None:
+    if compute_gemgetij and df_meas_all is not None and df_ext_all is not None:
         print(f'gemiddelde getijkrommen for {current_station}')
         pred_freq = "10s" # frequency influences the accuracy of havengetallen-scaling and is writing frequency of BOI timeseries
 
         # derive getijkrommes: raw, scaled to havengetallen, scaled to havengetallen and 12h25min period
-        gemgetij_raw = kw.calc_gemiddeldgetij(df_meas=data_pd_meas_10y, df_ext=None,
+        gemgetij_raw = kw.calc_gemiddeldgetij(df_meas=df_meas_10y, df_ext=None,
                                               freq=pred_freq, nb=0, nf=0, 
                                               scale_extremes=False, scale_period=False)
-        gemgetij_corr = kw.calc_gemiddeldgetij(df_meas=data_pd_meas_10y, df_ext=data_pd_HWLW_10y_12,
+        gemgetij_corr = kw.calc_gemiddeldgetij(df_meas=df_meas_10y, df_ext=df_ext_10y_12,
                                                freq=pred_freq, nb=1, nf=1, 
                                                scale_extremes=True, scale_period=False)
-        gemgetij_corr_boi = kw.calc_gemiddeldgetij(df_meas=data_pd_meas_10y, df_ext=data_pd_HWLW_10y_12,
+        gemgetij_corr_boi = kw.calc_gemiddeldgetij(df_meas=df_meas_10y, df_ext=df_ext_10y_12,
                                                    freq=pred_freq, nb=0, nf=4, 
                                                    scale_extremes=True, scale_period=True)
 
@@ -241,10 +239,11 @@
 
 
 
-    ###OVERSCHRIJDINGSFREQUENTIES
+    #### OVERSCHRIJDINGSFREQUENTIES
     # TODO: SLR trend correctie voor overschrijdingsfrequenties en evt ook voor andere KW?
     # TODO: resulting freqs seem to be shifted w.r.t. getijtafelboekje (mail PH 9-3-2022)
-    # plots beoordelen: rode lijn moet ongeveer verlengde zijn van groene, als die ineens omhoog piekt komt dat door hele extreme waardes die je dan vermoedelijk ook al ziet in je groene lijn
+    # plots beoordelen: rode lijn moet ongeveer verlengde zijn van groene, als die ineens 
+    # omhoog piekt komt dat door hele extreme waardes die je dan vermoedelijk ook al ziet in je groene lijn
 
     def initiate_dist_with_hydra_nl(station):
         """
@@ -283,32 +282,30 @@ def add_validation_dist(dist_dict, dist_type, station):
     freqs_interested = [5, 2, 1, 1/2, 1/5, 1/10, 1/20, 1/50, 1/100, 1/200,
                          1/500, 1/1000, 1/2000, 1/4000, 1/5000, 1/10000]
 
-    if compute_overschrijding and data_pd_HWLW_all is not None:
+    if compute_overschrijding and df_ext_all is not None:
         print(f'overschrijdingsfrequenties for {current_station}')
 
         # only include data up to year_slotgem
-        data_pd_measext = data_pd_HWLW_all_12.loc[:tstop_dt]
+        df_measext = df_ext_all_12.loc[:tstop_dt]
 
         # 1. Exceedance
         dist_exc_hydra = initiate_dist_with_hydra_nl(station=current_station)
-        dist_exc = kw.calc_overschrijding(df_ext=data_pd_measext, rule_type=None, rule_value=None, 
+        dist_exc = kw.calc_overschrijding(df_ext=df_measext, rule_type=None, rule_value=None, 
                                           clip_physical_break=True, dist=dist_exc_hydra,
                                           interp_freqs=freqs_interested)
         add_validation_dist(dist_exc, dist_type='exceedance', station=current_station)
         dist_exc['Geinterpoleerd'].to_csv(os.path.join(dir_overschrijding, f'Exceedance_{current_station}.csv'))
-        # dist_exc['Gecombineerd'].to_csv(os.path.join(dir_overschrijding, f'Exceedance_{current_station}_gecombineerd.csv'))
 
         fig, ax = kw.plot_overschrijding(dist_exc)
         ax.set_ylim(0,5.5)
         fig.savefig(os.path.join(dir_overschrijding, f'Exceedance_lines_{current_station}.png'))
 
         # 2. Deceedance
-        dist_dec = kw.calc_overschrijding(df_ext=data_pd_measext, rule_type=None, rule_value=None, 
+        dist_dec = kw.calc_overschrijding(df_ext=df_measext, rule_type=None, rule_value=None, 
                                           clip_physical_break=True, inverse=True,
                                           interp_freqs=freqs_interested)
         add_validation_dist(dist_dec, dist_type='deceedance', station=current_station)
         dist_dec['Geinterpoleerd'].to_csv(os.path.join(dir_overschrijding, f'Deceedance_{current_station}.csv'))
-        # dist_dec['Gecombineerd'].to_csv(os.path.join(dir_overschrijding, f'Deceedance_{current_station}_gecombineerd.csv'))
 
         fig, ax = kw.plot_overschrijding(dist_dec)
         fig.savefig(os.path.join(dir_overschrijding, f'Deceedance_lines_{current_station}.png'))
diff --git a/kenmerkendewaarden/data_analysis.py b/kenmerkendewaarden/data_analysis.py
@@ -66,13 +66,13 @@ def plot_measurements_amount(df: pd.DataFrame, relative: bool = False):
     return fig, ax
 
 
-def plot_measurements(df: pd.DataFrame, df_ext: pd.DataFrame = None):
+def plot_measurements(df_meas: pd.DataFrame, df_ext: pd.DataFrame = None):
     """
     Generate a timeseries figure for the measurement timeseries (and extremes) of this station.
 
     Parameters
     ----------
-    df : pd.DataFrame
+    df_meas : pd.DataFrame
         Dataframe with the measurement timeseries for a particular station.
     df_ext : pd.DataFrame, optional
         Dataframe with the measurement extremes for a particular station.
@@ -85,17 +85,17 @@ def plot_measurements(df: pd.DataFrame, df_ext: pd.DataFrame = None):
         Figure axis handle.
 
     """
-    station_df = df.attrs["station"]
+    station_df = df_meas.attrs["station"]
     if df_ext is not None:
         station_df_ext = df_ext.attrs["station"]
         assert station_df == station_df_ext
-        fig, (ax1, ax2) = hatyan.plot_timeseries(ts=df, ts_ext=df_ext)
+        fig, (ax1, ax2) = hatyan.plot_timeseries(ts=df_meas, ts_ext=df_ext)
     else:
-        fig, (ax1, ax2) = hatyan.plot_timeseries(ts=df)
+        fig, (ax1, ax2) = hatyan.plot_timeseries(ts=df_meas)
     ax1.set_title(f"timeseries for {station_df}")
 
     # calculate monthly/yearly mean for meas wl data
-    df_meas_values = df["values"]
+    df_meas_values = df_meas["values"]
     mean_peryearmonth_long = df_meas_values.groupby(
         pd.PeriodIndex(df_meas_values.index, freq="M")
     ).mean()

diff --git a/tests/test_data_analysis.py b/tests/test_data_analysis.py
@@ -97,7 +97,7 @@ def test_plot_measurements_amount(dir_meas_amount, extremes):
 
 @pytest.mark.unittest
 def test_plot_measurements(df_meas_2010, df_ext_2010):
-    kw.plot_measurements(df=df_meas_2010, df_ext=df_ext_2010)
+    kw.plot_measurements(df_meas=df_meas_2010, df_ext=df_ext_2010)
 
 
 @pytest.mark.unittest