diff --git a/docs/whats-new.md b/docs/whats-new.md
index 2739a07..e7a83c8 100644
--- a/docs/whats-new.md
+++ b/docs/whats-new.md
@@ -9,6 +9,7 @@
 - support for timedelta `diff()` because of update to `pandas>=2.1.4` in #161
 - neater handling of time in `kw.calc_havengetallen()` in #163
 - automatic cropping of timeseries if required to simplify user interaction in #168
+- exposed mean HW/LW during spring and neaptide with `kw.calc_HWLW_springneap()` in #175
 
 ### Deprecated
 - deprecated debug argument for `kw.calc_gemiddeldgetij()` in #170
diff --git a/examples/KWK_process.py b/examples/KWK_process.py
index c1f13e4..48ea492 100644
--- a/examples/KWK_process.py
+++ b/examples/KWK_process.py
@@ -100,6 +100,7 @@
     # compute and plot tidal indicators
     dict_wltidalindicators = kw.calc_wltidalindicators(df_meas=df_meas_todate, min_coverage=min_coverage)
     dict_HWLWtidalindicators = kw.calc_HWLWtidalindicators(df_ext=df_ext_todate, min_coverage=min_coverage)
+    dict_HWLW_springneap = kw.calc_HWLW_springneap(df_ext=df_ext_todate, min_coverage=min_coverage)
 
     # add hat/lat
     hat, lat = kw.calc_hat_lat_frommeasurements(df_meas_todate)
@@ -108,6 +109,7 @@
 
     # merge dictionaries
     dict_wltidalindicators.update(dict_HWLWtidalindicators)
+    dict_wltidalindicators.update(dict_HWLW_springneap)
 
     # csv for yearlymonthly indicators
     for key in ['wl_mean_peryear','wl_mean_permonth']:
diff --git a/kenmerkendewaarden/havengetallen.py b/kenmerkendewaarden/havengetallen.py
index f9b198d..ec37d2c 100644
--- a/kenmerkendewaarden/havengetallen.py
+++ b/kenmerkendewaarden/havengetallen.py
@@ -24,6 +24,7 @@
 
 __all__ = [
     "calc_havengetallen",
+    "calc_HWLW_springneap",
     "plot_HWLW_pertimeclass",
     "plot_aardappelgrafiek",
 ]
@@ -73,30 +74,10 @@ def calc_havengetallen(
     raise_extremes_with_aggers(df_ext)
     df_ext_10y = crop_timeseries_last_nyears(df=df_ext, nyears=10)
 
-    # check if coverage is high enough for havengetallen
+    # check if coverage is high enough
     if min_coverage is not None:
-        # TODO: compute_actual_counts only returns years for which there are no nans, so will have different length than expected counts if there is an all-nan year
-        # TODO: if we supply 4 years of complete data instead of 10 years, no error is raised
-        data_pd_hw = df_ext_10y.loc[df_ext_10y["HWLWcode"] == 1]["values"]
-        df_actual_counts_peryear = compute_actual_counts(data_pd_hw, freq="Y")
-        df_expected_counts_peryear = compute_expected_counts(data_pd_hw, freq="Y")
-        # floor expected counts to avoid rounding issues
-        df_expected_counts_peryear = df_expected_counts_peryear.apply(np.floor)
-        df_min_counts_peryear = df_expected_counts_peryear * min_coverage
-        bool_coverage_toolow = df_actual_counts_peryear < df_min_counts_peryear
-        df_debug = pd.DataFrame(
-            {
-                "#required": df_min_counts_peryear,
-                "#actual": df_actual_counts_peryear,
-                "too little": bool_coverage_toolow,
-            }
-        )
-        if bool_coverage_toolow.any():
-            raise ValueError(
-                f"coverage of some years is lower than "
-                f"min_coverage={min_coverage}:\n{df_debug}"
-            )
-
+        check_min_coverage_extremes(df_ext=df_ext_10y, min_coverage=min_coverage)
+
     current_station = df_ext_10y.attrs["station"]
     logger.info(f"computing havengetallen for {current_station}")
     df_ext_culm = calc_hwlw_moonculm_combi(df_ext=df_ext_10y, moonculm_offset=moonculm_offset)
@@ -106,6 +87,79 @@
         return df_havengetallen, df_ext_culm
     else:
         return df_havengetallen
+
+
+def calc_HWLW_springneap(
+    df_ext: pd.DataFrame,
+    min_coverage=None,
+    moonculm_offset: int = 4):
+    raise_extremes_with_aggers(df_ext)
+
+    # TODO: moonculminations cannot be computed before 1900
+    if df_ext.index.min().year < 1901:
+        logger.warning("calc_HWLW_springneap() only supports timestamps after 1900 "
+                       "all older data will be ignored")
+        df_ext = df_ext.loc["1901":]
+
+    # check if coverage is high enough
+    if min_coverage is not None:
+        check_min_coverage_extremes(df_ext=df_ext, min_coverage=min_coverage)
+
+    current_station = df_ext.attrs["station"]
+    logger.info(f"computing HWLW for spring/neap tide for {current_station}")
+    df_ext_culm = calc_hwlw_moonculm_combi(df_ext=df_ext, moonculm_offset=moonculm_offset)
+
+    # all HW/LW at spring/neaptide
+    bool_hw = df_ext_culm["HWLWcode"] == 1
+    bool_lw = df_ext_culm["HWLWcode"] == 2
+    bool_spring = df_ext_culm["culm_hr"] == 0
+    bool_neap = df_ext_culm["culm_hr"] == 6
+    hw_spring = df_ext_culm.loc[bool_hw & bool_spring]['values']
+    lw_spring = df_ext_culm.loc[bool_lw & bool_spring]['values']
+    hw_neap = df_ext_culm.loc[bool_hw & bool_neap]['values']
+    lw_neap = df_ext_culm.loc[bool_lw & bool_neap]['values']
+
+    # mean HW/LW at spring/neap tide
+    pi_hw_sp_y = pd.PeriodIndex(hw_spring.index, freq="Y")
+    pi_lw_sp_y = pd.PeriodIndex(lw_spring.index, freq="Y")
+    pi_hw_np_y = pd.PeriodIndex(hw_neap.index, freq="Y")
+    pi_lw_np_y = pd.PeriodIndex(lw_neap.index, freq="Y")
+    hw_spring_peryear = hw_spring.groupby(pi_hw_sp_y).mean()
+    lw_spring_peryear = lw_spring.groupby(pi_lw_sp_y).mean()
+    hw_neap_peryear = hw_neap.groupby(pi_hw_np_y).mean()
+    lw_neap_peryear = lw_neap.groupby(pi_lw_np_y).mean()
+
+    # merge in dict
+    dict_hwlw_springneap = {
+        "HW_spring_mean_peryear": hw_spring_peryear,
+        "LW_spring_mean_peryear": lw_spring_peryear,
+        "HW_neap_mean_peryear": hw_neap_peryear,
+        "LW_neap_mean_peryear": lw_neap_peryear,
+    }
+    return dict_hwlw_springneap
+
+
+def check_min_coverage_extremes(df_ext, min_coverage):
+    # TODO: compute_actual_counts only returns years for which there are no nans, so will have different length than expected counts if there is an all-nan year
+    # TODO: if we supply 4 years of complete data instead of 10 years, no error is raised
+    data_pd_hw = df_ext.loc[df_ext["HWLWcode"] == 1]["values"]
+    df_actual_counts_peryear = compute_actual_counts(data_pd_hw, freq="Y")
+    df_expected_counts_peryear = compute_expected_counts(data_pd_hw, freq="Y")
+    # floor expected counts to avoid rounding issues
+    df_expected_counts_peryear = df_expected_counts_peryear.apply(np.floor)
+    df_min_counts_peryear = df_expected_counts_peryear * min_coverage
+    bool_coverage_toolow = df_actual_counts_peryear < df_min_counts_peryear
+    df_debug = pd.DataFrame(
+        {
+            "#required": df_min_counts_peryear.loc[bool_coverage_toolow],
+            "#actual": df_actual_counts_peryear.loc[bool_coverage_toolow],
+        }
+    )
+    if bool_coverage_toolow.any():
+        raise ValueError(
+            f"coverage of some years is lower than "
+            f"min_coverage={min_coverage}:\n{df_debug}"
+        )
diff --git a/tests/test_havengetallen.py b/tests/test_havengetallen.py
index 1dbf4b7..4018363 100644
--- a/tests/test_havengetallen.py
+++ b/tests/test_havengetallen.py
@@ -152,6 +152,35 @@ def test_calc_havengetallen_toolittle_data(df_ext_12_2010_2014):
     assert "coverage of some years is lower than min_coverage" in str(e.value)
 
 
+@pytest.mark.unittest
+def test_calc_HWLW_springneap(df_ext_12_2010_2014):
+    dict_spnp = kw.calc_HWLW_springneap(df_ext_12_2010_2014)
+    df_columns = ['HW_spring_mean_peryear', 'LW_spring_mean_peryear',
+                  'HW_neap_mean_peryear', 'LW_neap_mean_peryear']
+    assert set(dict_spnp.keys()) == set(df_columns)
+
+    for key in df_columns:
+        years_act = dict_spnp[key].index.year.tolist()
+        years_exp = [2010, 2011, 2012, 2013, 2014]
+        assert years_act == years_exp
+
+    vals_act = dict_spnp['HW_spring_mean_peryear'].values
+    vals_exp = np.array([1.33551724, 1.28111111, 1.29563636, 1.33185185, 1.37745455])
+    assert np.allclose(vals_act, vals_exp)
+
+    vals_act = dict_spnp['LW_spring_mean_peryear'].values
+    vals_exp = np.array([-0.59724138, -0.6212963, -0.61872727, -0.60407407, -0.60145455])
+    assert np.allclose(vals_act, vals_exp)
+
+    vals_act = dict_spnp['HW_neap_mean_peryear'].values
+    vals_exp = np.array([0.83887097, 0.95854839, 0.86225806, 0.87903226, 0.9696875])
+    assert np.allclose(vals_act, vals_exp)
+
+    vals_act = dict_spnp['LW_neap_mean_peryear'].values
+    vals_exp = np.array([-0.62919355, -0.46758065, -0.5633871, -0.60193548, -0.51421875])
+    assert np.allclose(vals_act, vals_exp)
+
+
 @pytest.mark.unittest
 def test_calc_HWLW_culmhr_summary_tidalcoeff(df_ext_12_2010):
     """
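For reference, a minimal usage sketch of the new public function. It assumes an extremes DataFrame `df_ext` (datetime index, `values` and `HWLWcode` columns, `df_ext.attrs["station"]` set) has already been loaded, as is done for `df_ext_todate` in `examples/KWK_process.py`; the `min_coverage` value below is purely illustrative:

```python
import kenmerkendewaarden as kw

# df_ext is assumed to be an already-loaded HW/LW extremes DataFrame,
# comparable to df_ext_todate in examples/KWK_process.py
dict_spnp = kw.calc_HWLW_springneap(df_ext=df_ext, min_coverage=0.9)

# the result is a dict of four yearly-mean pandas Series;
# timestamps before 1901 are ignored with a warning
for key in ["HW_spring_mean_peryear", "LW_spring_mean_peryear",
            "HW_neap_mean_peryear", "LW_neap_mean_peryear"]:
    print(key)
    print(dict_spnp[key])
```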
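The per-year means inside `calc_HWLW_springneap()` are computed by grouping a datetime-indexed Series on a yearly `pd.PeriodIndex`. A standalone sketch of that pattern with synthetic data (not station data) is shown below:

```python
import numpy as np
import pandas as pd

# synthetic daily series standing in for the selected spring/neap extremes
idx = pd.date_range("2010-01-01", "2012-12-31", freq="D")
ser = pd.Series(np.random.default_rng(0).normal(size=len(idx)), index=idx)

# group by calendar year via a yearly PeriodIndex and take the mean,
# mirroring the *_mean_peryear Series returned by calc_HWLW_springneap()
ser_peryear = ser.groupby(pd.PeriodIndex(ser.index, freq="Y")).mean()
print(ser_peryear)
```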
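A short sketch of how the `min_coverage` guard surfaces to the caller: the factored-out `check_min_coverage_extremes()` raises a `ValueError` listing the required versus actual extreme counts for the offending years. The `df_ext` and threshold here are the same assumptions as above:

```python
import kenmerkendewaarden as kw

try:
    # a year with too few extremes fails the coverage check
    kw.calc_HWLW_springneap(df_ext=df_ext, min_coverage=0.9)
except ValueError as e:
    # message starts with "coverage of some years is lower than min_coverage=..."
    print(e)
```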