Expose mean HW/LW for spring/neap tide #175

Merged: 6 commits, Oct 28, 2024
docs/whats-new.md (1 addition, 0 deletions)
@@ -9,6 +9,7 @@
- support for timedelta `diff()` because of update to `pandas>=2.1.4` in #161
- neater handling of time in `kw.calc_havengetallen()` in #163
- automatic cropping of timeseries if required to simplify user interaction in #168
- exposed mean HW/LW during spring and neap tide with `kw.calc_HWLW_springneap()` in #175

### Deprecated
- deprecated debug argument for `kw.calc_gemiddeldgetij()` in #170
examples/KWK_process.py (2 additions, 0 deletions)
@@ -100,6 +100,7 @@
# compute and plot tidal indicators
dict_wltidalindicators = kw.calc_wltidalindicators(df_meas=df_meas_todate, min_coverage=min_coverage)
dict_HWLWtidalindicators = kw.calc_HWLWtidalindicators(df_ext=df_ext_todate, min_coverage=min_coverage)
dict_HWLW_springneap = kw.calc_HWLW_springneap(df_ext=df_ext_todate, min_coverage=min_coverage)

# add hat/lat
hat, lat = kw.calc_hat_lat_frommeasurements(df_meas_todate)
@@ -108,6 +109,7 @@

# merge dictionaries
dict_wltidalindicators.update(dict_HWLWtidalindicators)
dict_wltidalindicators.update(dict_HWLW_springneap)

# csv for yearlymonthly indicators
for key in ['wl_mean_peryear','wl_mean_permonth']:
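Outside the full example script above, the new function can also be called on its own. Below is a minimal sketch (not the package's own example): it assumes `df_ext` is a tidal-extremes DataFrame in the format used elsewhere in this package, i.e. a DatetimeIndex after 1900, a `values` column and an `HWLWcode` column (1 = high water, 2 = low water), with `attrs["station"]` set; how `df_ext` is read or retrieved is left out, and `min_coverage=0.9` is just an arbitrary example value.

```python
import kenmerkendewaarden as kw

# df_ext: extremes DataFrame (DatetimeIndex, "values" and "HWLWcode" columns,
# attrs["station"] set); obtaining it is omitted in this sketch
df_ext = ...  # e.g. the df_ext_todate from the example script above

dict_spnp = kw.calc_HWLW_springneap(df_ext=df_ext, min_coverage=0.9)

# the result is a dict with four yearly-mean series
for key in ["HW_spring_mean_peryear", "LW_spring_mean_peryear",
            "HW_neap_mean_peryear", "LW_neap_mean_peryear"]:
    print(key)
    print(dict_spnp[key])
```

Which years appear in each series depends on the coverage of the input data; timestamps before 1901 are dropped with a warning, as in the function below.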
kenmerkendewaarden/havengetallen.py (77 additions, 23 deletions)
@@ -24,6 +24,7 @@

__all__ = [
"calc_havengetallen",
"calc_HWLW_springneap",
"plot_HWLW_pertimeclass",
"plot_aardappelgrafiek",
]
@@ -73,30 +74,10 @@ def calc_havengetallen(
raise_extremes_with_aggers(df_ext)
df_ext_10y = crop_timeseries_last_nyears(df=df_ext, nyears=10)

# check if coverage is high enough for havengetallen
# check if coverage is high enough
if min_coverage is not None:
# TODO: compute_actual_counts only returns years for which there are no nans, so will have different length than expected counts if there is an all-nan year
# TODO: if we supply 4 years of complete data instead of 10 years, no error is raised
data_pd_hw = df_ext_10y.loc[df_ext_10y["HWLWcode"] == 1]["values"]
df_actual_counts_peryear = compute_actual_counts(data_pd_hw, freq="Y")
df_expected_counts_peryear = compute_expected_counts(data_pd_hw, freq="Y")
# floor expected counts to avoid rounding issues
df_expected_counts_peryear = df_expected_counts_peryear.apply(np.floor)
df_min_counts_peryear = df_expected_counts_peryear * min_coverage
bool_coverage_toolow = df_actual_counts_peryear < df_min_counts_peryear
df_debug = pd.DataFrame(
{
"#required": df_min_counts_peryear,
"#actual": df_actual_counts_peryear,
"too little": bool_coverage_toolow,
}
)
if bool_coverage_toolow.any():
raise ValueError(
f"coverage of some years is lower than "
f"min_coverage={min_coverage}:\n{df_debug}"
)

check_min_coverage_extremes(df_ext=df_ext_10y, min_coverage=min_coverage)

current_station = df_ext_10y.attrs["station"]
logger.info(f"computing havengetallen for {current_station}")
df_ext_culm = calc_hwlw_moonculm_combi(df_ext=df_ext_10y, moonculm_offset=moonculm_offset)
@@ -106,6 +87,79 @@
return df_havengetallen, df_ext_culm
else:
return df_havengetallen


def calc_HWLW_springneap(
df_ext: pd.DataFrame,
min_coverage=None,
moonculm_offset: int = 4):
raise_extremes_with_aggers(df_ext)

# TODO: moonculminations cannot be computed before 1900
if df_ext.index.min().year < 1901:
logger.warning("calc_HWLW_springneap() only supports timestamps after 1900 "
"all older data will be ignored")
df_ext = df_ext.loc["1901":]

# check if coverage is high enough
if min_coverage is not None:
check_min_coverage_extremes(df_ext=df_ext, min_coverage=min_coverage)

current_station = df_ext.attrs["station"]
logger.info(f"computing HWLW for spring/neap tide for {current_station}")
df_ext_culm = calc_hwlw_moonculm_combi(df_ext=df_ext, moonculm_offset=moonculm_offset)

# all HW/LW at spring/neaptide
bool_hw = df_ext_culm["HWLWcode"] == 1
bool_lw = df_ext_culm["HWLWcode"] == 2
bool_spring = df_ext_culm["culm_hr"] == 0
bool_neap = df_ext_culm["culm_hr"] == 6
hw_spring = df_ext_culm.loc[bool_hw & bool_spring]['values']
lw_spring = df_ext_culm.loc[bool_lw & bool_spring]['values']
hw_neap = df_ext_culm.loc[bool_hw & bool_neap]['values']
lw_neap = df_ext_culm.loc[bool_lw & bool_neap]['values']

# mean HW/LW at spring/neap tide
pi_hw_sp_y = pd.PeriodIndex(hw_spring.index, freq="Y")
pi_lw_sp_y = pd.PeriodIndex(lw_spring.index, freq="Y")
pi_hw_np_y = pd.PeriodIndex(hw_neap.index, freq="Y")
pi_lw_np_y = pd.PeriodIndex(lw_neap.index, freq="Y")
hw_spring_peryear = hw_spring.groupby(pi_hw_sp_y).mean()
lw_spring_peryear = lw_spring.groupby(pi_lw_sp_y).mean()
hw_neap_peryear = hw_neap.groupby(pi_hw_np_y).mean()
lw_neap_peryear = lw_neap.groupby(pi_lw_np_y).mean()

# merge in dict
dict_hwlw_springneap = {
"HW_spring_mean_peryear": hw_spring_peryear,
"LW_spring_mean_peryear": lw_spring_peryear,
"HW_neap_mean_peryear": hw_neap_peryear,
"LW_neap_mean_peryear": lw_neap_peryear,
}
return dict_hwlw_springneap


def check_min_coverage_extremes(df_ext, min_coverage):
# TODO: compute_actual_counts only returns years for which there are no nans, so will have different length than expected counts if there is an all-nan year
# TODO: if we supply 4 years of complete data instead of 10 years, no error is raised
data_pd_hw = df_ext.loc[df_ext["HWLWcode"] == 1]["values"]
df_actual_counts_peryear = compute_actual_counts(data_pd_hw, freq="Y")
df_expected_counts_peryear = compute_expected_counts(data_pd_hw, freq="Y")
# floor expected counts to avoid rounding issues
df_expected_counts_peryear = df_expected_counts_peryear.apply(np.floor)
df_min_counts_peryear = df_expected_counts_peryear * min_coverage
bool_coverage_toolow = df_actual_counts_peryear < df_min_counts_peryear
df_debug = pd.DataFrame(
{
"#required": df_min_counts_peryear.loc[bool_coverage_toolow],
"#actual": df_actual_counts_peryear.loc[bool_coverage_toolow],
}
)
if bool_coverage_toolow.any():
raise ValueError(
f"coverage of some years is lower than "
f"min_coverage={min_coverage}:\n{df_debug}"
)


def get_moonculm_idxHWLWno(tstart, tstop):
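The coverage check that used to live inline in `calc_havengetallen()` is now shared via `check_min_coverage_extremes()` above. As a rough standalone illustration of the arithmetic only (made-up counts, not the package's own data or helpers), the sketch below mirrors the check: expected yearly counts are floored, scaled by `min_coverage`, and any year whose actual count falls below that threshold is reported.

```python
import numpy as np
import pandas as pd

# made-up yearly high-water counts; the real check derives these from the
# extremes timeseries via compute_actual_counts() and compute_expected_counts()
actual_counts = pd.Series({2010: 700, 2011: 703, 2012: 350, 2013: 699})
expected_counts = pd.Series({2010: 705.8, 2011: 705.8, 2012: 707.7, 2013: 705.8})

min_coverage = 0.95
# floor expected counts to avoid rounding issues, then scale by min_coverage
min_counts = np.floor(expected_counts) * min_coverage
bool_toolow = actual_counts < min_counts
if bool_toolow.any():
    df_debug = pd.DataFrame({"#required": min_counts.loc[bool_toolow],
                             "#actual": actual_counts.loc[bool_toolow]})
    # the package raises a ValueError here; printing keeps the sketch self-contained
    print(f"coverage of some years is lower than min_coverage={min_coverage}:\n{df_debug}")
```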
tests/test_havengetallen.py (29 additions, 0 deletions)
@@ -152,6 +152,35 @@ def test_calc_havengetallen_toolittle_data(df_ext_12_2010_2014):
assert "coverage of some years is lower than min_coverage" in str(e.value)


@pytest.mark.unittest
def test_calc_HWLW_springneap(df_ext_12_2010_2014):
dict_spnp = kw.calc_HWLW_springneap(df_ext_12_2010_2014)
df_columns = ['HW_spring_mean_peryear', 'LW_spring_mean_peryear',
'HW_neap_mean_peryear', 'LW_neap_mean_peryear']
assert set(dict_spnp.keys()) == set(df_columns)

for key in df_columns:
years_act = dict_spnp[key].index.year.tolist()
years_exp = [2010, 2011, 2012, 2013, 2014]
assert years_act == years_exp

vals_act = dict_spnp['HW_spring_mean_peryear'].values
vals_exp = np.array([1.33551724, 1.28111111, 1.29563636, 1.33185185, 1.37745455])
assert np.allclose(vals_act, vals_exp)

vals_act = dict_spnp['LW_spring_mean_peryear'].values
vals_exp = np.array([-0.59724138, -0.6212963, -0.61872727, -0.60407407, -0.60145455])
assert np.allclose(vals_act, vals_exp)

vals_act = dict_spnp['HW_neap_mean_peryear'].values
vals_exp = np.array([0.83887097, 0.95854839, 0.86225806, 0.87903226, 0.9696875])
assert np.allclose(vals_act, vals_exp)

vals_act = dict_spnp['LW_neap_mean_peryear'].values
vals_exp = np.array([-0.62919355, -0.46758065, -0.5633871 , -0.60193548, -0.51421875])
assert np.allclose(vals_act, vals_exp)


@pytest.mark.unittest
def test_calc_HWLW_culmhr_summary_tidalcoeff(df_ext_12_2010):
"""