From fcab9c0322700608f77df82ba642f76c2d86efaf Mon Sep 17 00:00:00 2001 From: Edwin Spee <34799234+epsig@users.noreply.github.com> Date: Thu, 10 Oct 2024 09:14:51 +0200 Subject: [PATCH] add bounds argument and split log(a*exp(b)) (#149) * add bounds argument and split log(a*exp(b)) * updated tests * reverted formatting * reverted formatting --------- Co-authored-by: veenstrajelmer --- kenmerkendewaarden/overschrijding.py | 9 ++-- tests/test_data_retrieve.py | 8 +-- tests/test_overschrijding.py | 79 ++++++++++++++++------------ tests/test_slotgemiddelden.py | 10 ++-- tests/test_tidalindicators.py | 45 +++++++++++----- tests/test_utils.py | 13 +++-- 6 files changed, 101 insertions(+), 63 deletions(-) diff --git a/kenmerkendewaarden/overschrijding.py b/kenmerkendewaarden/overschrijding.py index 0b53620..a2698b3 100644 --- a/kenmerkendewaarden/overschrijding.py +++ b/kenmerkendewaarden/overschrijding.py @@ -5,6 +5,7 @@ import pandas as pd import numpy as np +import math import matplotlib.pyplot as plt from matplotlib import ticker from scipy import optimize, signal @@ -318,17 +319,18 @@ def pfunc_inverse(p_X_gt_x, p_val_gt_threshold, threshold, sigma, alpha): ) ** (1 / alpha) def der_pfunc(x, p_val_gt_threshold, threshold, alpha, sigma): - return ( + longexpra = ( -p_val_gt_threshold * (alpha * x ** (alpha - 1)) * (sigma ** (-alpha)) - * np.exp(-((x / sigma) ** alpha) + ((threshold / sigma) ** alpha)) ) + longexprb = -((x / sigma) ** alpha) + ((threshold / sigma) ** alpha) + return np.log(-longexpra) + longexprb def cost_func(params, *args): return -np.sum( [ - np.log(-der_pfunc(x, args[0], args[1], params[0], params[1])) + der_pfunc(x, args[0], args[1], params[0], params[1]) for x in args[2] ] ) @@ -339,6 +341,7 @@ def cost_func(params, *args): x0=initial_guess, args=(p_val_gt_threshold, threshold, values[values > threshold]), method="Nelder-Mead", + bounds = [[-math.inf, math.inf], [1e-10, math.inf]], options={"maxiter": 1e4}, ) if result.success: diff --git a/tests/test_data_retrieve.py b/tests/test_data_retrieve.py index 40d9cd1..08d9e33 100644 --- a/tests/test_data_retrieve.py +++ b/tests/test_data_retrieve.py @@ -33,10 +33,10 @@ def test_drop_duplicate_times(df_meas_2010, caplog): assert len(meas_duplicated) == 105120 assert len(meas_clean) == 52560 - + # assert logging messages - assert '52530 rows with duplicated time-value-combinations dropped' in caplog.text - assert '30 rows with duplicated times dropped' in caplog.text + assert "52530 rows with duplicated time-value-combinations dropped" in caplog.text + assert "30 rows with duplicated times dropped" in caplog.text @pytest.mark.unittest @@ -45,7 +45,7 @@ def test_drop_duplicate_times_noaction(df_meas_2010, caplog): assert len(df_meas_2010) == 52560 assert len(meas_clean) == 52560 - + # assert that there is no logging messages assert caplog.text == "" diff --git a/tests/test_overschrijding.py b/tests/test_overschrijding.py index 2bc00db..8305e06 100644 --- a/tests/test_overschrijding.py +++ b/tests/test_overschrijding.py @@ -68,15 +68,15 @@ def test_calc_overschrijding(df_ext_12_2010_2014): expected_values = np.array( [ 1.93, - 2.09327434, - 2.26311592, - 2.44480348, - 2.70434509, - 2.91627091, - 3.14247786, - 3.46480369, - 3.72735283, - 4.00701551, + 2.09356726, + 2.2637632, + 2.44533302, + 2.70383299, + 2.91416492, + 3.13795447, + 3.45560027, + 3.71330277, + 3.98682045, ] ) assert np.allclose(dist["geinterpoleerd"].values, expected_values) @@ -96,11 +96,24 @@ def test_calc_overschrijding_with_hydra(df_ext_12_2010_2014): 1 / 100, 1 
/ 200, ] - hydra_values = np.array([2.473, 3.18 , 4.043, 4.164, 4.358, 4.696, 5.056, 5.468, 5.865, - 6.328, 7.207]) - hydra_index = np.array([1.00000000e+00, 1.00000000e-01, 2.00000000e-02, 1.00000000e-02, - 3.33333333e-03, 1.00000000e-03, 3.33333333e-04, 1.00000000e-04, - 3.33333333e-05, 1.00000000e-05, 1.00000000e-06]) + hydra_values = np.array( + [2.473, 3.18, 4.043, 4.164, 4.358, 4.696, 5.056, 5.468, 5.865, 6.328, 7.207] + ) + hydra_index = np.array( + [ + 1.00000000e00, + 1.00000000e-01, + 2.00000000e-02, + 1.00000000e-02, + 3.33333333e-03, + 1.00000000e-03, + 3.33333333e-04, + 1.00000000e-04, + 3.33333333e-05, + 1.00000000e-05, + 1.00000000e-06, + ] + ) ser_hydra = pd.Series(hydra_values, index=hydra_index) ser_hydra.attrs = df_ext_12_2010_2014.attrs dist_hydra = {"Hydra-NL": ser_hydra} @@ -121,12 +134,12 @@ def test_calc_overschrijding_with_hydra(df_ext_12_2010_2014): expected_values = np.array( [ 1.93, - 2.09327434, - 2.26311587, - 2.46299612, - 2.79965222, - 3.08436295, - 3.4987347, + 2.09356726, + 2.26376316, + 2.46348569, + 2.79932582, + 3.08359924, + 3.49814949, 4.043, 4.164, 4.3095, @@ -227,20 +240,18 @@ def test_calc_overschrijding_clip_physical_break(df_ext_12_2010_2014): expected_values_normal = np.array( [ 1.93, - 2.09327434, - 2.26311592, - 2.44480348, - 2.70434509, - 2.91627091, - 3.14247786, - 3.46480369, - 3.72735283, - 4.00701551, + 2.09356726, + 2.2637632, + 2.44533302, + 2.70383299, + 2.91416492, + 3.13795447, + 3.45560027, + 3.71330277, + 3.98682045, ] ) - assert np.allclose( - dist_normal["geinterpoleerd"].values, expected_values_normal - ) + assert np.allclose(dist_normal["geinterpoleerd"].values, expected_values_normal) expected_values_clip = np.array( [ 1.93, @@ -255,9 +266,7 @@ def test_calc_overschrijding_clip_physical_break(df_ext_12_2010_2014): 3.90683996, ] ) - assert np.allclose( - dist_clip["geinterpoleerd"].values, expected_values_clip - ) + assert np.allclose(dist_clip["geinterpoleerd"].values, expected_values_clip) @pytest.mark.unittest diff --git a/tests/test_slotgemiddelden.py b/tests/test_slotgemiddelden.py index b89f6bd..428de8e 100644 --- a/tests/test_slotgemiddelden.py +++ b/tests/test_slotgemiddelden.py @@ -69,6 +69,7 @@ def test_calc_slotgemiddelden(df_meas_2010_2014, df_ext_12_2010_2014): assert set(slotgemiddelden_dict_noext.keys()) == set(expected_keys_noext) # assertion of values + # fmt: off wl_mean_peryear_expected = np.array([0.07960731, 0.08612119, 0.0853051, 0.07010864, 0.10051922]) hw_mean_peryear_expected = np.array([1.13968839, 1.12875177, 1.13988685, @@ -77,6 +78,7 @@ def test_calc_slotgemiddelden(df_meas_2010_2014, df_ext_12_2010_2014): -0.61334278, -0.58024113]) range_mean_peryear_expected = np.array([1.74530541, 1.71964539, 1.73330976, 1.75488888, 1.77022697]) + # fmt: on assert np.allclose( slotgemiddelden_dict_inclext["wl_mean_peryear"].values, wl_mean_peryear_expected ) @@ -88,9 +90,10 @@ def test_calc_slotgemiddelden(df_meas_2010_2014, df_ext_12_2010_2014): ) assert np.allclose( slotgemiddelden_dict_inclext["tidalrange_mean_peryear"].values, - range_mean_peryear_expected + range_mean_peryear_expected, ) + # fmt: off wl_model_fit_expected = np.array([0.0141927, 0.08612119, 0.0853051, 0.07010864, 0.10051922, 0.23137634]) hw_model_fit_expected = np.array([1.05295416, 1.12875177, 1.13988685, @@ -99,6 +102,7 @@ def test_calc_slotgemiddelden(df_meas_2010_2014, df_ext_12_2010_2014): -0.61334278, -0.58024113, -0.42969074]) range_model_fit_expected = np.array([1.72715816, 1.71964539, 1.73330976, 1.75488888, 1.77022697, 1.76587273]) 
+ # fmt: on assert np.allclose( slotgemiddelden_dict_inclext["wl_model_fit"].values, wl_model_fit_expected ) @@ -109,8 +113,8 @@ def test_calc_slotgemiddelden(df_meas_2010_2014, df_ext_12_2010_2014): slotgemiddelden_dict_inclext["LW_model_fit"].values, lw_model_fit_expected ) assert np.allclose( - slotgemiddelden_dict_inclext["tidalrange_model_fit"].values, - range_model_fit_expected + slotgemiddelden_dict_inclext["tidalrange_model_fit"].values, + range_model_fit_expected, ) diff --git a/tests/test_tidalindicators.py b/tests/test_tidalindicators.py index 90f1406..a6e46dc 100644 --- a/tests/test_tidalindicators.py +++ b/tests/test_tidalindicators.py @@ -42,26 +42,39 @@ def test_calc_HWLWtidalrange(df_ext_12_2010): @pytest.mark.unittest def test_calc_HWLWtidalindicators(df_ext_12_2010_2014): - ext_stats_notimezone = kw.calc_HWLWtidalindicators(df_ext_12_2010_2014.tz_localize(None)) + ext_stats_notimezone = kw.calc_HWLWtidalindicators( + df_ext_12_2010_2014.tz_localize(None) + ) ext_stats = kw.calc_HWLWtidalindicators(df_ext_12_2010_2014) ext_stats_min100 = kw.calc_HWLWtidalindicators(df_ext_12_2010_2014, min_coverage=1) - ext_stats_min099 = kw.calc_HWLWtidalindicators(df_ext_12_2010_2014, min_coverage=0.99) - ext_stats_min098 = kw.calc_HWLWtidalindicators(df_ext_12_2010_2014, min_coverage=0.98) - - expected_keys = ['HW_mean', 'LW_mean', - 'HW_mean_peryear', 'LW_mean_peryear', - 'HW_monthmax_permonth', 'LW_monthmin_permonth', - 'HW_monthmax_mean_peryear', 'LW_monthmax_mean_peryear', - 'HW_monthmin_mean_peryear', 'LW_monthmin_mean_peryear'] + ext_stats_min099 = kw.calc_HWLWtidalindicators( + df_ext_12_2010_2014, min_coverage=0.99 + ) + ext_stats_min098 = kw.calc_HWLWtidalindicators( + df_ext_12_2010_2014, min_coverage=0.98 + ) + + expected_keys = [ + "HW_mean", + "LW_mean", + "HW_mean_peryear", + "LW_mean_peryear", + "HW_monthmax_permonth", + "LW_monthmin_permonth", + "HW_monthmax_mean_peryear", + "LW_monthmax_mean_peryear", + "HW_monthmin_mean_peryear", + "LW_monthmin_mean_peryear", + ] for key in expected_keys: assert key in ext_stats.keys() assert (ext_stats[key] == ext_stats_notimezone[key]).all() - assert ext_stats_notimezone['HW_monthmax_permonth'].isnull().sum() == 0 - assert ext_stats['HW_monthmax_permonth'].isnull().sum() == 0 - assert ext_stats_min100['HW_monthmax_permonth'].isnull().sum() == 1 - assert ext_stats_min099['HW_monthmax_permonth'].isnull().sum() == 1 - assert ext_stats_min098['HW_monthmax_permonth'].isnull().sum() == 0 + assert ext_stats_notimezone["HW_monthmax_permonth"].isnull().sum() == 0 + assert ext_stats["HW_monthmax_permonth"].isnull().sum() == 0 + assert ext_stats_min100["HW_monthmax_permonth"].isnull().sum() == 1 + assert ext_stats_min099["HW_monthmax_permonth"].isnull().sum() == 1 + assert ext_stats_min098["HW_monthmax_permonth"].isnull().sum() == 0 @pytest.mark.unittest @@ -77,6 +90,7 @@ def test_calc_wltidalindicators(df_meas_2010_2014): wl_mean_peryear_expected = np.array( [0.07960731, 0.08612119, 0.0853051, 0.07010864, 0.10051922] ) + # fmt: off wl_mean_permonth_expected = np.array([ -0.00227151, 0.089313 , 0.04443996, -0.03440509, -0.00206317, 0.04431481, 0.03877688, 0.18267697, 0.13494907, 0.18367832, @@ -90,6 +104,7 @@ def test_calc_wltidalindicators(df_meas_2010_2014): 0.17321909, 0.23108102, 0.19502688, 0.06281138, 0.08588046, -0.00553763, 0.03490278, 0.03113575, 0.03134954, 0.10553763, 0.16540771, 0.12535648, 0.20802195, 0.10014352, 0.25624552]) + # fmt: on assert np.allclose(wl_stats["wl_mean_peryear"].values, wl_mean_peryear_expected) assert 
np.allclose(wl_stats["wl_mean_permonth"].values, wl_mean_permonth_expected) @@ -221,6 +236,7 @@ def test_calc_wltidalindicators_ext(df_ext_12_2010_2014): assert np.allclose(ext_stats["HW_mean_peryear"].values, hw_mean_peryear_expected) assert np.allclose(ext_stats["LW_mean_peryear"].values, lw_mean_peryear_expected) + # fmt: off hw_monthmax_permonth_expected = np.array([ 1.94, 1.89, 1.86, 1.55, 1.74, 1.58, 1.54, 2.07, 2.11, 2.06, 1.9 , 1.75, 1.69, 1.82, 1.49, 1.39, 1.4 , 1.71, 1.72, 1.66, 1.69, 1.59, @@ -236,6 +252,7 @@ def test_calc_wltidalindicators_ext(df_ext_12_2010_2014): -1.11, -1.65, -1.37, -1.11, -1.11, -1.05, -0.98, -1.07, -0.88, -1.05, -1.15, -1.07, -1.32, -1.31, -1.21, -1.08, -1. , -1.03, -1.07, -0.83, -0.98, -0.97, -0.99, -1.3 ]) + # fmt: on assert np.allclose( ext_stats["HW_monthmax_permonth"].values, hw_monthmax_permonth_expected ) diff --git a/tests/test_utils.py b/tests/test_utils.py index b2ca770..40b41de 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -12,8 +12,10 @@ def test_raise_extremes_with_aggers_raise_12345df(df_ext): with pytest.raises(ValueError) as e: raise_extremes_with_aggers(df_ext) - assert ("df_ext should only contain extremes (HWLWcode 1/2), " - "but it also contains aggers (HWLWcode 3/4/5") in str(e.value) + assert ( + "df_ext should only contain extremes (HWLWcode 1/2), " + "but it also contains aggers (HWLWcode 3/4/5" + ) in str(e.value) @pytest.mark.unittest @@ -24,7 +26,10 @@ def test_raise_extremes_with_aggers_pass_12df(df_ext_12_2010): @pytest.mark.unittest def test_raise_extremes_with_aggers_emptydf(): import pandas as pd - time_index = pd.DatetimeIndex([], dtype='datetime64[ns, Etc/GMT-1]', name='time', freq=None) - df_ext = pd.DataFrame({"HWLWcode":[]},index=time_index) + + time_index = pd.DatetimeIndex( + [], dtype="datetime64[ns, Etc/GMT-1]", name="time", freq=None + ) + df_ext = pd.DataFrame({"HWLWcode": []}, index=time_index) df_ext.attrs["station"] = "dummy" raise_extremes_with_aggers(df_ext=df_ext)
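
Notes on the numerical change in kenmerkendewaarden/overschrijding.py:

The cost function previously evaluated np.log(-der_pfunc(...)), i.e. the logarithm of an expression of the form a * exp(b). For the extreme trial values of (alpha, sigma) that the Nelder-Mead simplex can visit, exp(b) may overflow or underflow before the logarithm is taken. Splitting the expression and using the identity log(a * exp(b)) = log(a) + b (valid for a > 0, which holds here because longexpra is negative) keeps the exponent b out of exp() entirely, so the cost stays finite. The added bounds keep sigma strictly positive (>= 1e-10), which avoids undefined sigma ** (-alpha) evaluations; scipy.optimize.minimize honours the bounds argument for the Nelder-Mead method from SciPy 1.7 onwards.

The sketch below illustrates the same idea on synthetic data. It is not the package's own overschrijding code: the synthetic exceedances, the starting guess x0 and the helper names are illustrative assumptions.

    import numpy as np
    from scipy import optimize

    # synthetic stand-ins for (p_val_gt_threshold, threshold, exceedances); the
    # real code derives these from the measured extremes
    p_exc, threshold = 0.05, 2.0
    rng = np.random.default_rng(42)
    values = threshold + rng.weibull(1.2, size=200)

    def log_neg_der_pfunc(x, p_exc, threshold, alpha, sigma):
        # same split as in the patch: return log(a) + b instead of log(a * exp(b)),
        # so the exponent b never passes through exp() and cannot overflow/underflow
        a = -p_exc * (alpha * x ** (alpha - 1)) * (sigma ** (-alpha))
        b = -((x / sigma) ** alpha) + ((threshold / sigma) ** alpha)
        return np.log(-a) + b

    def cost_func(params, p_exc, threshold, xs):
        alpha, sigma = params
        return -np.sum(log_neg_der_pfunc(xs, p_exc, threshold, alpha, sigma))

    # x0 is an illustrative starting guess; alpha is left unbounded as in the
    # patch, while sigma is kept strictly positive so sigma ** (-alpha) is defined
    result = optimize.minimize(
        cost_func,
        x0=[1.0, 1.0],
        args=(p_exc, threshold, values[values > threshold]),
        method="Nelder-Mead",
        bounds=[(-np.inf, np.inf), (1e-10, np.inf)],
        options={"maxiter": 1e4},
    )
    print(result.x, result.success)

Because only the shape of the cost surface near its minimum matters, the fitted (alpha, sigma) are expected to be essentially unchanged; the small shifts in the interpolated exceedance values in tests/test_overschrijding.py reflect the better-conditioned optimisation rather than a change of model.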