From fcab9c0322700608f77df82ba642f76c2d86efaf Mon Sep 17 00:00:00 2001 From: Edwin Spee <34799234+epsig@users.noreply.github.com> Date: Thu, 10 Oct 2024 09:14:51 +0200 Subject: [PATCH] add bounds argument and split log(a*exp(b)) (#149) * add bounds argument and split log(a*exp(b)) * updated tests * reverted formatting * reverted formatting --------- Co-authored-by: veenstrajelmer --- kenmerkendewaarden/overschrijding.py | 9 ++-- tests/test_data_retrieve.py | 8 +-- tests/test_overschrijding.py | 79 ++++++++++++++++------------ tests/test_slotgemiddelden.py | 10 ++-- tests/test_tidalindicators.py | 45 +++++++++++----- tests/test_utils.py | 13 +++-- 6 files changed, 101 insertions(+), 63 deletions(-) diff --git a/kenmerkendewaarden/overschrijding.py b/kenmerkendewaarden/overschrijding.py index 0b53620..a2698b3 100644 --- a/kenmerkendewaarden/overschrijding.py +++ b/kenmerkendewaarden/overschrijding.py @@ -5,6 +5,7 @@ import pandas as pd import numpy as np +import math import matplotlib.pyplot as plt from matplotlib import ticker from scipy import optimize, signal @@ -318,17 +319,18 @@ def pfunc_inverse(p_X_gt_x, p_val_gt_threshold, threshold, sigma, alpha): ) ** (1 / alpha) def der_pfunc(x, p_val_gt_threshold, threshold, alpha, sigma): - return ( + longexpra = ( -p_val_gt_threshold * (alpha * x ** (alpha - 1)) * (sigma ** (-alpha)) - * np.exp(-((x / sigma) ** alpha) + ((threshold / sigma) ** alpha)) ) + longexprb = -((x / sigma) ** alpha) + ((threshold / sigma) ** alpha) + return np.log(-longexpra) + longexprb def cost_func(params, *args): return -np.sum( [ - np.log(-der_pfunc(x, args[0], args[1], params[0], params[1])) + der_pfunc(x, args[0], args[1], params[0], params[1]) for x in args[2] ] ) @@ -339,6 +341,7 @@ def cost_func(params, *args): x0=initial_guess, args=(p_val_gt_threshold, threshold, values[values > threshold]), method="Nelder-Mead", + bounds = [[-math.inf, math.inf], [1e-10, math.inf]], options={"maxiter": 1e4}, ) if result.success: diff --git a/tests/test_data_retrieve.py b/tests/test_data_retrieve.py index 40d9cd1..08d9e33 100644 --- a/tests/test_data_retrieve.py +++ b/tests/test_data_retrieve.py @@ -33,10 +33,10 @@ def test_drop_duplicate_times(df_meas_2010, caplog): assert len(meas_duplicated) == 105120 assert len(meas_clean) == 52560 - + # assert logging messages - assert '52530 rows with duplicated time-value-combinations dropped' in caplog.text - assert '30 rows with duplicated times dropped' in caplog.text + assert "52530 rows with duplicated time-value-combinations dropped" in caplog.text + assert "30 rows with duplicated times dropped" in caplog.text @pytest.mark.unittest @@ -45,7 +45,7 @@ def test_drop_duplicate_times_noaction(df_meas_2010, caplog): assert len(df_meas_2010) == 52560 assert len(meas_clean) == 52560 - + # assert that there is no logging messages assert caplog.text == "" diff --git a/tests/test_overschrijding.py b/tests/test_overschrijding.py index 2bc00db..8305e06 100644 --- a/tests/test_overschrijding.py +++ b/tests/test_overschrijding.py @@ -68,15 +68,15 @@ def test_calc_overschrijding(df_ext_12_2010_2014): expected_values = np.array( [ 1.93, - 2.09327434, - 2.26311592, - 2.44480348, - 2.70434509, - 2.91627091, - 3.14247786, - 3.46480369, - 3.72735283, - 4.00701551, + 2.09356726, + 2.2637632, + 2.44533302, + 2.70383299, + 2.91416492, + 3.13795447, + 3.45560027, + 3.71330277, + 3.98682045, ] ) assert np.allclose(dist["geinterpoleerd"].values, expected_values) @@ -96,11 +96,24 @@ def test_calc_overschrijding_with_hydra(df_ext_12_2010_2014): 1 / 100, 1 
/ 200, ] - hydra_values = np.array([2.473, 3.18 , 4.043, 4.164, 4.358, 4.696, 5.056, 5.468, 5.865, - 6.328, 7.207]) - hydra_index = np.array([1.00000000e+00, 1.00000000e-01, 2.00000000e-02, 1.00000000e-02, - 3.33333333e-03, 1.00000000e-03, 3.33333333e-04, 1.00000000e-04, - 3.33333333e-05, 1.00000000e-05, 1.00000000e-06]) + hydra_values = np.array( + [2.473, 3.18, 4.043, 4.164, 4.358, 4.696, 5.056, 5.468, 5.865, 6.328, 7.207] + ) + hydra_index = np.array( + [ + 1.00000000e00, + 1.00000000e-01, + 2.00000000e-02, + 1.00000000e-02, + 3.33333333e-03, + 1.00000000e-03, + 3.33333333e-04, + 1.00000000e-04, + 3.33333333e-05, + 1.00000000e-05, + 1.00000000e-06, + ] + ) ser_hydra = pd.Series(hydra_values, index=hydra_index) ser_hydra.attrs = df_ext_12_2010_2014.attrs dist_hydra = {"Hydra-NL": ser_hydra} @@ -121,12 +134,12 @@ def test_calc_overschrijding_with_hydra(df_ext_12_2010_2014): expected_values = np.array( [ 1.93, - 2.09327434, - 2.26311587, - 2.46299612, - 2.79965222, - 3.08436295, - 3.4987347, + 2.09356726, + 2.26376316, + 2.46348569, + 2.79932582, + 3.08359924, + 3.49814949, 4.043, 4.164, 4.3095, @@ -227,20 +240,18 @@ def test_calc_overschrijding_clip_physical_break(df_ext_12_2010_2014): expected_values_normal = np.array( [ 1.93, - 2.09327434, - 2.26311592, - 2.44480348, - 2.70434509, - 2.91627091, - 3.14247786, - 3.46480369, - 3.72735283, - 4.00701551, + 2.09356726, + 2.2637632, + 2.44533302, + 2.70383299, + 2.91416492, + 3.13795447, + 3.45560027, + 3.71330277, + 3.98682045, ] ) - assert np.allclose( - dist_normal["geinterpoleerd"].values, expected_values_normal - ) + assert np.allclose(dist_normal["geinterpoleerd"].values, expected_values_normal) expected_values_clip = np.array( [ 1.93, @@ -255,9 +266,7 @@ def test_calc_overschrijding_clip_physical_break(df_ext_12_2010_2014): 3.90683996, ] ) - assert np.allclose( - dist_clip["geinterpoleerd"].values, expected_values_clip - ) + assert np.allclose(dist_clip["geinterpoleerd"].values, expected_values_clip) @pytest.mark.unittest diff --git a/tests/test_slotgemiddelden.py b/tests/test_slotgemiddelden.py index b89f6bd..428de8e 100644 --- a/tests/test_slotgemiddelden.py +++ b/tests/test_slotgemiddelden.py @@ -69,6 +69,7 @@ def test_calc_slotgemiddelden(df_meas_2010_2014, df_ext_12_2010_2014): assert set(slotgemiddelden_dict_noext.keys()) == set(expected_keys_noext) # assertion of values + # fmt: off wl_mean_peryear_expected = np.array([0.07960731, 0.08612119, 0.0853051, 0.07010864, 0.10051922]) hw_mean_peryear_expected = np.array([1.13968839, 1.12875177, 1.13988685, @@ -77,6 +78,7 @@ def test_calc_slotgemiddelden(df_meas_2010_2014, df_ext_12_2010_2014): -0.61334278, -0.58024113]) range_mean_peryear_expected = np.array([1.74530541, 1.71964539, 1.73330976, 1.75488888, 1.77022697]) + # fmt: on assert np.allclose( slotgemiddelden_dict_inclext["wl_mean_peryear"].values, wl_mean_peryear_expected ) @@ -88,9 +90,10 @@ def test_calc_slotgemiddelden(df_meas_2010_2014, df_ext_12_2010_2014): ) assert np.allclose( slotgemiddelden_dict_inclext["tidalrange_mean_peryear"].values, - range_mean_peryear_expected + range_mean_peryear_expected, ) + # fmt: off wl_model_fit_expected = np.array([0.0141927, 0.08612119, 0.0853051, 0.07010864, 0.10051922, 0.23137634]) hw_model_fit_expected = np.array([1.05295416, 1.12875177, 1.13988685, @@ -99,6 +102,7 @@ def test_calc_slotgemiddelden(df_meas_2010_2014, df_ext_12_2010_2014): -0.61334278, -0.58024113, -0.42969074]) range_model_fit_expected = np.array([1.72715816, 1.71964539, 1.73330976, 1.75488888, 1.77022697, 1.76587273]) 
+ # fmt: on assert np.allclose( slotgemiddelden_dict_inclext["wl_model_fit"].values, wl_model_fit_expected ) @@ -109,8 +113,8 @@ def test_calc_slotgemiddelden(df_meas_2010_2014, df_ext_12_2010_2014): slotgemiddelden_dict_inclext["LW_model_fit"].values, lw_model_fit_expected ) assert np.allclose( - slotgemiddelden_dict_inclext["tidalrange_model_fit"].values, - range_model_fit_expected + slotgemiddelden_dict_inclext["tidalrange_model_fit"].values, + range_model_fit_expected, ) diff --git a/tests/test_tidalindicators.py b/tests/test_tidalindicators.py index 90f1406..a6e46dc 100644 --- a/tests/test_tidalindicators.py +++ b/tests/test_tidalindicators.py @@ -42,26 +42,39 @@ def test_calc_HWLWtidalrange(df_ext_12_2010): @pytest.mark.unittest def test_calc_HWLWtidalindicators(df_ext_12_2010_2014): - ext_stats_notimezone = kw.calc_HWLWtidalindicators(df_ext_12_2010_2014.tz_localize(None)) + ext_stats_notimezone = kw.calc_HWLWtidalindicators( + df_ext_12_2010_2014.tz_localize(None) + ) ext_stats = kw.calc_HWLWtidalindicators(df_ext_12_2010_2014) ext_stats_min100 = kw.calc_HWLWtidalindicators(df_ext_12_2010_2014, min_coverage=1) - ext_stats_min099 = kw.calc_HWLWtidalindicators(df_ext_12_2010_2014, min_coverage=0.99) - ext_stats_min098 = kw.calc_HWLWtidalindicators(df_ext_12_2010_2014, min_coverage=0.98) - - expected_keys = ['HW_mean', 'LW_mean', - 'HW_mean_peryear', 'LW_mean_peryear', - 'HW_monthmax_permonth', 'LW_monthmin_permonth', - 'HW_monthmax_mean_peryear', 'LW_monthmax_mean_peryear', - 'HW_monthmin_mean_peryear', 'LW_monthmin_mean_peryear'] + ext_stats_min099 = kw.calc_HWLWtidalindicators( + df_ext_12_2010_2014, min_coverage=0.99 + ) + ext_stats_min098 = kw.calc_HWLWtidalindicators( + df_ext_12_2010_2014, min_coverage=0.98 + ) + + expected_keys = [ + "HW_mean", + "LW_mean", + "HW_mean_peryear", + "LW_mean_peryear", + "HW_monthmax_permonth", + "LW_monthmin_permonth", + "HW_monthmax_mean_peryear", + "LW_monthmax_mean_peryear", + "HW_monthmin_mean_peryear", + "LW_monthmin_mean_peryear", + ] for key in expected_keys: assert key in ext_stats.keys() assert (ext_stats[key] == ext_stats_notimezone[key]).all() - assert ext_stats_notimezone['HW_monthmax_permonth'].isnull().sum() == 0 - assert ext_stats['HW_monthmax_permonth'].isnull().sum() == 0 - assert ext_stats_min100['HW_monthmax_permonth'].isnull().sum() == 1 - assert ext_stats_min099['HW_monthmax_permonth'].isnull().sum() == 1 - assert ext_stats_min098['HW_monthmax_permonth'].isnull().sum() == 0 + assert ext_stats_notimezone["HW_monthmax_permonth"].isnull().sum() == 0 + assert ext_stats["HW_monthmax_permonth"].isnull().sum() == 0 + assert ext_stats_min100["HW_monthmax_permonth"].isnull().sum() == 1 + assert ext_stats_min099["HW_monthmax_permonth"].isnull().sum() == 1 + assert ext_stats_min098["HW_monthmax_permonth"].isnull().sum() == 0 @pytest.mark.unittest @@ -77,6 +90,7 @@ def test_calc_wltidalindicators(df_meas_2010_2014): wl_mean_peryear_expected = np.array( [0.07960731, 0.08612119, 0.0853051, 0.07010864, 0.10051922] ) + # fmt: off wl_mean_permonth_expected = np.array([ -0.00227151, 0.089313 , 0.04443996, -0.03440509, -0.00206317, 0.04431481, 0.03877688, 0.18267697, 0.13494907, 0.18367832, @@ -90,6 +104,7 @@ def test_calc_wltidalindicators(df_meas_2010_2014): 0.17321909, 0.23108102, 0.19502688, 0.06281138, 0.08588046, -0.00553763, 0.03490278, 0.03113575, 0.03134954, 0.10553763, 0.16540771, 0.12535648, 0.20802195, 0.10014352, 0.25624552]) + # fmt: on assert np.allclose(wl_stats["wl_mean_peryear"].values, wl_mean_peryear_expected) assert 
np.allclose(wl_stats["wl_mean_permonth"].values, wl_mean_permonth_expected) @@ -221,6 +236,7 @@ def test_calc_wltidalindicators_ext(df_ext_12_2010_2014): assert np.allclose(ext_stats["HW_mean_peryear"].values, hw_mean_peryear_expected) assert np.allclose(ext_stats["LW_mean_peryear"].values, lw_mean_peryear_expected) + # fmt: off hw_monthmax_permonth_expected = np.array([ 1.94, 1.89, 1.86, 1.55, 1.74, 1.58, 1.54, 2.07, 2.11, 2.06, 1.9 , 1.75, 1.69, 1.82, 1.49, 1.39, 1.4 , 1.71, 1.72, 1.66, 1.69, 1.59, @@ -236,6 +252,7 @@ def test_calc_wltidalindicators_ext(df_ext_12_2010_2014): -1.11, -1.65, -1.37, -1.11, -1.11, -1.05, -0.98, -1.07, -0.88, -1.05, -1.15, -1.07, -1.32, -1.31, -1.21, -1.08, -1. , -1.03, -1.07, -0.83, -0.98, -0.97, -0.99, -1.3 ]) + # fmt: on assert np.allclose( ext_stats["HW_monthmax_permonth"].values, hw_monthmax_permonth_expected ) diff --git a/tests/test_utils.py b/tests/test_utils.py index b2ca770..40b41de 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -12,8 +12,10 @@ def test_raise_extremes_with_aggers_raise_12345df(df_ext): with pytest.raises(ValueError) as e: raise_extremes_with_aggers(df_ext) - assert ("df_ext should only contain extremes (HWLWcode 1/2), " - "but it also contains aggers (HWLWcode 3/4/5") in str(e.value) + assert ( + "df_ext should only contain extremes (HWLWcode 1/2), " + "but it also contains aggers (HWLWcode 3/4/5" + ) in str(e.value) @pytest.mark.unittest @@ -24,7 +26,10 @@ def test_raise_extremes_with_aggers_pass_12df(df_ext_12_2010): @pytest.mark.unittest def test_raise_extremes_with_aggers_emptydf(): import pandas as pd - time_index = pd.DatetimeIndex([], dtype='datetime64[ns, Etc/GMT-1]', name='time', freq=None) - df_ext = pd.DataFrame({"HWLWcode":[]},index=time_index) + + time_index = pd.DatetimeIndex( + [], dtype="datetime64[ns, Etc/GMT-1]", name="time", freq=None + ) + df_ext = pd.DataFrame({"HWLWcode": []}, index=time_index) df_ext.attrs["station"] = "dummy" raise_extremes_with_aggers(df_ext=df_ext)
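
Notes on the numerical change in kenmerkendewaarden/overschrijding.py:

The cost function previously evaluated np.log(-der_pfunc(...)), i.e. the logarithm of an expression of the form a * exp(b). For the extreme trial values of (alpha, sigma) that the Nelder-Mead simplex can visit, exp(b) may overflow or underflow before the logarithm is taken. Splitting the expression and using the identity log(a * exp(b)) = log(a) + b (valid for a > 0, which holds here because longexpra is negative) keeps the exponent b out of exp() entirely, so the cost stays finite. The added bounds keep sigma strictly positive (>= 1e-10), which avoids undefined sigma ** (-alpha) evaluations; scipy.optimize.minimize honours the bounds argument for the Nelder-Mead method from SciPy 1.7 onwards.

The sketch below illustrates the same idea on synthetic data. It is not the package's own overschrijding code: the synthetic exceedances, the starting guess x0 and the helper names are illustrative assumptions.

    import numpy as np
    from scipy import optimize

    # synthetic stand-ins for (p_val_gt_threshold, threshold, exceedances); the
    # real code derives these from the measured extremes
    p_exc, threshold = 0.05, 2.0
    rng = np.random.default_rng(42)
    values = threshold + rng.weibull(1.2, size=200)

    def log_neg_der_pfunc(x, p_exc, threshold, alpha, sigma):
        # same split as in the patch: return log(a) + b instead of log(a * exp(b)),
        # so the exponent b never passes through exp() and cannot overflow/underflow
        a = -p_exc * (alpha * x ** (alpha - 1)) * (sigma ** (-alpha))
        b = -((x / sigma) ** alpha) + ((threshold / sigma) ** alpha)
        return np.log(-a) + b

    def cost_func(params, p_exc, threshold, xs):
        alpha, sigma = params
        return -np.sum(log_neg_der_pfunc(xs, p_exc, threshold, alpha, sigma))

    # x0 is an illustrative starting guess; alpha is left unbounded as in the
    # patch, while sigma is kept strictly positive so sigma ** (-alpha) is defined
    result = optimize.minimize(
        cost_func,
        x0=[1.0, 1.0],
        args=(p_exc, threshold, values[values > threshold]),
        method="Nelder-Mead",
        bounds=[(-np.inf, np.inf), (1e-10, np.inf)],
        options={"maxiter": 1e4},
    )
    print(result.x, result.success)

Because only the shape of the cost surface near its minimum matters, the fitted (alpha, sigma) are expected to be essentially unchanged; the small shifts in the interpolated exceedance values in tests/test_overschrijding.py reflect the better-conditioned optimisation rather than a change of model.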