Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add bounds argument and split log(a*exp(b)) #149

Merged
merged 4 commits into from
Oct 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions kenmerkendewaarden/overschrijding.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
from matplotlib import ticker
from scipy import optimize, signal
Expand Down Expand Up @@ -318,17 +319,18 @@ def pfunc_inverse(p_X_gt_x, p_val_gt_threshold, threshold, sigma, alpha):
) ** (1 / alpha)

def der_pfunc(x, p_val_gt_threshold, threshold, alpha, sigma):
return (
longexpra = (
-p_val_gt_threshold
* (alpha * x ** (alpha - 1))
* (sigma ** (-alpha))
* np.exp(-((x / sigma) ** alpha) + ((threshold / sigma) ** alpha))
)
longexprb = -((x / sigma) ** alpha) + ((threshold / sigma) ** alpha)
return np.log(-longexpra) + longexprb

def cost_func(params, *args):
return -np.sum(
[
np.log(-der_pfunc(x, args[0], args[1], params[0], params[1]))
der_pfunc(x, args[0], args[1], params[0], params[1])
for x in args[2]
]
)
Expand All @@ -339,6 +341,7 @@ def cost_func(params, *args):
x0=initial_guess,
args=(p_val_gt_threshold, threshold, values[values > threshold]),
method="Nelder-Mead",
bounds = [[-math.inf, math.inf], [1e-10, math.inf]],
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think math.inf can be replaced with None which might be slightly neater since it reverts to the default bounds in that case (which are most probably +/- math.inf)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried a few thinks like you suggested, but ended up with errors as: 'NoneType' object is not iterable, so I keep it as it is.

Copy link
Collaborator

@veenstrajelmer veenstrajelmer Oct 10, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe I should have been more clear, my suggestion was to use bounds = [[None, None], [1e-10, None]],. I think I successfully ran the testbank with this, but you might have done another test? Either way, since the PR is already merged I thing it is fine to leave it as is for now. Thanks for fixing this issue!

options={"maxiter": 1e4},
)
if result.success:
Expand Down
8 changes: 4 additions & 4 deletions tests/test_data_retrieve.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,10 @@ def test_drop_duplicate_times(df_meas_2010, caplog):

assert len(meas_duplicated) == 105120
assert len(meas_clean) == 52560

# assert logging messages
assert '52530 rows with duplicated time-value-combinations dropped' in caplog.text
assert '30 rows with duplicated times dropped' in caplog.text
assert "52530 rows with duplicated time-value-combinations dropped" in caplog.text
assert "30 rows with duplicated times dropped" in caplog.text


@pytest.mark.unittest
Expand All @@ -45,7 +45,7 @@ def test_drop_duplicate_times_noaction(df_meas_2010, caplog):

assert len(df_meas_2010) == 52560
assert len(meas_clean) == 52560

# assert that there is no logging messages
assert caplog.text == ""

Expand Down
79 changes: 44 additions & 35 deletions tests/test_overschrijding.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,15 +68,15 @@ def test_calc_overschrijding(df_ext_12_2010_2014):
expected_values = np.array(
[
1.93,
2.09327434,
2.26311592,
2.44480348,
2.70434509,
2.91627091,
3.14247786,
3.46480369,
3.72735283,
4.00701551,
2.09356726,
2.2637632,
2.44533302,
2.70383299,
2.91416492,
3.13795447,
3.45560027,
3.71330277,
3.98682045,
]
)
assert np.allclose(dist["geinterpoleerd"].values, expected_values)
Expand All @@ -96,11 +96,24 @@ def test_calc_overschrijding_with_hydra(df_ext_12_2010_2014):
1 / 100,
1 / 200,
]
hydra_values = np.array([2.473, 3.18 , 4.043, 4.164, 4.358, 4.696, 5.056, 5.468, 5.865,
6.328, 7.207])
hydra_index = np.array([1.00000000e+00, 1.00000000e-01, 2.00000000e-02, 1.00000000e-02,
3.33333333e-03, 1.00000000e-03, 3.33333333e-04, 1.00000000e-04,
3.33333333e-05, 1.00000000e-05, 1.00000000e-06])
hydra_values = np.array(
[2.473, 3.18, 4.043, 4.164, 4.358, 4.696, 5.056, 5.468, 5.865, 6.328, 7.207]
)
hydra_index = np.array(
[
1.00000000e00,
1.00000000e-01,
2.00000000e-02,
1.00000000e-02,
3.33333333e-03,
1.00000000e-03,
3.33333333e-04,
1.00000000e-04,
3.33333333e-05,
1.00000000e-05,
1.00000000e-06,
]
)
ser_hydra = pd.Series(hydra_values, index=hydra_index)
ser_hydra.attrs = df_ext_12_2010_2014.attrs
dist_hydra = {"Hydra-NL": ser_hydra}
Expand All @@ -121,12 +134,12 @@ def test_calc_overschrijding_with_hydra(df_ext_12_2010_2014):
expected_values = np.array(
[
1.93,
2.09327434,
2.26311587,
2.46299612,
2.79965222,
3.08436295,
3.4987347,
2.09356726,
2.26376316,
2.46348569,
2.79932582,
3.08359924,
3.49814949,
4.043,
4.164,
4.3095,
Expand Down Expand Up @@ -227,20 +240,18 @@ def test_calc_overschrijding_clip_physical_break(df_ext_12_2010_2014):
expected_values_normal = np.array(
[
1.93,
2.09327434,
2.26311592,
2.44480348,
2.70434509,
2.91627091,
3.14247786,
3.46480369,
3.72735283,
4.00701551,
2.09356726,
2.2637632,
2.44533302,
2.70383299,
2.91416492,
3.13795447,
3.45560027,
3.71330277,
3.98682045,
]
)
assert np.allclose(
dist_normal["geinterpoleerd"].values, expected_values_normal
)
assert np.allclose(dist_normal["geinterpoleerd"].values, expected_values_normal)
expected_values_clip = np.array(
[
1.93,
Expand All @@ -255,9 +266,7 @@ def test_calc_overschrijding_clip_physical_break(df_ext_12_2010_2014):
3.90683996,
]
)
assert np.allclose(
dist_clip["geinterpoleerd"].values, expected_values_clip
)
assert np.allclose(dist_clip["geinterpoleerd"].values, expected_values_clip)


@pytest.mark.unittest
Expand Down
10 changes: 7 additions & 3 deletions tests/test_slotgemiddelden.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ def test_calc_slotgemiddelden(df_meas_2010_2014, df_ext_12_2010_2014):
assert set(slotgemiddelden_dict_noext.keys()) == set(expected_keys_noext)

# assertion of values
# fmt: off
wl_mean_peryear_expected = np.array([0.07960731, 0.08612119, 0.0853051,
0.07010864, 0.10051922])
hw_mean_peryear_expected = np.array([1.13968839, 1.12875177, 1.13988685,
Expand All @@ -77,6 +78,7 @@ def test_calc_slotgemiddelden(df_meas_2010_2014, df_ext_12_2010_2014):
-0.61334278, -0.58024113])
range_mean_peryear_expected = np.array([1.74530541, 1.71964539, 1.73330976,
1.75488888, 1.77022697])
# fmt: on
assert np.allclose(
slotgemiddelden_dict_inclext["wl_mean_peryear"].values, wl_mean_peryear_expected
)
Expand All @@ -88,9 +90,10 @@ def test_calc_slotgemiddelden(df_meas_2010_2014, df_ext_12_2010_2014):
)
assert np.allclose(
slotgemiddelden_dict_inclext["tidalrange_mean_peryear"].values,
range_mean_peryear_expected
range_mean_peryear_expected,
)

# fmt: off
wl_model_fit_expected = np.array([0.0141927, 0.08612119, 0.0853051,
0.07010864, 0.10051922, 0.23137634])
hw_model_fit_expected = np.array([1.05295416, 1.12875177, 1.13988685,
Expand All @@ -99,6 +102,7 @@ def test_calc_slotgemiddelden(df_meas_2010_2014, df_ext_12_2010_2014):
-0.61334278, -0.58024113, -0.42969074])
range_model_fit_expected = np.array([1.72715816, 1.71964539, 1.73330976,
1.75488888, 1.77022697, 1.76587273])
# fmt: on
assert np.allclose(
slotgemiddelden_dict_inclext["wl_model_fit"].values, wl_model_fit_expected
)
Expand All @@ -109,8 +113,8 @@ def test_calc_slotgemiddelden(df_meas_2010_2014, df_ext_12_2010_2014):
slotgemiddelden_dict_inclext["LW_model_fit"].values, lw_model_fit_expected
)
assert np.allclose(
slotgemiddelden_dict_inclext["tidalrange_model_fit"].values,
range_model_fit_expected
slotgemiddelden_dict_inclext["tidalrange_model_fit"].values,
range_model_fit_expected,
)


Expand Down
45 changes: 31 additions & 14 deletions tests/test_tidalindicators.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,26 +42,39 @@ def test_calc_HWLWtidalrange(df_ext_12_2010):

@pytest.mark.unittest
def test_calc_HWLWtidalindicators(df_ext_12_2010_2014):
ext_stats_notimezone = kw.calc_HWLWtidalindicators(df_ext_12_2010_2014.tz_localize(None))
ext_stats_notimezone = kw.calc_HWLWtidalindicators(
df_ext_12_2010_2014.tz_localize(None)
)
ext_stats = kw.calc_HWLWtidalindicators(df_ext_12_2010_2014)
ext_stats_min100 = kw.calc_HWLWtidalindicators(df_ext_12_2010_2014, min_coverage=1)
ext_stats_min099 = kw.calc_HWLWtidalindicators(df_ext_12_2010_2014, min_coverage=0.99)
ext_stats_min098 = kw.calc_HWLWtidalindicators(df_ext_12_2010_2014, min_coverage=0.98)

expected_keys = ['HW_mean', 'LW_mean',
'HW_mean_peryear', 'LW_mean_peryear',
'HW_monthmax_permonth', 'LW_monthmin_permonth',
'HW_monthmax_mean_peryear', 'LW_monthmax_mean_peryear',
'HW_monthmin_mean_peryear', 'LW_monthmin_mean_peryear']
ext_stats_min099 = kw.calc_HWLWtidalindicators(
df_ext_12_2010_2014, min_coverage=0.99
)
ext_stats_min098 = kw.calc_HWLWtidalindicators(
df_ext_12_2010_2014, min_coverage=0.98
)

expected_keys = [
"HW_mean",
"LW_mean",
"HW_mean_peryear",
"LW_mean_peryear",
"HW_monthmax_permonth",
"LW_monthmin_permonth",
"HW_monthmax_mean_peryear",
"LW_monthmax_mean_peryear",
"HW_monthmin_mean_peryear",
"LW_monthmin_mean_peryear",
]
for key in expected_keys:
assert key in ext_stats.keys()
assert (ext_stats[key] == ext_stats_notimezone[key]).all()

assert ext_stats_notimezone['HW_monthmax_permonth'].isnull().sum() == 0
assert ext_stats['HW_monthmax_permonth'].isnull().sum() == 0
assert ext_stats_min100['HW_monthmax_permonth'].isnull().sum() == 1
assert ext_stats_min099['HW_monthmax_permonth'].isnull().sum() == 1
assert ext_stats_min098['HW_monthmax_permonth'].isnull().sum() == 0
assert ext_stats_notimezone["HW_monthmax_permonth"].isnull().sum() == 0
assert ext_stats["HW_monthmax_permonth"].isnull().sum() == 0
assert ext_stats_min100["HW_monthmax_permonth"].isnull().sum() == 1
assert ext_stats_min099["HW_monthmax_permonth"].isnull().sum() == 1
assert ext_stats_min098["HW_monthmax_permonth"].isnull().sum() == 0


@pytest.mark.unittest
Expand All @@ -77,6 +90,7 @@ def test_calc_wltidalindicators(df_meas_2010_2014):
wl_mean_peryear_expected = np.array(
[0.07960731, 0.08612119, 0.0853051, 0.07010864, 0.10051922]
)
# fmt: off
wl_mean_permonth_expected = np.array([
-0.00227151, 0.089313 , 0.04443996, -0.03440509, -0.00206317,
0.04431481, 0.03877688, 0.18267697, 0.13494907, 0.18367832,
Expand All @@ -90,6 +104,7 @@ def test_calc_wltidalindicators(df_meas_2010_2014):
0.17321909, 0.23108102, 0.19502688, 0.06281138, 0.08588046,
-0.00553763, 0.03490278, 0.03113575, 0.03134954, 0.10553763,
0.16540771, 0.12535648, 0.20802195, 0.10014352, 0.25624552])
# fmt: on
assert np.allclose(wl_stats["wl_mean_peryear"].values, wl_mean_peryear_expected)
assert np.allclose(wl_stats["wl_mean_permonth"].values, wl_mean_permonth_expected)

Expand Down Expand Up @@ -221,6 +236,7 @@ def test_calc_wltidalindicators_ext(df_ext_12_2010_2014):
assert np.allclose(ext_stats["HW_mean_peryear"].values, hw_mean_peryear_expected)
assert np.allclose(ext_stats["LW_mean_peryear"].values, lw_mean_peryear_expected)

# fmt: off
hw_monthmax_permonth_expected = np.array([
1.94, 1.89, 1.86, 1.55, 1.74, 1.58, 1.54, 2.07, 2.11, 2.06, 1.9 ,
1.75, 1.69, 1.82, 1.49, 1.39, 1.4 , 1.71, 1.72, 1.66, 1.69, 1.59,
Expand All @@ -236,6 +252,7 @@ def test_calc_wltidalindicators_ext(df_ext_12_2010_2014):
-1.11, -1.65, -1.37, -1.11, -1.11, -1.05, -0.98, -1.07, -0.88,
-1.05, -1.15, -1.07, -1.32, -1.31, -1.21, -1.08, -1. , -1.03,
-1.07, -0.83, -0.98, -0.97, -0.99, -1.3 ])
# fmt: on
assert np.allclose(
ext_stats["HW_monthmax_permonth"].values, hw_monthmax_permonth_expected
)
Expand Down
13 changes: 9 additions & 4 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,10 @@
def test_raise_extremes_with_aggers_raise_12345df(df_ext):
with pytest.raises(ValueError) as e:
raise_extremes_with_aggers(df_ext)
assert ("df_ext should only contain extremes (HWLWcode 1/2), "
"but it also contains aggers (HWLWcode 3/4/5") in str(e.value)
assert (
"df_ext should only contain extremes (HWLWcode 1/2), "
"but it also contains aggers (HWLWcode 3/4/5"
) in str(e.value)


@pytest.mark.unittest
Expand All @@ -24,7 +26,10 @@ def test_raise_extremes_with_aggers_pass_12df(df_ext_12_2010):
@pytest.mark.unittest
def test_raise_extremes_with_aggers_emptydf():
import pandas as pd
time_index = pd.DatetimeIndex([], dtype='datetime64[ns, Etc/GMT-1]', name='time', freq=None)
df_ext = pd.DataFrame({"HWLWcode":[]},index=time_index)

time_index = pd.DatetimeIndex(
[], dtype="datetime64[ns, Etc/GMT-1]", name="time", freq=None
)
df_ext = pd.DataFrame({"HWLWcode": []}, index=time_index)
df_ext.attrs["station"] = "dummy"
raise_extremes_with_aggers(df_ext=df_ext)
Loading