From e3d0e2ada5dfd3fd05cbbebc5098806e16555778 Mon Sep 17 00:00:00 2001 From: veenstrajelmer <60435591+veenstrajelmer@users.noreply.github.com> Date: Wed, 23 Oct 2024 20:56:31 +0200 Subject: [PATCH] updated pandas version and applied diff on timedeltas (#161) * updated minimal pandas version * updated code to use diff for timedeltas * updated minimal numpy version * updated whatsnew --- docs/whats-new.md | 8 ++++---- kenmerkendewaarden/data_analysis.py | 4 +--- kenmerkendewaarden/tidalindicators.py | 4 +--- pyproject.toml | 22 +++++++++++----------- 4 files changed, 17 insertions(+), 21 deletions(-) diff --git a/docs/whats-new.md b/docs/whats-new.md index 5633466..9eb03ef 100644 --- a/docs/whats-new.md +++ b/docs/whats-new.md @@ -3,10 +3,10 @@ ## UNRELEASED ### Feat -- expanded physical_break_dict in [#151](https://github.com/Deltares-research/kenmerkendewaarden/pull/151) -- linear fit for slotgemiddelden (no nodal) in [#157](https://github.com/Deltares-research/kenmerkendewaarden/pull/157) -- exposed yearly tidal components with `kw.calc_getijcomponenten()` in [#159](https://github.com/Deltares-research/kenmerkendewaarden/pull/159) - +- expanded physical_break_dict in #151 +- linear fit for slotgemiddelden (no nodal) in #157 +- exposed yearly tidal components with `kw.calc_getijcomponenten()` in #159 +- support for timedelta `diff()` because of update to `pandas>=2.1.4` in #161 ## 0.3.0 (2024-10-01) diff --git a/kenmerkendewaarden/data_analysis.py b/kenmerkendewaarden/data_analysis.py index 91c0dad..8068286 100644 --- a/kenmerkendewaarden/data_analysis.py +++ b/kenmerkendewaarden/data_analysis.py @@ -221,9 +221,7 @@ def get_flat_meta_from_dataset(ds): def get_stats_from_dataframe(df): df_times = df.index ts_dupltimes = df_times.duplicated() - ts_timediff = ( - df_times[1:] - df_times[:-1] - ) # TODO: from pandas 2.1.4 the following also works: df_times.diff()[1:] + ts_timediff = df_times.diff()[1:] ds_stats = {} ds_stats["tstart"] = df_times.min() diff --git 
a/kenmerkendewaarden/tidalindicators.py b/kenmerkendewaarden/tidalindicators.py index 45f5ab0..04e77e4 100644 --- a/kenmerkendewaarden/tidalindicators.py +++ b/kenmerkendewaarden/tidalindicators.py @@ -221,9 +221,7 @@ def compute_expected_counts(ser_meas, freq): """ # TODO: beware of series with e.g. only first and last value of month/year, this will result in freq=30days and then expected count of 2, it will pass even if there is almost no data df_meas = pd.DataFrame(ser_meas) - df_meas["timediff"] = pd.TimedeltaIndex([pd.NaT]).append( - df_meas.index[1:] - df_meas.index[:-1] - ) # TODO: from pandas>=2.1.4 the following also works: df_times.diff() (which results in a timedeltaindex of the correct length) + df_meas["timediff"] = df_meas.index.diff() period_index = pd.PeriodIndex(df_meas.index, freq=freq) # compute median freq, the mean could be skewed in case of large gaps median_freq = df_meas.groupby(period_index)["timediff"].median() diff --git a/pyproject.toml b/pyproject.toml index c2c65d2..393937e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,25 +12,25 @@ keywords = ["kenmerkendewaarden"] license = { text = "GPLv3" } requires-python = ">=3.9" dependencies = [ - #numpy 1.21 is EOL since june 2023 - "numpy>=1.22.0", - #pandas<2.0.0 supports non-nanosecond timestep and therefore larger min/max range - "pandas>=2.0.0", - #matplotlib<3.5.2 raises "ValueError: Multi-dimensional indexing" in hatyan.plot_timeseries() + #numpy>=1.25.0 supports wide range of python versions + "numpy>=1.25.0", + #pandas>=2.1.4 supports diff on timedeltas + "pandas>=2.1.4", + #matplotlib>=3.5.2 resolved "ValueError: Multi-dimensional indexing" in hatyan.plot_timeseries() "matplotlib>=3.5.2", - #netcdf4<1.5.4 pip install fails in py39 + #netcdf4>=1.5.4 pip install succeeds in py39 "netcdf4>=1.5.4", - #xarray<2023.4.0 conflicts with pandas<2.0.0 for resampling, only available for py39 + #xarray>=2023.4.0 works with pandas>=2.0.0 for resampling, only available for py39
"xarray>=2023.4.0", #rws-ddlpy>=0.6.0 returns correct measurements_amount dataframe "rws-ddlpy>=0.6.0", - #hatyan<2.9.0 has different datetime/tzone handling in astrog + #hatyan>=2.9.0 has updated datetime/tzone handling in astrog "hatyan>=2.9.0", - #statsmodels<0.13.2 has no support for pandas 2.0.0 + #statsmodels>=0.13.2 has support for pandas 2.0.0 "statsmodels>=0.13.2", - #pyproj<3.1.0 not pip installable in py38 + #pyproj>=3.1.0 is pip installable in py38 "pyproj>=3.1.0", - #pooch<1.1.0 do not have attribute retrieve + #pooch>=1.1.0 has the retrieve attribute "pooch>=1.1.0", ] classifiers = [