From 65adefef07737aaf931be61ab7ddf2971a8cd4af Mon Sep 17 00:00:00 2001 From: veenstrajelmer <60435591+veenstrajelmer@users.noreply.github.com> Date: Mon, 24 Jun 2024 16:07:46 +0200 Subject: [PATCH] 97 improve docstrings for public functions (#98) * docstrings for data_retrieve.py * add docstrings to data_analysis.py * added docstrings for tidalindicators.py * added docstrings for havengetallen.py --- kenmerkendewaarden/data_analysis.py | 83 +++++++++++++++++- kenmerkendewaarden/data_retrieve.py | 90 +++++++++++++++++++- kenmerkendewaarden/havengetallen.py | 42 +++++++++- kenmerkendewaarden/overschrijding.py | 13 +-- kenmerkendewaarden/tidalindicators.py | 116 ++++++++++++++------------ tests/test_tidalindicators.py | 6 +- 6 files changed, 277 insertions(+), 73 deletions(-) diff --git a/kenmerkendewaarden/data_analysis.py b/kenmerkendewaarden/data_analysis.py index 87c17ac..8bf7042 100644 --- a/kenmerkendewaarden/data_analysis.py +++ b/kenmerkendewaarden/data_analysis.py @@ -22,7 +22,26 @@ logger = logging.getLogger(__name__) -def plot_measurements_amount(df, relative=False): +def plot_measurements_amount(df:pd.DataFrame, relative:bool = False): + """ + Read the measurements amount csv and generate a pcolormesh figure of all years and stations. + The colors indicate the absolute or relative number of measurements per year. + + Parameters + ---------- + df : pd.DataFrame + Dataframe with the amount of measurements for several years per station. + relative : bool, optional + Whether to scale the amount of measurements with the median of all measurement amounts for the same year. The default is False. + + Returns + ------- + fig : matplotlib.figure.Figure + Figure handle. + ax : matplotlib.axes._axes.Axes + Figure axis handle. + + """ df = df.copy() df[df==0] = np.nan @@ -45,7 +64,25 @@ def plot_measurements_amount(df, relative=False): return fig, ax -def plot_measurements(df, df_ext=None): +def plot_measurements(df:pd.DataFrame, df_ext:pd.DataFrame = None): + """ + Generate a timeseries figure for the measurement timeseries (and extremes) of this station. + + Parameters + ---------- + df : pd.DataFrame + Dataframe with the measurement timeseries for a particular station. + df_ext : pd.DataFrame, optional + Dataframe with the measurement extremes for a particular station. + + Returns + ------- + fig : matplotlib.figure.Figure + Figure handle. + ax : matplotlib.axes._axes.Axes + Figure axis handle. + + """ station_df = df.attrs["station"] if df_ext is not None: station_df_ext = df_ext.attrs["station"] @@ -83,7 +120,27 @@ def plot_measurements(df, df_ext=None): return fig, (ax1,ax2) -def plot_stations(station_list, crs=None, add_labels=False): +def plot_stations(station_list:list, crs:int = None, add_labels:bool = False): + """ + Plot the stations by subsetting a ddlpy catalog with the provided list of stations. + + Parameters + ---------- + station_list : list + List of stations to plot the locations from. + crs : int, optional + Coordinate reference system, for instance 28992. The coordinates retrieved from the DDL will be converted to this EPSG. The default is None. + add_labels : bool, optional + Whether to add station code labels in the figure, useful for debugging. The default is False. + + Returns + ------- + fig : matplotlib.figure.Figure + Figure handle. + ax : matplotlib.axes._axes.Axes + Figure axis handle. 
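+
+    Examples
+    --------
+    A minimal usage sketch; the station code below is only an example:
+
+    >>> from kenmerkendewaarden.data_analysis import plot_stations
+    >>> fig, ax = plot_stations(station_list=["HOEKVHLD"], crs=28992, add_labels=True)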
+
+    """
     locs_meas_ts_all, locs_meas_ext_all, _ = retrieve_catalog(crs=crs)
     locs_ts = locs_meas_ts_all.loc[locs_meas_ts_all.index.isin(station_list)]
     locs_ext = locs_meas_ext_all.loc[locs_meas_ext_all.index.isin(station_list)]
@@ -189,7 +246,25 @@ def get_stats_from_dataframe(df):
     return ds_stats
 
 
-def derive_statistics(dir_output, station_list, extremes):
+def derive_statistics(dir_output:str, station_list:list, extremes:bool):
+    """
+    Derive several statistics for the measurements of each station in the list.
+
+    Parameters
+    ----------
+    dir_output : str
+        Path where the measurement netcdf files are stored.
+    station_list : list
+        List of station names to derive statistics for, for instance ["HOEKVHLD"].
+    extremes : bool
+        Whether to derive statistics from waterlevel timeseries or extremes.
+
+    Returns
+    -------
+    data_summary : pd.DataFrame
+        A dataframe with several statistics for each station from the provided list.
+
+    """
     row_list = []
     for current_station in station_list:
         logger.info(f'deriving statistics for {current_station} (extremes={extremes})')
diff --git a/kenmerkendewaarden/data_retrieve.py b/kenmerkendewaarden/data_retrieve.py
index ebb7353..6666832 100644
--- a/kenmerkendewaarden/data_retrieve.py
+++ b/kenmerkendewaarden/data_retrieve.py
@@ -86,7 +86,28 @@ def check_locations_amount(locations):
         raise ValueError(f"multiple stations present after station subsetting:\n{locations}")
 
 
-def retrieve_measurements_amount(dir_output, station_list, extremes:bool, start_date, end_date):
+def retrieve_measurements_amount(dir_output:str, station_list:list, extremes:bool, start_date:pd.Timestamp, end_date:pd.Timestamp):
+    """
+    Retrieve the amount of measurements or extremes for a list of stations from the DDL with ddlpy.
+
+    Parameters
+    ----------
+    dir_output : str
+        Path where the measurements amount csv file will be stored.
+    station_list : list
+        List of station names, for instance ["HOEKVHLD"].
+    extremes : bool
+        Whether to read measurements for waterlevel timeseries or extremes.
+    start_date : pd.Timestamp (or anything understood by pd.Timestamp)
+        start date of the measurements to be retrieved.
+    end_date : pd.Timestamp (or anything understood by pd.Timestamp)
+        end date of the measurements to be retrieved.
+
+    Returns
+    -------
+    None
+
+    """
     locs_meas_ts, locs_meas_ext, locs_meas_exttype = retrieve_catalog()
 
     if extremes:
@@ -130,7 +151,23 @@ def retrieve_measurements_amount(dir_output, station_list, extremes:bool, start_
     df_amount.to_csv(file_csv_amount)
 
 
-def read_measurements_amount(dir_output, extremes:bool):
+def read_measurements_amount(dir_output:str, extremes:bool):
+    """
+    Read the measurements amount csv into a dataframe.
+
+    Parameters
+    ----------
+    dir_output : str
+        Path where the measurements are stored.
+    extremes : bool
+        Whether to read measurements amount for waterlevel timeseries or extremes.
+
+    Returns
+    -------
+    df_amount : pd.DataFrame
+        DataFrame with the amount of measurements per year.
+
+    """
     if extremes:
         fname = DICT_FNAMES['amount_ext']
     else:
@@ -146,7 +183,30 @@ def read_measurements_amount(dir_output, extremes:bool):
     return df_amount
 
 
-def retrieve_measurements(dir_output:str, station:str, extremes:bool, start_date, end_date, drop_if_constant=None):
+def retrieve_measurements(dir_output:str, station:str, extremes:bool, start_date:pd.Timestamp, end_date:pd.Timestamp, drop_if_constant:list = None):
+    """
+    Retrieve timeseries with measurements or extremes for a single station from the DDL with ddlpy.
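+
+    The resulting timeseries is stored as a netcdf file in dir_output. A minimal usage
+    sketch; the station code and dates below are only examples:
+
+    >>> from kenmerkendewaarden.data_retrieve import retrieve_measurements
+    >>> retrieve_measurements(dir_output=".", station="HOEKVHLD", extremes=False,
+    ...                       start_date="2020-01-01", end_date="2021-01-01")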
+
+    Parameters
+    ----------
+    dir_output : str
+        Path where the measurement netcdf file will be stored.
+    station : str
+        station name, for instance "HOEKVHLD".
+    extremes : bool
+        Whether to read measurements for waterlevel timeseries or extremes.
+    start_date : pd.Timestamp (or anything understood by pd.Timestamp)
+        start date of the measurements to be retrieved.
+    end_date : pd.Timestamp (or anything understood by pd.Timestamp)
+        end date of the measurements to be retrieved.
+    drop_if_constant : list, optional
+        A list of columns to drop if the row values are constant, to save disk space. The default is None.
+
+    Returns
+    -------
+    None
+
+    """
 
     locs_meas_ts, locs_meas_ext, locs_meas_exttype = retrieve_catalog()
 
@@ -225,7 +285,29 @@ def xarray_to_hatyan(ds):
     return df
 
 
-def read_measurements(dir_output:str, station:str, extremes:bool, return_xarray=False, nap_correction=False):
+def read_measurements(dir_output:str, station:str, extremes:bool, return_xarray:bool = False, nap_correction:bool = False):
+    """
+    Read the measurements netcdf as a dataframe.
+
+    Parameters
+    ----------
+    dir_output : str
+        Path where the measurements are stored.
+    station : str
+        station name, for instance "HOEKVHLD".
+    extremes : bool
+        Whether to read measurements for waterlevel timeseries or extremes.
+    return_xarray : bool, optional
+        Whether to return raw xarray.Dataset instead of a DataFrame. The default is False.
+    nap_correction : bool, optional
+        Whether to correct for NAP2005. The default is False.
+
+    Returns
+    -------
+    df_meas : pd.DataFrame
+        DataFrame with the measurements or extremes timeseries.
+
+    """
 
     if extremes:
         fname = DICT_FNAMES["meas_ext"].format(station=station)
diff --git a/kenmerkendewaarden/havengetallen.py b/kenmerkendewaarden/havengetallen.py
index f3e492b..e593a95 100644
--- a/kenmerkendewaarden/havengetallen.py
+++ b/kenmerkendewaarden/havengetallen.py
@@ -49,8 +49,8 @@ def calc_havengetallen(df_ext:pd.DataFrame, return_df_ext=False, min_coverage=No
     df_havengetallen : pd.DataFrame
         DataFrame with havengetallen for all hour-classes. 0 corresponds to spring, 6 corresponds to neap, mean is mean.
-    return_df_ext : pd.DataFrame
-        An enriched copy of the input DataFrame, mainly for plotting.
+    df_ext : pd.DataFrame
+        An enriched copy of the input DataFrame including a 'culm_hr' column.
 
     """
     raise_extremes_with_aggers(df_ext)
@@ -174,7 +174,25 @@ def calc_HWLW_culmhr_summary_tidalcoeff(df_ext):
     return HWLW_culmhr_summary
 
 
-def plot_HWLW_pertimeclass(df_ext, df_havengetallen):
+def plot_HWLW_pertimeclass(df_ext:pd.DataFrame, df_havengetallen:pd.DataFrame):
+    """
+    Plot the extremes for each hour-class, including a median line.
+
+    Parameters
+    ----------
+    df_ext : pd.DataFrame
+        DataFrame with measurement extremes, as provided by `kw.calc_havengetallen()` with return_df_ext=True.
+    df_havengetallen : pd.DataFrame
+        DataFrame with havengetallen for all hour-classes, as provided by `kw.calc_havengetallen()`.
+
+    Returns
+    -------
+    fig : matplotlib.figure.Figure
+        Figure handle.
+    axs : matplotlib.axes._axes.Axes
+        Figure axes handles.
+
+    """
 
     assert 'culm_hr' in df_ext.columns
 
@@ -204,7 +222,23 @@ def plot_HWLW_pertimeclass(df_ext, df_havengetallen):
     return fig, axs
 
 
-def plot_aardappelgrafiek(df_havengetallen):
+def plot_aardappelgrafiek(df_havengetallen:pd.DataFrame):
+    """
+    Plot the median values of each hour-class in an aardappelgrafiek.
+
+    Parameters
+    ----------
+    df_havengetallen : pd.DataFrame
+        DataFrame with havengetallen for all hour-classes, as provided by `kw.calc_havengetallen()`.
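+        Only the hour-classes 0 to 11 are plotted; the 'mean' values are dropped first.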
+ + Returns + ------- + fig : matplotlib.figure.Figure + Figure handle. + ax : matplotlib.axes._axes.Axes + Figure axis handle. + + """ # remove mean column HWLW_culmhr_summary = df_havengetallen.loc[:11].copy() diff --git a/kenmerkendewaarden/overschrijding.py b/kenmerkendewaarden/overschrijding.py index f9b5dbb..736de82 100644 --- a/kenmerkendewaarden/overschrijding.py +++ b/kenmerkendewaarden/overschrijding.py @@ -9,7 +9,6 @@ from matplotlib import ticker from scipy import optimize, signal from typing import Union, List -import datetime as dt import logging from kenmerkendewaarden.data_retrieve import clip_timeseries_physical_break from kenmerkendewaarden.utils import raise_extremes_with_aggers @@ -30,7 +29,7 @@ def get_threshold_rowidx(df): def calc_overschrijding(df_ext:pd.DataFrame, dist:dict = None, inverse:bool = False, clip_physical_break:bool = False, - rule_type:str = None, rule_value=None, + rule_type:str = None, rule_value:(pd.Timestamp, float) = None, interp_freqs:list = None): """ Compute exceedance/deceedance frequencies based on measured extreme waterlevels. @@ -47,8 +46,9 @@ def calc_overschrijding(df_ext:pd.DataFrame, dist:dict = None, Whether to exclude the part of the timeseries before physical breaks like estuary closures. The default is False. rule_type : str, optional break/linear/None, passed on to apply_trendanalysis(). The default is None. - rule_value : TYPE, optional - Value corresponding to rule_type. The default is None. + rule_value : (pd.Timestamp, float), optional + Value corresponding to rule_type, pd.Timestamp (or anything understood by pd.Timestamp) + in case of rule_type='break', float in case of rule_type='linear'. The default is None. interp_freqs : list, optional The frequencies to interpolate to, providing this will result in a "Geinterpoleerd" key in the returned dictionary. The default is None. @@ -321,7 +321,7 @@ def get_total_years(df: pd.DataFrame) -> float: return (df.index[-1] - df.index[0]).total_seconds() / (3600 * 24 * 365) -def apply_trendanalysis(df: pd.DataFrame, rule_type: str, rule_value: Union[float, dt.datetime]): +def apply_trendanalysis(df: pd.DataFrame, rule_type: str, rule_value: Union[pd.Timestamp, float]): # There are 2 rule types: - break -> Values before break are removed # - linear -> Values are increased/lowered based on value in value/year. It is assumes # that there is no linear trend at the latest time (so it works its way back @@ -329,7 +329,8 @@ def apply_trendanalysis(df: pd.DataFrame, rule_type: str, rule_value: Union[floa if rule_type == 'break': return df[rule_value:].copy() elif rule_type == 'linear': - df, rule_value = df.copy(), float(rule_value) + rule_value = float(rule_value) + df = df.copy() dx = np.array([rule_value*x.total_seconds()/(365*24*3600) for x in (df.index[-1] - df.index)]) df['values'] = df['values'] + dx return df diff --git a/kenmerkendewaarden/tidalindicators.py b/kenmerkendewaarden/tidalindicators.py index e1373f2..7aa6751 100644 --- a/kenmerkendewaarden/tidalindicators.py +++ b/kenmerkendewaarden/tidalindicators.py @@ -23,21 +23,21 @@ logger = logging.getLogger(__name__) -def calc_HWLWtidalindicators(df_ext, min_coverage:float = None): +def calc_HWLWtidalindicators(df_ext:pd.DataFrame, min_coverage:float = None): """ - computes several tidal extreme indicators from tidal extreme dataset + Computes several tidal extreme indicators from tidal extreme dataset. Parameters ---------- - data_pd_HWLW_all : TYPE - DESCRIPTION. + df_ext : pd.DataFrame + Dataframe with extremes timeseries. 
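+        For instance the extremes dataframe returned by read_measurements() with extremes=True.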
min_coverage : float, optional The minimum percentage (from 0 to 1) of timeseries coverage to consider the statistics to be valid. The default is None. Returns ------- - dict_tidalindicators : TYPE - DESCRIPTION. + dict_tidalindicators : dict + Dictionary with several tidal indicators like yearly/monthly means. """ # dropping the timezone makes the code below much faster and gives equal results: https://github.com/pandas-dev/pandas/issues/58956 @@ -93,57 +93,57 @@ def calc_HWLWtidalindicators(df_ext, min_coverage:float = None): HW_monthmin_mean_peryear = HW_monthmin_permonth.groupby(pd.PeriodIndex(HW_monthmin_permonth.index, freq="Y"))[['values']].mean() LW_monthmax_mean_peryear = LW_monthmax_permonth.groupby(pd.PeriodIndex(LW_monthmax_permonth.index, freq="Y"))[['values']].mean() - dict_HWLWtidalindicators = {'HW_mean':data_pd_HW['values'].mean(), #GHW - 'LW_mean':data_pd_LW['values'].mean(), #GLW - 'HW_mean_peryear':HW_mean_peryear['values'], #GHW peryear - 'LW_mean_peryear':LW_mean_peryear['values'], #GLW peryear - 'HW_monthmax_permonth':HW_monthmax_permonth['values'], #GHHW/GHWS permonth - 'LW_monthmin_permonth':LW_monthmin_permonth['values'], #GLLW/GLWS permonth - 'HW_monthmax_mean_peryear':HW_monthmax_mean_peryear['values'], #GHHW/GHWS peryear - 'LW_monthmin_mean_peryear':LW_monthmin_mean_peryear['values'], #GLLW/GLWS peryear - 'HW_monthmin_mean_peryear':HW_monthmin_mean_peryear['values'], #GLHW/GHWN peryear - 'LW_monthmax_mean_peryear':LW_monthmax_mean_peryear['values'], #GHLW/GLWN peryear - } - - return dict_HWLWtidalindicators - - -def calc_wltidalindicators(data_wl_pd, min_coverage:float = None): + dict_tidalindicators = {'HW_mean':data_pd_HW['values'].mean(), #GHW + 'LW_mean':data_pd_LW['values'].mean(), #GLW + 'HW_mean_peryear':HW_mean_peryear['values'], #GHW peryear + 'LW_mean_peryear':LW_mean_peryear['values'], #GLW peryear + 'HW_monthmax_permonth':HW_monthmax_permonth['values'], #GHHW/GHWS permonth + 'LW_monthmin_permonth':LW_monthmin_permonth['values'], #GLLW/GLWS permonth + 'HW_monthmax_mean_peryear':HW_monthmax_mean_peryear['values'], #GHHW/GHWS peryear + 'LW_monthmin_mean_peryear':LW_monthmin_mean_peryear['values'], #GLLW/GLWS peryear + 'HW_monthmin_mean_peryear':HW_monthmin_mean_peryear['values'], #GLHW/GHWN peryear + 'LW_monthmax_mean_peryear':LW_monthmax_mean_peryear['values'], #GHLW/GLWN peryear + } + + return dict_tidalindicators + + +def calc_wltidalindicators(df_meas:pd.DataFrame, min_coverage:float = None): """ - computes monthly and yearly means from waterlevel timeseries + Computes monthly and yearly means from waterlevel timeseries. Parameters ---------- - data_wl_pd : TYPE - DESCRIPTION. + df_meas : pd.DataFrame + Dataframe with waterlevel timeseries. min_coverage : float, optional The minimum percentage (from 0 to 1) of timeseries coverage to consider the statistics to be valid. The default is None. Returns ------- - dict_wltidalindicators : TYPE - DESCRIPTION. + dict_tidalindicators : dict + Dictionary with several tidal indicators like yearly/monthly means. 
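+
+    Examples
+    --------
+    A minimal usage sketch, assuming measurements were already retrieved into dir_meas
+    (the station code is only an example):
+
+    >>> from kenmerkendewaarden.data_retrieve import read_measurements
+    >>> from kenmerkendewaarden.tidalindicators import calc_wltidalindicators
+    >>> df_meas = read_measurements(dir_output=dir_meas, station="HOEKVHLD", extremes=False)
+    >>> dict_wl = calc_wltidalindicators(df_meas, min_coverage=0.95)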
""" # dropping the timezone makes the code below much faster and gives equal results: https://github.com/pandas-dev/pandas/issues/58956 - if data_wl_pd.index.tz is not None: - data_wl_pd = data_wl_pd.tz_localize(None) + if df_meas.index.tz is not None: + df_meas = df_meas.tz_localize(None) # yearmean wl from wl values - wl_mean_peryear = data_wl_pd.groupby(pd.PeriodIndex(data_wl_pd.index, freq="Y"))[['values']].mean() - wl_mean_permonth = data_wl_pd.groupby(pd.PeriodIndex(data_wl_pd.index, freq="M"))[['values']].mean() + wl_mean_peryear = df_meas.groupby(pd.PeriodIndex(df_meas.index, freq="Y"))[['values']].mean() + wl_mean_permonth = df_meas.groupby(pd.PeriodIndex(df_meas.index, freq="M"))[['values']].mean() # replace invalids with nan (in case of too less values per month or year) if min_coverage is not None: assert 0 <= min_coverage <= 1 # count timeseries values per year/month - wl_count_peryear = compute_actual_counts(data_wl_pd, freq="Y") - wl_count_permonth = compute_actual_counts(data_wl_pd, freq="M") + wl_count_peryear = compute_actual_counts(df_meas, freq="Y") + wl_count_permonth = compute_actual_counts(df_meas, freq="M") # compute expected counts and multiply with min_coverage to get minimal counts - min_count_peryear = compute_expected_counts(data_wl_pd, freq="Y") * min_coverage - min_count_permonth = compute_expected_counts(data_wl_pd, freq="M") * min_coverage + min_count_peryear = compute_expected_counts(df_meas, freq="Y") * min_coverage + min_count_permonth = compute_expected_counts(df_meas, freq="M") * min_coverage # set all statistics that were based on too little values to nan wl_mean_peryear.loc[wl_count_peryear tuple: @@ -288,7 +300,7 @@ def calc_hat_lat_fromcomponents(comp: pd.DataFrame) -> tuple: Parameters ---------- comp : pd.DataFrame - DESCRIPTION. + DataFrame with amplitudes and phases for a list of components. Returns ------- diff --git a/tests/test_tidalindicators.py b/tests/test_tidalindicators.py index 42e2ed9..ed4cfea 100644 --- a/tests/test_tidalindicators.py +++ b/tests/test_tidalindicators.py @@ -9,9 +9,9 @@ @pytest.mark.unittest def test_calc_HWLWtidalrange(df_ext_12_2010): - ts_ext_range = kw.calc_HWLWtidalrange(df_ext_12_2010) + df_ext_range = kw.calc_HWLWtidalrange(df_ext_12_2010) - ranges = ts_ext_range["tidalrange"].values + ranges = df_ext_range["tidalrange"].values vals_expected = np.array([1.89, 1.89, 1.87, 1.87, 1.97, 1.97, 2.05, 2.05, 2.05, 2.05]) assert len(ranges) == 1411 assert np.allclose(ranges[:10], vals_expected) @@ -193,7 +193,7 @@ def test_calc_hat_lat_frommeasurements_tooshortperiod(df_meas_2010_2014): @pytest.mark.unittest def test_calc_HWLWtidalrange_aggers_input(df_ext_2010): with pytest.raises(ValueError) as e: - kw.calc_HWLWtidalrange(ts_ext=df_ext_2010) + kw.calc_HWLWtidalrange(df_ext=df_ext_2010) assert "contains aggers" in str(e.value)