diff --git a/AUTHORS.rst b/AUTHORS.rst index 504de88b76..50cd43a3d4 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -30,6 +30,7 @@ Core contributors Contributors ------------- +* `Gansheng Tan `_ *(Washington University, USA)* * `Hung Pham `_ *(Eureka Robotics, Singapore)* * `Christopher Schölzel `_ *(THM University of Applied Sciences, Germany)* * `Duy Le `_ *(Hubble, Singapore)* @@ -53,7 +54,7 @@ Contributors * `Marek Sokol `_ *(Faculty of Biomedical Engineering of the CTU in Prague, Czech Republic)* -Thanks also to `Gansheng Tan `_, `Chuan-Peng Hu `_, `@ucohen `_, `Anthony Gatti `_, `Julien Lamour `_, `@renatosc `_, `Nicolas Beaudoin-Gagnon `_ and `@rubinovitz `_ for their contribution in `NeuroKit 1 `_. +Thanks also to `Chuan-Peng Hu `_, `@ucohen `_, `Anthony Gatti `_, `Julien Lamour `_, `@renatosc `_, `Nicolas Beaudoin-Gagnon `_ and `@rubinovitz `_ for their contribution in `NeuroKit 1 `_. diff --git a/NEWS.rst b/NEWS.rst index 0bd047fb6a..0cfd6a1047 100644 --- a/NEWS.rst +++ b/NEWS.rst @@ -1,7 +1,16 @@ News ===== +0.2.4 +------------------- +Fixes ++++++++++++++ +* `eda_sympathetic()` has been reviewed: low-pass filter and resampling have been added to be in + line with the original paper +* `eda_findpeaks()` using methods proposed in nabian2018 is reviewed and improved. Differentiation + has been added before smoothing. Skin conductance response criteria have been revised based on + the original paper. diff --git a/docs/functions/eda.rst b/docs/functions/eda.rst index 90c50366d1..358a963e3b 100644 --- a/docs/functions/eda.rst +++ b/docs/functions/eda.rst @@ -31,14 +31,6 @@ Preprocessing """"""""""""""""""""" .. autofunction:: neurokit2.eda.eda_phasic -*eda_autocor()* -""""""""""""""" -.. autofunction:: neurokit2.eda.eda_autocor - -*eda_changepoints()* -""""""""""""""""""""" -.. autofunction:: neurokit2.eda.eda_changepoints - *eda_peaks()* """"""""""""""""""""" .. autofunction:: neurokit2.eda.eda_peaks @@ -47,9 +39,6 @@ Preprocessing """"""""""""""""""""" .. autofunction:: neurokit2.eda.eda_fixpeaks -*eda_sympathetic()* -""""""""""""""""""""" -.. autofunction:: neurokit2.eda.eda_sympathetic Analysis @@ -62,6 +51,18 @@ Analysis """"""""""""""""""""""""" .. autofunction:: neurokit2.eda.eda_intervalrelated +*eda_sympathetic()* +""""""""""""""""""""" +.. autofunction:: neurokit2.eda.eda_sympathetic + +*eda_autocor()* +""""""""""""""" +.. autofunction:: neurokit2.eda.eda_autocor + +*eda_changepoints()* +""""""""""""""""""""" +.. autofunction:: neurokit2.eda.eda_changepoints + Miscellaneous diff --git a/docs/functions/hrv.rst b/docs/functions/hrv.rst index 593b20ac05..fc316cbb2a 100644 --- a/docs/functions/hrv.rst +++ b/docs/functions/hrv.rst @@ -46,5 +46,5 @@ Intervals .. automodule:: neurokit2.hrv :members: - :exclude-members: hrv, hrv_time, hrv_frequency, hrv_nonlinear, hrv_rqa, hrv_rsa + :exclude-members: hrv, hrv_time, hrv_frequency, hrv_nonlinear, hrv_rqa, hrv_rsa, intervals_process, intervals_to_peaks diff --git a/docs/installation.rst b/docs/installation.rst index 36f700f72a..81a9206ec0 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -23,7 +23,7 @@ Then, at the top of each of your Python script, you should be able to import the .. 
code-block:: console

-    pip install https://github.com/neuropsychology/neurokit/zipball/dev
+    pip install https://github.com/neuropsychology/neurokit/zipball/dev --upgrade

diff --git a/docs/readme/README_examples.py b/docs/readme/README_examples.py
index e5aa375417..ce1295d4c3 100644
--- a/docs/readme/README_examples.py
+++ b/docs/readme/README_examples.py
@@ -10,6 +10,7 @@
 # Setup matplotlib with Agg to run on server
 matplotlib.use("Agg")
 plt.rcParams["figure.figsize"] = (10, 6.5)
+plt.rcParams["savefig.facecolor"] = "white"

 # =============================================================================
 # Quick Example
 # =============================================================================

diff --git a/neurokit2/__init__.py b/neurokit2/__init__.py
index d33895d0b2..7fa6113c27 100644
--- a/neurokit2/__init__.py
+++ b/neurokit2/__init__.py
@@ -32,7 +32,7 @@
 from .video import *

 # Info
-__version__ = "0.2.3"
+__version__ = "0.2.4"

 # Maintainer info

diff --git a/neurokit2/complexity/__init__.py b/neurokit2/complexity/__init__.py
index 5143fbc2a6..88e1e555aa 100644
--- a/neurokit2/complexity/__init__.py
+++ b/neurokit2/complexity/__init__.py
@@ -8,6 +8,7 @@
 from .complexity_lyapunov import complexity_lyapunov
 from .complexity_relativeroughness import complexity_relativeroughness
 from .complexity_rqa import complexity_rqa
+from .entropy_angular import entropy_angular
 from .entropy_approximate import entropy_approximate
 from .entropy_attention import entropy_attention
 from .entropy_bubble import entropy_bubble
@@ -155,6 +156,7 @@
     "complexity_dfa",
     "complexity_relativeroughness",
     "complexity_rqa",
+    "entropy_angular",
     "entropy_maximum",
     "entropy_shannon",
     "entropy_shannon_joint",

diff --git a/neurokit2/complexity/entropy_angular.py b/neurokit2/complexity/entropy_angular.py
new file mode 100644
index 0000000000..128e7e5121
--- /dev/null
+++ b/neurokit2/complexity/entropy_angular.py
@@ -0,0 +1,147 @@
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import scipy.stats
+
+from .utils_complexity_embedding import complexity_embedding
+
+
+def entropy_angular(signal, delay=1, dimension=2, show=False, **kwargs):
+    """**Angular entropy (AngEn)**
+
+    The Angular Entropy (AngEn) is the name that we use in NeuroKit to refer to the complexity
+    method described in Nardelli et al. (2022), referred to as comEDA due to its application to
+    the EDA signal. The method comprises the following steps: 1) phase space reconstruction;
+    2) calculation of the angular distances between all the pairs of points in the phase space;
+    3) computation of the probability density function (PDF) of the distances; and 4) quadratic
+    Rényi entropy of the PDF.
+
+    Parameters
+    ----------
+    signal : Union[list, np.array, pd.Series]
+        The signal (i.e., a time series) in the form of a vector of values.
+    delay : int
+        Time delay (often denoted *Tau* :math:`\\tau`, sometimes referred to as *lag*) in samples.
+        See :func:`complexity_delay` to estimate the optimal value for this parameter.
+    dimension : int
+        Embedding Dimension (*m*, sometimes referred to as *d* or *order*). See
+        :func:`complexity_dimension` to estimate the optimal value for this parameter.
+    show : bool
+        If ``True``, plot the distribution of the angular distances.
+    **kwargs : optional
+        Other arguments.
+
+    Returns
+    --------
+    angen : float
+        The Angular Entropy (AngEn) of the signal.
+    info : dict
+        A dictionary containing additional information regarding the parameters used
+        to compute the index.
+
+    See Also
+    --------
+    entropy_renyi
+
+    Examples
+    ----------
+    ..
ipython:: python + + import neurokit2 as nk + + # Simulate a Signal with Laplace Noise + signal = nk.signal_simulate(duration=2, frequency=[5, 3], noise=0.1) + + # Compute Angular Entropy + @savefig p_entropy_angular1.png scale=100% + angen, info = nk.entropy_angular(signal, delay=1, dimension=3, show=True) + @suppress + plt.close() + + + References + ----------- + * Nardelli, M., Greco, A., Sebastiani, L., & Scilingo, E. P. (2022). ComEDA: A new tool for + stress assessment based on electrodermal activity. Computers in Biology and Medicine, 150, + 106144. + + """ + # Sanity checks + if isinstance(signal, (np.ndarray, pd.DataFrame)) and signal.ndim > 1: + raise ValueError( + "Multidimensional inputs (e.g., matrices or multichannel data) are not supported yet." + ) + + # 1. Phase space reconstruction (time-delay embeddings) + embedded = complexity_embedding(signal, delay=delay, dimension=dimension) + + # 2. Angular distances between all the pairs of points in the phase space + angles = _angular_distance(embedded) + + # 3. Compute the probability density function (PDF) of the upper triangular matrix + bins, pdf = _kde_sturges(angles) + + # 4. Apply the quadratic Rényi entropy to the PDF + angen = -np.log2(np.sum(pdf**2)) + + # Normalize to the range [0, 1] by the log of the number of bins + + # Note that in the paper (eq. 4 page 4) there is a minus sign, but adding it would give + # negative values, plus the linked code does not seem to do that + # https://github.com/NardelliM/ComEDA/blob/main/comEDA.m#L103 + angen = angen / np.log2(len(bins)) + + if show is True: + # Plot the PDF as a bar chart + plt.bar(bins[:-1], pdf, width=bins[1] - bins[0], align="edge", alpha=0.5) + # Set the x-axis limits to the range of the data + plt.xlim([np.min(angles), np.max(angles)]) + # Print titles + plt.suptitle(f"Angular Entropy (AngEn) = {angen:.3f}") + plt.title("Distribution of Angular Distances:") + + return angen, {"bins": bins, "pdf": pdf} + + +def _angular_distance(m): + """ + Compute angular distances between all the pairs of points. + """ + # Get index of upper triangular to avoid double counting + idx = np.triu_indices(m.shape[0], k=1) + + # compute the magnitude of each vector + magnitudes = np.linalg.norm(m, axis=1) + + # compute the dot product between all pairs of vectors using np.matmul function, which is + # more efficient than np.dot for large matrices; and divide the dot product matrix by the + # product of the magnitudes to get the cosine of the angle + cos_angles = np.matmul(m, m.T)[idx] / np.outer(magnitudes, magnitudes)[idx] + + # clip the cosine values to the range [-1, 1] to avoid any numerical errors and compute angles + return np.arccos(np.clip(cos_angles, -1, 1)) + + +def _kde_sturges(x): + """ + Computes the PDF of a vector x using a kernel density estimator based on linear diffusion + processes with a Gaussian kernel. The number of bins of the PDF is chosen applying the Sturges + method. 
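+
+    The bandwidth of the Gaussian kernel is set with a rule of thumb based on the interquartile
+    range (``0.9 * IQR / n**(1/5)``), and the number of bins follows Sturges' rule
+    (``ceil(log2(n) + 1)``), matching the computations below.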
+ """ + # Estimate the bandwidth + iqr = np.percentile(x, 75) - np.percentile(x, 25) + bandwidth = 0.9 * iqr / (len(x) ** 0.2) + + # Compute the number of bins using the Sturges method + nbins = int(np.ceil(np.log2(len(x)) + 1)) + + # Compute the bin edges + bins = np.linspace(np.min(x), np.max(x), nbins + 1) + + # Compute the kernel density estimate + xi = (bins[:-1] + bins[1:]) / 2 + pdf = np.sum( + scipy.stats.norm.pdf((xi.reshape(-1, 1) - x.reshape(1, -1)) / bandwidth), axis=1 + ) / (len(x) * bandwidth) + + # Normalize the PDF + pdf = pdf / np.sum(pdf) + + return bins, pdf diff --git a/neurokit2/complexity/entropy_differential.py b/neurokit2/complexity/entropy_differential.py index 84c2859905..2dae6fed5e 100644 --- a/neurokit2/complexity/entropy_differential.py +++ b/neurokit2/complexity/entropy_differential.py @@ -17,7 +17,10 @@ def entropy_differential(signal, base=2, **kwargs): ---------- signal : Union[list, np.array, pd.Series] The signal (i.e., a time series) in the form of a vector of values. - + base: float + The logarithmic base to use, defaults to ``2``, giving a unit in *bits*. Note that ``scipy. + stats.entropy()`` uses Euler's number (``np.e``) as default (the natural logarithm), giving + a measure of information expressed in *nats*. **kwargs : optional Other arguments passed to ``scipy.stats.differential_entropy()``. @@ -25,10 +28,6 @@ def entropy_differential(signal, base=2, **kwargs): -------- diffen : float The Differential entropy of the signal. - base: float - The logarithmic base to use, defaults to ``2``, giving a unit in *bits*. Note that ``scipy. - stats.entropy()`` uses Euler's number (``np.e``) as default (the natural logarithm), giving - a measure of information expressed in *nats*. info : dict A dictionary containing additional information regarding the parameters used to compute Differential entropy. diff --git a/neurokit2/complexity/entropy_sample.py b/neurokit2/complexity/entropy_sample.py index bcd104103e..3aacca7052 100644 --- a/neurokit2/complexity/entropy_sample.py +++ b/neurokit2/complexity/entropy_sample.py @@ -58,7 +58,7 @@ def entropy_sample(signal, delay=1, dimension=2, tolerance="sd", **kwargs): signal = nk.signal_simulate(duration=2, frequency=5) - sampen, parameters = nk.entropy_sample(signal) + sampen, parameters = nk.entropy_sample(signal, delay=1, dimension=2) sampen """ diff --git a/neurokit2/ecg/ecg_delineate.py b/neurokit2/ecg/ecg_delineate.py index 2b28a7790b..aee288c5be 100644 --- a/neurokit2/ecg/ecg_delineate.py +++ b/neurokit2/ecg/ecg_delineate.py @@ -164,6 +164,11 @@ def ecg_delineate( "NeuroKit error: ecg_delineate(): 'method' should be one of 'peak'," "'cwt' or 'dwt'." 
)

+    # Ensure that all indices are not larger than ECG signal indices
+    for _, value in waves.items():
+        if value[-1] >= len(ecg_cleaned):
+            value[-1] = np.nan
+
     # Remove NaN in Peaks, Onsets, and Offsets
     waves_noNA = waves.copy()
     for feature in waves_noNA.keys():
@@ -947,11 +952,6 @@ def _ecg_delineator_peak(ecg, rpeaks=None, sampling_rate=1000):
         "ECG_T_Offsets": T_offsets,
     }

-    # Ensure that all indices are not larger than ECG signal indices
-    for _, value in info.items():
-        if value[-1] >= len(ecg):
-            value[-1] = np.nan
-
     # Return info dictionary
     return info

diff --git a/neurokit2/ecg/ecg_segment.py b/neurokit2/ecg/ecg_segment.py
index 89703244f0..867f1643de 100644
--- a/neurokit2/ecg/ecg_segment.py
+++ b/neurokit2/ecg/ecg_segment.py
@@ -59,6 +59,11 @@ def ecg_segment(ecg_cleaned, rpeaks=None, sampling_rate=1000, show=False):
         epochs_end=epochs_end,
     )

+    # Pad the last heartbeat with NaN so that all segments have equal length
+    last_heartbeat_key = str(np.max(np.array(list(heartbeats.keys()), dtype=int)))
+    after_last_index = heartbeats[last_heartbeat_key]["Index"] >= len(ecg_cleaned)
+    heartbeats[last_heartbeat_key].loc[after_last_index, "Signal"] = np.nan
+
     if show:
         heartbeats_plot = epochs_to_df(heartbeats)
         heartbeats_pivoted = heartbeats_plot.pivot(index="Time", columns="Label", values="Signal")

diff --git a/neurokit2/ecg/ecg_simulate.py b/neurokit2/ecg/ecg_simulate.py
index 769b361775..6a9a75b560 100644
--- a/neurokit2/ecg/ecg_simulate.py
+++ b/neurokit2/ecg/ecg_simulate.py
@@ -5,6 +5,7 @@
 import pandas as pd
 import scipy

+from ..misc import check_random_state, check_random_state_children
 from ..signal import signal_distort, signal_resample

@@ -17,6 +18,7 @@ def ecg_simulate(
     heart_rate_std=1,
     method="ecgsyn",
     random_state=None,
+    random_state_distort="spawn",
     **kwargs,
 ):
     """**Simulate an ECG/EKG signal**
@@ -49,8 +51,14 @@
         `_. If ``"multileads"``, will return a DataFrame containing
         12-leads (see `12-leads ECG simulation
         `_).
-    random_state : int
-        Seed for the random number generator.
+    random_state : None, int, numpy.random.RandomState or numpy.random.Generator
+        Seed for the random number generator. See ``misc.check_random_state`` for further information.
+    random_state_distort : {'legacy', 'spawn'}, None, int, numpy.random.RandomState or numpy.random.Generator
+        Random state to be used to distort the signal. If ``"legacy"``, use the same random state used to
+        generate the signal (discouraged as it creates dependent random streams). If ``"spawn"``, spawn
+        independent children random number generators from the ``random_state`` argument. For any other
+        type, independent children random number generators are spawned from the provided
+        ``random_state_distort`` (this allows generating multiple versions of the same signal distorted
+        by different random noise realizations).
     **kwargs
         Other keyword parameters for the ECGSYN algorithm, such as ``"lfhfratio"``, ``"ti"``, ``"ai"``, ``"bi"``.
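For intuition, the ``"spawn"`` behaviour described above can be sketched with plain NumPy
(``numpy.random.SeedSequence``). This is an illustrative sketch only, not NeuroKit's internal
``check_random_state_children`` helper, and the seed value is arbitrary:

.. code-block:: python

    import numpy as np

    # One parent seed, spawned into independent child streams (one per signal)
    parent = np.random.SeedSequence(42)
    rngs = [np.random.default_rng(child) for child in parent.spawn(3)]

    # Each signal gets its own Laplace noise realization, yet the whole set
    # remains reproducible from the single parent seed
    noise = [rng.laplace(scale=0.1, size=100) for rng in rngs]

Spawned children are statistically independent of each other, which avoids the dependent random
streams that the ``"legacy"`` option would produce.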
@@ -101,7 +109,7 @@ def ecg_simulate( """ # Seed the random generator for reproducible results - np.random.seed(random_state) + rng = check_random_state(random_state) # Generate number of samples automatically if length is unspecified if length is None: @@ -142,6 +150,7 @@ def ecg_simulate( hrstd=heart_rate_std, sfint=sampling_rate, gamma=gamma, + rng=rng, **kwargs, ) else: @@ -152,6 +161,7 @@ def ecg_simulate( hrstd=heart_rate_std, sfint=sampling_rate, gamma=np.ones((1, 5)), + rng=rng, **kwargs, ) # Cut to match expected length @@ -160,6 +170,9 @@ def ecg_simulate( # Add random noise if noise > 0: + # Seed for random noise + random_state_distort = check_random_state_children(random_state, random_state_distort, n_children=len(signals)) + # Call signal_distort on each signal for i in range(len(signals)): signals[i] = signal_distort( signals[i], @@ -167,7 +180,7 @@ def ecg_simulate( noise_amplitude=noise, noise_frequency=[5, 10, 100], noise_shape="laplace", - random_state=random_state, + random_state=random_state_distort[i], silent=True, ) @@ -180,8 +193,6 @@ def ecg_simulate( columns=["I", "II", "III", "aVR", "aVL", "aVF", "V1", "V2", "V3", "V4", "V5", "V6"], ) - # Reset random seed (so it doesn't affect global) - np.random.seed(None) return ecg @@ -237,6 +248,7 @@ def _ecg_simulate_ecgsyn( ai=(1.2, -5, 30, -7.5, 0.75), bi=(0.25, 0.1, 0.1, 0.1, 0.4), gamma=np.ones((1, 5)), + rng=None, **kwargs, ): """ @@ -329,7 +341,7 @@ def _ecg_simulate_ecgsyn( rrmean = 60 / hrmean n = 2 ** (np.ceil(np.log2(N * rrmean / trr))) - rr0 = _ecg_simulate_rrprocess(flo, fhi, flostd, fhistd, lfhfratio, hrmean, hrstd, sfrr, n) + rr0 = _ecg_simulate_rrprocess(flo, fhi, flostd, fhistd, lfhfratio, hrmean, hrstd, sfrr, n, rng) # Upsample rr time series from 1 Hz to sfint Hz rr = signal_resample(rr0, sampling_rate=1, desired_sampling_rate=sfint) @@ -418,7 +430,16 @@ def _ecg_simulate_derivsecgsyn(t, x, rr, ti, sfint, ai, bi): def _ecg_simulate_rrprocess( - flo=0.1, fhi=0.25, flostd=0.01, fhistd=0.01, lfhfratio=0.5, hrmean=60, hrstd=1, sfrr=1, n=256 + flo=0.1, + fhi=0.25, + flostd=0.01, + fhistd=0.01, + lfhfratio=0.5, + hrmean=60, + hrstd=1, + sfrr=1, + n=256, + rng=None, ): w1 = 2 * np.pi * flo w2 = 2 * np.pi * fhi @@ -440,7 +461,7 @@ def _ecg_simulate_rrprocess( Hw0 = np.concatenate((Hw[0 : int(n / 2)], Hw[int(n / 2) - 1 :: -1])) Sw = (sfrr / 2) * np.sqrt(Hw0) - ph0 = 2 * np.pi * np.random.uniform(size=int(n / 2 - 1)) + ph0 = 2 * np.pi * rng.uniform(size=int(n / 2 - 1)) ph = np.concatenate([[0], ph0, [0], -np.flipud(ph0)]) SwC = Sw * np.exp(1j * ph) x = (1 / n) * np.real(np.fft.ifft(SwC)) diff --git a/neurokit2/eda/eda_autocor.py b/neurokit2/eda/eda_autocor.py index 66329c7fc2..c2eb88ec47 100644 --- a/neurokit2/eda/eda_autocor.py +++ b/neurokit2/eda/eda_autocor.py @@ -7,7 +7,7 @@ def eda_autocor(eda_cleaned, sampling_rate=1000, lag=4): """**EDA Autocorrelation** - Compute autocorrelation measure of raw EDA signal i.e., the correlation between the time + Compute the autocorrelation measure of raw EDA signal i.e., the correlation between the time series data and a specified time-lagged version of itself. Parameters @@ -44,9 +44,10 @@ def eda_autocor(eda_cleaned, sampling_rate=1000, lag=4): References ----------- - - Halem, S., van Roekel, E., Kroencke, L., Kuper, N., & Denissen, J. (2020). Moments That Matter? - On the Complexity of Using Triggers Based on Skin Conductance to Sample Arousing Events Within - an Experience Sampling Framework. European Journal of Personality. 
+    * van Halem, S., Van Roekel, E., Kroencke, L., Kuper, N., & Denissen, J. (2020). Moments that
+      matter? On the complexity of using triggers based on skin conductance to sample arousing
+      events within an experience sampling framework. European Journal of Personality, 34(5),
+      794-807.

     """
     # Sanity checks

diff --git a/neurokit2/eda/eda_changepoints.py b/neurokit2/eda/eda_changepoints.py
index 4817520ca8..0f1c65899a 100644
--- a/neurokit2/eda/eda_changepoints.py
+++ b/neurokit2/eda/eda_changepoints.py
@@ -53,9 +53,10 @@ def eda_changepoints(eda_cleaned, penalty=10000, show=False):
     References
     -----------
-    * Halem, S., van Roekel, E., Kroencke, L., Kuper, N., & Denissen, J. (2020). Moments That
-      Matter? On the Complexity of Using Triggers Based on Skin Conductance to Sample Arousing
-      Events Within an Experience Sampling Framework. European Journal of Personality.
+    * van Halem, S., Van Roekel, E., Kroencke, L., Kuper, N., & Denissen, J. (2020). Moments that
+      matter? On the complexity of using triggers based on skin conductance to sample arousing
+      events within an experience sampling framework. European Journal of Personality, 34(5),
+      794-807.

     """
     # Sanity checks

diff --git a/neurokit2/eda/eda_clean.py b/neurokit2/eda/eda_clean.py
index 86155d8d8f..fa3a62cf84 100644
--- a/neurokit2/eda/eda_clean.py
+++ b/neurokit2/eda/eda_clean.py
@@ -5,13 +5,23 @@
 import pandas as pd
 import scipy.signal

-from ..misc import as_vector, NeuroKitWarning
+from ..misc import NeuroKitWarning, as_vector
 from ..signal import signal_filter, signal_smooth


 def eda_clean(eda_signal, sampling_rate=1000, method="neurokit"):
     """**Preprocess Electrodermal Activity (EDA) signal**

+    This function cleans the EDA signal by removing noise and smoothing it with different methods.
+
+    * **NeuroKit**: Default method. Low-pass filter with a 3 Hz cutoff frequency and a 4th order
+      Butterworth filter. Note that if the sampling rate is lower than 7 Hz (as is the case with
+      some signals recorded by wearables such as Empatica), the filtering is skipped (as there
+      is no high enough frequency to remove).
+    * **BioSPPy**: More aggressive filtering than NeuroKit's default method. Low-pass filter with
+      a 5 Hz cutoff frequency and a 4th order Butterworth filter.
+
+
     Parameters
     ----------
     eda_signal : Union[list, np.array, pd.Series]
@@ -19,7 +29,8 @@
     sampling_rate : int
         The sampling frequency of ``eda_signal`` (in Hz, i.e., samples/second).
     method : str
-        The processing pipeline to apply. Can be one of ``"neurokit"`` (default) or ``"biosppy"``.
+        The processing pipeline to apply. Can be one of ``"neurokit"`` (default), ``"biosppy"``, or
+        ``"none"``.
Returns ------- @@ -37,13 +48,15 @@ def eda_clean(eda_signal, sampling_rate=1000, method="neurokit"): import pandas as pd import neurokit2 as nk - eda = nk.eda_simulate(duration=30, sampling_rate=100, scr_number=10, noise=0.01, drift=0.02) - signals = pd.DataFrame({"EDA_Raw": eda, - "EDA_BioSPPy": nk.eda_clean(eda, sampling_rate=100,method='biosppy'), - "EDA_NeuroKit": nk.eda_clean(eda, sampling_rate=100, - method='neurokit')}) + # Simulate raw signal + eda = nk.eda_simulate(duration=15, sampling_rate=100, scr_number=10, noise=0.01, drift=0.02) + + # Clean + eda_clean1 = nk.eda_clean(eda, sampling_rate=100, method='neurokit') + eda_clean2 = nk.eda_clean(eda, sampling_rate=100, method='biosppy') + @savefig p_eda_clean.png scale=100% - fig = signals.plot() + nk.signal_plot([eda, eda_clean1, eda_clean2], labels=["Raw", "NeuroKit", "BioSPPy"]) @suppress plt.close() @@ -56,7 +69,7 @@ def eda_clean(eda_signal, sampling_rate=1000, method="neurokit"): warn( "There are " + str(n_missing) + " missing data points in your signal." " Filling missing values by using the forward filling method.", - category=NeuroKitWarning + category=NeuroKitWarning, ) eda_signal = _eda_clean_missing(eda_signal) @@ -65,6 +78,8 @@ def eda_clean(eda_signal, sampling_rate=1000, method="neurokit"): clean = _eda_clean_biosppy(eda_signal, sampling_rate) elif method in ["default", "neurokit", "nk"]: clean = _eda_clean_neurokit(eda_signal, sampling_rate) + elif method is None or method == "none": + clean = eda_signal else: raise ValueError("NeuroKit error: eda_clean(): 'method' should be one of 'biosppy'.") @@ -80,13 +95,23 @@ def _eda_clean_missing(eda_signal): return eda_signal + # ============================================================================= # NK # ============================================================================= def _eda_clean_neurokit(eda_signal, sampling_rate=1000): + if sampling_rate <= 6: + warn( + "EDA signal is sampled at very low frequency. Skipping filtering.", + category=NeuroKitWarning, + ) + return eda_signal + # Filtering - filtered = signal_filter(eda_signal, sampling_rate=sampling_rate, highcut=3, method="butterworth", order=4) + filtered = signal_filter( + eda_signal, sampling_rate=sampling_rate, highcut=3, method="butterworth", order=4 + ) return filtered @@ -103,13 +128,17 @@ def _eda_clean_biosppy(eda_signal, sampling_rate=1000): # Parameters order = 4 frequency = 5 - frequency = 2 * np.array(frequency) / sampling_rate # Normalize frequency to Nyquist Frequency (Fs/2). + frequency = ( + 2 * np.array(frequency) / sampling_rate + ) # Normalize frequency to Nyquist Frequency (Fs/2). 
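+    # Worked example for the normalization above (illustrative values): with
+    # sampling_rate=1000 and a 5 Hz cutoff, Wn = 2 * 5 / 1000 = 0.01, i.e. the cutoff
+    # expressed as a fraction of the Nyquist frequency (500 Hz), which is the convention
+    # scipy.signal.butter expects when no sampling frequency is passed.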
    # Filtering
    b, a = scipy.signal.butter(N=order, Wn=frequency, btype="lowpass", analog=False, output="ba")
    filtered = scipy.signal.filtfilt(b, a, eda_signal)

    # Smoothing
-    clean = signal_smooth(filtered, method="convolution", kernel="boxzen", size=int(0.75 * sampling_rate))
+    clean = signal_smooth(
+        filtered, method="convolution", kernel="boxzen", size=int(0.75 * sampling_rate)
+    )

    return clean

diff --git a/neurokit2/eda/eda_findpeaks.py b/neurokit2/eda/eda_findpeaks.py
index 0ec8044278..aa452ff17c 100644
--- a/neurokit2/eda/eda_findpeaks.py
+++ b/neurokit2/eda/eda_findpeaks.py
@@ -114,7 +114,6 @@ def eda_findpeaks(eda_phasic, sampling_rate=1000, method="neurokit", amplitude_m
 def _eda_findpeaks_neurokit(eda_phasic, amplitude_min=0.1):
-
     peaks = signal_findpeaks(eda_phasic, relative_height_min=amplitude_min, relative_max=True)

     info = {
@@ -129,27 +128,9 @@
 def _eda_findpeaks_vanhalem2020(eda_phasic, sampling_rate=1000):
     """Follows approach of van Halem et al. (2020).

-    A peak is considered when there is a consistent increase of 0.5 seconds following a consistent decrease
-    of 0.5 seconds.
-
-    Parameters
-    ----------
-    eda_phasic : array
-        Input filterd EDA signal.
-    sampling_rate : int
-        Sampling frequency (Hz). Defaults to 1000Hz.
+    A peak is considered when there is a consistent increase of 0.5 seconds followed by a
+    consistent decrease of 0.5 seconds.

-    Returns
-    -------
-    onsets : array
-        Indices of the SCR onsets.
-    peaks : array
-        Indices of the SRC peaks.
-    amplitudes : array
-        SCR pulse amplitudes.
-
-    References
-    ----------
     * van Halem, S., Van Roekel, E., Kroencke, L., Kuper, N., & Denissen, J. (2020). Moments That
       Matter? On the Complexity of Using Triggers Based on Skin Conductance to Sample Arousing
       Events Within an Experience Sampling Framework. European Journal of Personality.
@@ -170,7 +151,8 @@
     threshold = 0.5 * sampling_rate

     # Define each peak as a consistent increase of 0.5s
-    peaks = peaks[info["Width"] > threshold]
+    increase = info["Peaks"] - info["Onsets"]
+    peaks = peaks[increase > threshold]
     idx = np.where(peaks[:, None] == info["Peaks"][None, :])[1]

     # Check if each peak is followed by consistent decrease of 0.5s
@@ -184,32 +166,16 @@
     info = {
         "SCR_Onsets": info["Onsets"][idx],
         "SCR_Peaks": info["Peaks"][idx],
-        "SCR_Height": info["Height"][idx],
+        "SCR_Height": eda_phasic[info["Peaks"][idx]],
     }

     return info


 def _eda_findpeaks_gamboa2008(eda_phasic):
-    """Basic method to extract Skin Conductivity Responses (SCR) from an EDA signal following the approach in the thesis
-    by Gamboa (2008).
+    """Basic method to extract Skin Conductivity Responses (SCR) from an EDA signal following the
+    approach in the thesis by Gamboa (2008).

-    Parameters
-    ----------
-    eda_phasic : array
-        Input filterd EDA signal.
-
-    Returns
-    -------
-    onsets : array
-        Indices of the SCR onsets.
-    peaks : array
-        Indices of the SRC peaks.
-    amplitudes : array
-        SCR pulse amplitudes.
-
-    References
-    ----------
     * Gamboa, H. (2008). Multi-modal behavioral biometrics based on hci and electrophysiology. PhD
       Thesis Universidade.

@@ -255,26 +221,6 @@ def _eda_findpeaks_kim2004(eda_phasic, sampling_rate=1000, amplitude_min=0.1):
     """KBK method to extract Skin Conductivity Responses (SCR) from an EDA signal following the
     approach by Kim et al. (2004).
-    Parameters
-    ----------
-    eda_phasic : array
-        Input filterd EDA signal.
-    sampling_rate : int
-        Sampling frequency (Hz). Defaults to 1000Hz.
-    amplitude_min : float
-        Minimum treshold by which to exclude SCRs. Defaults to 0.1.
-
-    Returns
-    -------
-    onsets : array
-        Indices of the SCR onsets.
-    peaks : array
-        Indices of the SRC peaks.
-    amplitudes : array
-        SCR pulse amplitudes.
-
-    References
-    ----------
     * Kim, K. H., Bang, S. W., & Kim, S. R. (2004). Emotion recognition system using short-term
       monitoring of physiological signals. Medical and biological engineering and computing, 42(3),
       419-427.

@@ -322,40 +268,36 @@
 def _eda_findpeaks_nabian2018(eda_phasic):
-    """Basic method to extract Skin Conductivity Responses (SCR) from an EDA signal following the approach by Nabian et
-    al. (2018).
+    """Basic method to extract Skin Conductivity Responses (SCR) from an EDA signal following the
+    approach by Nabian et al. (2018). SCRs are identified from pairs of consecutive positive and
+    negative zero-crossings of the smoothed first derivative of the phasic signal. The amplitude
+    of the SCR is obtained by finding the maximum value between such a pair of zero-crossings and
+    calculating the difference between the initial zero-crossing and that maximum. Detected SCRs
+    with amplitudes smaller than 10 percent of the maximum of the already-detected SCR amplitudes
+    are eliminated. It is crucial that artifacts are removed before finding peaks.

-    Parameters
-    ----------
-    eda_phasic : array
-        Input filterd EDA signal.
-
-    Returns
-    -------
-    onsets : array
-        Indices of the SCR onsets.
-    peaks : array
-        Indices of the SRC peaks.
-    amplitudes : array
-        SCR pulse amplitudes.
-
-    References
-    ----------
-    - Nabian, M., Yin, Y., Wormwood, J., Quigley, K. S., Barrett, L. F., & Ostadabbas, S. (2018). An
-      Open-Source Feature Extraction Tool for the Analysis of Peripheral Physiological Data. IEEE
-      journal of translational engineering in health and medicine, 6, 2800711.
-      https://doi.org/10.1109/JTEHM.2018.2878000
+    * Nabian, M., Yin, Y., Wormwood, J., Quigley, K. S., Barrett, L. F., & Ostadabbas, S. (2018). An
+      Open-Source Feature Extraction Tool for the Analysis of Peripheral Physiological Data. IEEE
+      journal of translational engineering in health and medicine, 6, 2800711.
+ https://doi.org/10.1109/JTEHM.2018.2878000 """ + # differentiation + eda_phasic_diff = np.diff(eda_phasic) + # smooth - eda_phasic = signal_smooth(eda_phasic, kernel="bartlett", size=20) + eda_phasic_smoothed = signal_smooth(eda_phasic_diff, kernel="bartlett", size=20) # zero crossings - pos_crossings = signal_zerocrossings(eda_phasic, direction="positive") - neg_crossings = signal_zerocrossings(eda_phasic, direction="negative") + pos_crossings = signal_zerocrossings(eda_phasic_smoothed, direction="positive") + neg_crossings = signal_zerocrossings(eda_phasic_smoothed, direction="negative") + # if negative crossing happens before the positive crossing + # delete first negative crossing because we want to identify peaks + if neg_crossings[0] < pos_crossings[0]: + neg_crossings = neg_crossings[1:] # Sanitize consecutive crossings + if len(pos_crossings) > len(neg_crossings): pos_crossings = pos_crossings[0 : len(neg_crossings)] elif len(pos_crossings) < len(neg_crossings): @@ -366,15 +308,33 @@ def _eda_findpeaks_nabian2018(eda_phasic): amps_list = [] for i, j in zip(pos_crossings, neg_crossings): window = eda_phasic[i:j] - amp = np.max(window) + # The amplitude of the SCR is obtained by finding the maximum value + # between these two zero-crossings and calculating the difference + # between the initial zero crossing and the maximum value. + # amplitude defined in neurokit2 + amp = np.nanmax(window) # Detected SCRs with amplitudes less than 10% of max SCR amplitude will be eliminated - diff = amp - eda_phasic[i] - if not diff < (0.1 * amp): + # we append the first SCR + if len(amps_list) == 0: + # be careful, if two peaks have the same amplitude, np.where will return a list peaks = np.where(eda_phasic == amp)[0] - peaks_list.append(peaks) + # make sure that the peak is within the window + peaks = [peak for peak in [peaks] if peak > i and peak < j] + peaks_list.append(peaks[0]) onsets_list.append(i) amps_list.append(amp) + else: + # we have a list of peaks + # amplitude defined in the paper + diff = amp - eda_phasic[i] + if not diff < (0.1 * max(amps_list)): + peaks = np.where(eda_phasic == amp)[0] + # make sure that the peak is within the window + peaks = [peak for peak in [peaks] if peak > i and peak < j] + peaks_list.append(peaks[0]) + onsets_list.append(i) + amps_list.append(amp) # output info = { diff --git a/neurokit2/eda/eda_intervalrelated.py b/neurokit2/eda/eda_intervalrelated.py index d1336d980e..8897d0fc6e 100644 --- a/neurokit2/eda/eda_intervalrelated.py +++ b/neurokit2/eda/eda_intervalrelated.py @@ -1,20 +1,30 @@ # -*- coding: utf-8 -*- +from warnings import warn + import numpy as np import pandas as pd +from ..misc import NeuroKitWarning +from .eda_autocor import eda_autocor +from .eda_sympathetic import eda_sympathetic + -def eda_intervalrelated(data): +def eda_intervalrelated(data, sampling_rate=1000, **kwargs): """**EDA Analysis on Interval-Related Data** - Performs EDA analysis on longer periods of data (typically > 10 seconds), such as resting-state data. + Performs EDA analysis on longer periods of data (typically > 10 seconds), such as resting-state + data. Parameters ---------- data : Union[dict, pd.DataFrame] - A DataFrame containing the different processed signal(s) as - different columns, typically generated by ``"eda_process()"`` or - ``"bio_process()"``. Can also take a dict containing sets of - separately processed DataFrames. 
+        A DataFrame containing the different processed signal(s) as different columns, typically
+        generated by :func:`eda_process` or :func:`bio_process`. Can also take a dict containing
+        sets of separately processed DataFrames.
+    sampling_rate : int
+        The sampling frequency of the raw EDA signal (in Hz, i.e., samples/second). Defaults to 1000.
+    **kwargs
+        Other arguments to be passed to the functions.

     Returns
     -------
@@ -23,6 +33,11 @@
     features consist of the following:

     * ``"SCR_Peaks_N"``: the number of occurrences of Skin Conductance Response (SCR).
     * ``"SCR_Peaks_Amplitude_Mean"``: the mean amplitude of the SCR peak occurrences.
+    * ``"EDA_Tonic_SD"``: the standard deviation of the tonic component of the EDA signal.
+    * ``"EDA_Sympathetic"``: see :func:`eda_sympathetic` (only computed if signal duration
+      > 64 sec).
+    * ``"EDA_Autocorrelation"``: see :func:`eda_autocor` (only computed if signal duration
+      > 30 sec).

     See Also
     --------
@@ -41,58 +56,32 @@
     df, info = nk.eda_process(data["EDA"], sampling_rate=100)

     # Single dataframe is passed
-    nk.eda_intervalrelated(df)
+    nk.eda_intervalrelated(df, sampling_rate=100)

     epochs = nk.epochs_create(df, events=[0, 25300], sampling_rate=100, epochs_end=20)
-    nk.eda_intervalrelated(epochs)
+    nk.eda_intervalrelated(epochs, sampling_rate=100)

     """
-    intervals = {}
-
     # Format input
     if isinstance(data, pd.DataFrame):
-        peaks_cols = [col for col in data.columns if "SCR_Peaks" in col]
-        if len(peaks_cols) == 1:
-            intervals["Peaks_N"] = data[peaks_cols[0]].values.sum()
-        else:
-            raise ValueError(
-                "NeuroKit error: eda_intervalrelated(): Wrong"
-                "input, we couldn't extract SCR peaks."
-                "Please make sure your DataFrame"
-                "contains an `SCR_Peaks` column."
-            )
-        amp_cols = [col for col in data.columns if "SCR_Amplitude" in col]
-        if len(amp_cols) == 1:
-            intervals["Peaks_Amplitude_Mean"] = (
-                np.nansum(data[amp_cols[0]].values) / data[peaks_cols[0]].values.sum()
-            )
-        else:
-            raise ValueError(
-                "NeuroKit error: eda_intervalrelated(): Wrong"
-                "input, we couldn't extract SCR peak amplitudes."
-                "Please make sure your DataFrame"
-                "contains an `SCR_Amplitude` column."
- ) - - eda_intervals = pd.DataFrame.from_dict(intervals, orient="index").T.add_prefix( - "SCR_" - ) - + results = _eda_intervalrelated(data, sampling_rate=sampling_rate, **kwargs) + results = pd.DataFrame.from_dict(results, orient="index").T elif isinstance(data, dict): + results = {} for index in data: - intervals[index] = {} # Initialize empty container + results[index] = {} # Initialize empty container # Add label info - intervals[index]['Label'] = data[index]['Label'].iloc[0] + results[index]["Label"] = data[index]["Label"].iloc[0] - intervals[index] = _eda_intervalrelated_formatinput( - data[index], intervals[index] + results[index] = _eda_intervalrelated( + data[index], results[index], sampling_rate=sampling_rate, **kwargs ) - eda_intervals = pd.DataFrame.from_dict(intervals, orient="index") + results = pd.DataFrame.from_dict(results, orient="index") - return eda_intervals + return results # ============================================================================= @@ -100,34 +89,65 @@ def eda_intervalrelated(data): # ============================================================================= -def _eda_intervalrelated_formatinput(interval, output={}): +def _eda_intervalrelated( + data, output={}, sampling_rate=1000, method_sympathetic="posada", **kwargs +): """Format input for dictionary.""" # Sanitize input - colnames = interval.columns.values - if len([i for i in colnames if "SCR_Peaks" in i]) == 0: - raise ValueError( - "NeuroKit error: eda_intervalrelated(): Wrong" - "input, we couldn't extract SCR peaks." - "Please make sure your DataFrame" - "contains an `SCR_Peaks` column." - ) - return output # pylint: disable=W0101 - if len([i for i in colnames if "SCR_Amplitude" in i]) == 0: - raise ValueError( - "NeuroKit error: eda_intervalrelated(): Wrong" - "input we couldn't extract SCR peak amplitudes." - "Please make sure your DataFrame" - "contains an `SCR_Amplitude` column." - ) - return output # pylint: disable=W0101 + colnames = data.columns.values - peaks = interval["SCR_Peaks"].values - amplitude = interval["SCR_Amplitude"].values + # SCR Peaks + if "SCR_Peaks" not in colnames: + warn( + "We couldn't find an `SCR_Peaks` column. Returning NaN for N peaks.", + category=NeuroKitWarning, + ) + output["SCR_Peaks_N"] = np.nan + else: + output["SCR_Peaks_N"] = np.nansum(data["SCR_Peaks"].values) - output["SCR_Peaks_N"] = np.sum(peaks) - if np.sum(peaks) == 0: + # Peak amplitude + if "SCR_Amplitude" not in colnames: + warn( + "We couldn't find an `SCR_Amplitude` column. 
Returning NaN for peak amplitude.",
+            category=NeuroKitWarning,
+        )
         output["SCR_Peaks_Amplitude_Mean"] = np.nan
     else:
-        output["SCR_Peaks_Amplitude_Mean"] = np.sum(amplitude) / np.sum(peaks)
+        output["SCR_Peaks_Amplitude_Mean"] = np.nanmean(data["SCR_Amplitude"].values)
+
+    # Get variability of tonic
+    if "EDA_Tonic" in colnames:
+        output["EDA_Tonic_SD"] = np.nanstd(data["EDA_Tonic"].values)
+
+    # EDA Sympathetic
+    output.update({"EDA_Sympathetic": np.nan, "EDA_SympatheticN": np.nan})  # Default values
+    if len(data) > sampling_rate * 64:
+        if "EDA_Clean" in colnames:
+            output.update(
+                eda_sympathetic(
+                    data["EDA_Clean"], sampling_rate=sampling_rate, method=method_sympathetic
+                )
+            )
+        elif "EDA_Raw" in colnames:
+            # If not clean signal, use raw
+            output.update(
+                eda_sympathetic(
+                    data["EDA_Raw"], sampling_rate=sampling_rate, method=method_sympathetic
+                )
+            )
+
+    # EDA autocorrelation
+    output.update({"EDA_Autocorrelation": np.nan})  # Default values
+    if len(data) > sampling_rate * 30:  # 30 seconds minimum (NOTE: somewhat arbitrary)
+        if "EDA_Clean" in colnames:
+            output["EDA_Autocorrelation"] = eda_autocor(
+                data["EDA_Clean"], sampling_rate=sampling_rate, **kwargs
+            )
+        elif "EDA_Raw" in colnames:
+            # If not clean signal, use raw
+            output["EDA_Autocorrelation"] = eda_autocor(
+                data["EDA_Raw"], sampling_rate=sampling_rate, **kwargs
+            )

     return output

diff --git a/neurokit2/eda/eda_methods.py b/neurokit2/eda/eda_methods.py
new file mode 100644
index 0000000000..44a6aacbe0
--- /dev/null
+++ b/neurokit2/eda/eda_methods.py
@@ -0,0 +1,142 @@
+# -*- coding: utf-8 -*-
+import numpy as np
+
+from ..misc.report import get_kwargs
+from .eda_clean import eda_clean
+from .eda_peaks import eda_peaks
+from .eda_phasic import eda_phasic
+
+
+def eda_methods(
+    sampling_rate=1000,
+    method="default",
+    method_cleaning="default",
+    method_peaks="default",
+    method_phasic="default",
+    **kwargs,
+):
+    """**EDA Preprocessing Methods**
+
+    This function analyzes and specifies the methods used in the preprocessing, and creates a
+    textual description of the methods used. It is used by :func:`eda_process()` to dispatch the
+    correct methods to each subroutine of the pipeline and :func:`eda_report()` to create a
+    preprocessing report.
+
+    Parameters
+    ----------
+    sampling_rate : int
+        The sampling frequency of the raw EDA signal (in Hz, i.e., samples/second).
+    method : str
+        The method used for cleaning and peak finding if ``"method_cleaning"``
+        and ``"method_peaks"`` are set to ``"default"``. Can be one of ``"default"``, ``"biosppy"``.
+        Defaults to ``"default"``.
+    method_cleaning: str
+        The method used to clean the raw EDA signal. If ``"default"``,
+        will be set to the value of ``"method"``. Defaults to ``"default"``.
+        For more information, see the ``"method"`` argument
+        of :func:`.eda_clean`.
+    method_peaks: str
+        The method used to find peaks. If ``"default"``,
+        will be set to the value of ``"method"``. Defaults to ``"default"``.
+        For more information, see the ``"method"`` argument
+        of :func:`.eda_peaks`.
+    method_phasic: str
+        The method used to decompose the EDA signal into phasic and tonic components. If
+        ``"default"``, will be set to the value of ``"method"``. Defaults to ``"default"``.
+        For more information, see the ``"method"`` argument
+        of :func:`.eda_phasic`.
+    **kwargs
+        Other arguments to be passed to :func:`.eda_clean`,
+        :func:`.eda_peaks`, and :func:`.eda_phasic`.
+
+    Returns
+    -------
+    report_info : dict
+        A dictionary containing the keyword arguments passed to the cleaning
+        and peak finding functions, text describing the methods, and the corresponding
+        references.
+
+    See Also
+    --------
+    eda_process, eda_clean, eda_peaks
+    """
+    # Sanitize inputs
+    method_cleaning = str(method).lower() if method_cleaning == "default" else str(method_cleaning).lower()
+    method_phasic = str(method).lower() if method_phasic == "default" else str(method_phasic).lower()
+    method_peaks = str(method).lower() if method_peaks == "default" else str(method_peaks).lower()
+
+    # Create dictionary with all inputs
+    report_info = {
+        "sampling_rate": sampling_rate,
+        "method_cleaning": method_cleaning,
+        "method_phasic": method_phasic,
+        "method_peaks": method_peaks,
+        "kwargs": kwargs,
+    }
+
+    # Get arguments to be passed to underlying functions
+    kwargs_cleaning, report_info = get_kwargs(report_info, eda_clean)
+    kwargs_phasic, report_info = get_kwargs(report_info, eda_phasic)
+    kwargs_peaks, report_info = get_kwargs(report_info, eda_peaks)
+
+    # Save keyword arguments in dictionary
+    report_info["kwargs_cleaning"] = kwargs_cleaning
+    report_info["kwargs_phasic"] = kwargs_phasic
+    report_info["kwargs_peaks"] = kwargs_peaks
+
+    # Initialize refs list
+    refs = []
+
+    # 1. Cleaning
+    # ------------
+    report_info["text_cleaning"] = f"The raw signal, sampled at {sampling_rate} Hz,"
+    if method_cleaning == "biosppy":
+        report_info["text_cleaning"] += " was cleaned using the biosppy package."
+    elif method_cleaning in ["default", "neurokit", "nk"]:
+        report_info["text_cleaning"] += " was cleaned using the default method of the neurokit2 package."
+    elif method_cleaning in ["none"]:
+        report_info["text_cleaning"] += " was directly used without cleaning."
+    else:
+        report_info["text_cleaning"] += " was cleaned using the method described in " + method_cleaning + "."
+
+    # 2. Phasic decomposition
+    # -----------------------
+    # TODO: add descriptions of individual methods
+    report_info["text_phasic"] = "The signal was decomposed into phasic and tonic components using"
+    if method_phasic is None or method_phasic in ["none"]:
+        report_info["text_phasic"] = "There was no phasic decomposition carried out."
+    else:
+        report_info["text_phasic"] += " the method described in " + method_phasic + "."
+
+    # 3. Peak detection
+    # -----------------
+    report_info["text_peaks"] = "The cleaned signal was used to detect peaks using"
+    if method_peaks in ["gamboa2008", "gamboa"]:
+        report_info["text_peaks"] += " the method described in Gamboa et al. (2008)."
+        refs.append("""Gamboa, H. (2008). Multi-modal behavioral biometrics based on hci
+            and electrophysiology. PhD Thesis, Universidade.""")
+    elif method_peaks in ["kim", "kbk", "kim2004", "biosppy"]:
+        report_info["text_peaks"] += " the method described in Kim et al. (2004)."
+        refs.append("""Kim, K. H., Bang, S. W., & Kim, S. R. (2004). Emotion recognition system using short-term
+            monitoring of physiological signals. Medical and biological engineering and computing, 42(3),
+            419-427.""")
+    elif method_peaks in ["nk", "nk2", "neurokit", "neurokit2"]:
+        report_info["text_peaks"] += " the default method of the `neurokit2` package."
+        refs.append("https://doi.org/10.21105/joss.01667")
+    elif method_peaks in ["vanhalem2020", "vanhalem", "halem2020"]:
+        report_info["text_peaks"] += " the method described in van Halem et al. (2020)."
+        refs.append("""van Halem, S., Van Roekel, E., Kroencke, L., Kuper, N., & Denissen, J. (2020).
+            Moments That Matter?
On the Complexity of Using Triggers Based on Skin Conductance to Sample + Arousing Events Within an Experience Sampling Framework. European Journal of Personality.""") + elif method_peaks in ["nabian2018", "nabian"]: + report_info["text_peaks"] += " the method described in Nabian et al. (2018)." + refs.append("""Nabian, M., Yin, Y., Wormwood, J., Quigley, K. S., Barrett, L. F., & Ostadabbas, S. (2018). An + Open-Source Feature Extraction Tool for the Analysis of Peripheral Physiological Data. IEEE + journal of translational engineering in health and medicine, 6, 2800711.""") + else: + report_info[ + "text_peaks" + ] = f"The peak detection was carried out using the method {method_peaks}." + + # References + report_info["references"] = list(np.unique(refs)) + return report_info diff --git a/neurokit2/eda/eda_peaks.py b/neurokit2/eda/eda_peaks.py index 3fa8d06d84..2c6220a2ff 100644 --- a/neurokit2/eda/eda_peaks.py +++ b/neurokit2/eda/eda_peaks.py @@ -46,8 +46,6 @@ def eda_peaks(eda_phasic, sampling_rate=1000, method="neurokit", amplitude_min=0 -------- eda_simulate, eda_clean, eda_phasic, eda_process, eda_plot - - Examples --------- .. ipython:: python @@ -61,9 +59,9 @@ def eda_peaks(eda_phasic, sampling_rate=1000, method="neurokit", amplitude_min=0 eda_phasic = eda["EDA_Phasic"].values # Find peaks - _, kim2004 = nk.eda_peaks(eda_phasic, method="kim2004") - _, neurokit = nk.eda_peaks(eda_phasic, method="neurokit") - _, nabian2018 = nk.eda_peaks(eda_phasic, method="nabian2018") + _, kim2004 = nk.eda_peaks(eda_phasic, sampling_rate=100, method="kim2004") + _, neurokit = nk.eda_peaks(eda_phasic, sampling_rate=100, method="neurokit") + _, nabian2018 = nk.eda_peaks(eda_phasic, sampling_rate=100, method="nabian2018") @savefig p_eda_peaks.png scale=100% nk.events_plot([ @@ -122,7 +120,6 @@ def eda_peaks(eda_phasic, sampling_rate=1000, method="neurokit", amplitude_min=0 def _eda_peaks_getfeatures(info, eda_phasic, sampling_rate=1000, recovery_percentage=0.5): - # Sanity checks ----------------------------------------------------------- # Peaks (remove peaks before first onset) diff --git a/neurokit2/eda/eda_phasic.py b/neurokit2/eda/eda_phasic.py index caa6db5370..3e6b9ff9d7 100644 --- a/neurokit2/eda/eda_phasic.py +++ b/neurokit2/eda/eda_phasic.py @@ -1,20 +1,39 @@ # -*- coding: utf-8 -*- import numpy as np import pandas as pd +import scipy.linalg +import scipy.signal -from ..signal import signal_filter, signal_smooth +from ..signal import signal_filter, signal_resample, signal_smooth -def eda_phasic(eda_signal, sampling_rate=1000, method="highpass"): - """**Decompose Electrodermal Activity (EDA) into Phasic and Tonic Components** +def eda_phasic(eda_signal, sampling_rate=1000, method="highpass", **kwargs): + """**Electrodermal Activity (EDA) Decomposition into Phasic and Tonic Components** Decompose the Electrodermal Activity (EDA) into two components, namely **Phasic** and **Tonic**, using different methods including cvxEDA (Greco, 2016) or Biopac's Acqknowledge algorithms. + * **High-pass filtering**: Method implemented in Biopac's Acqknowledge. The raw EDA signal + is passed through a high pass filter with a cutoff frequency of 0.05 Hz + (cutoff frequency can be adjusted by the ``cutoff`` argument). + * **Median smoothing**: Method implemented in Biopac's Acqknowledge. The raw EDA signal is + passed through a median value smoothing filter, which removes areas of rapid change. The + phasic component is then calculated by subtracting the smoothed signal from the original. 
+      This method is computationally intensive and the processing time depends on the smoothing
+      factor, which can be controlled by the ``smoothing_factor`` argument, set by default to
+      ``4`` seconds. Higher values will produce results more rapidly.
+    * **cvxEDA**: Convex optimization approach to EDA processing (Greco, 2016). Requires the
+      ``cvxopt`` package (`> 1.3.0.1 `_) to
+      be installed.
+    * **SparsEDA**: Sparse non-negative deconvolution (Hernando-Gallego et al., 2017).
+
     .. warning::

-        cvxEDA algorithm seems broken. Help is needed to investigate.
+        sparsEDA was newly added thanks to
+        `this implementation `_. Help is needed to
+        double-check it, improve it and make it more concise and efficient. Also, it sometimes
+        errors for unclear reasons. Please help.

     Parameters
@@ -24,8 +43,10 @@
     sampling_rate : int
        The sampling frequency of raw EDA signal (in Hz, i.e., samples/second). Defaults to 1000Hz.
     method : str
-        The processing pipeline to apply. Can be one of ``"cvxEDA"``, ``"median"``,
-        ``"smoothmedian"``, ``"highpass"``, ``"biopac"``, or ``"acqknowledge"``.
+        The processing pipeline to apply. Can be one of ``"cvxEDA"``, ``"smoothmedian"``,
+        ``"highpass"``, or ``"sparse"``. Defaults to ``"highpass"``.
+    **kwargs : dict
+        Additional arguments to pass to the specific method.

     Returns
     -------
@@ -49,21 +70,27 @@
     eda_signal = nk.eda_simulate(duration=30, scr_number=5, drift=0.1)

     # Decompose using different algorithms
-    # cvxEDA = nk.eda_phasic(nk.standardize(eda_signal), method='cvxeda')
-    smoothMedian = nk.eda_phasic(nk.standardize(eda_signal), method='smoothmedian')
-    highpass = nk.eda_phasic(nk.standardize(eda_signal), method='highpass')
-
-    data = pd.concat([
-        # cvxEDA.add_suffix('_cvxEDA'),
-        smoothMedian.add_suffix('_SmoothMedian'),
-        highpass.add_suffix('_Highpass')], axis=1)
-    data["EDA_Raw"] = eda_signal
+    # cvxEDA = nk.eda_phasic(eda_signal, method='cvxeda')
+    smoothMedian = nk.eda_phasic(eda_signal, method='smoothmedian')
+    highpass = nk.eda_phasic(eda_signal, method='highpass')
+    sparse = nk.eda_phasic(eda_signal, method='sparse')

+    # Extract tonic and phasic components for plotting
+    t1, p1 = smoothMedian["EDA_Tonic"].values, smoothMedian["EDA_Phasic"].values
+    t2, p2 = highpass["EDA_Tonic"].values, highpass["EDA_Phasic"].values
+    t3, p3 = sparse["EDA_Tonic"].values, sparse["EDA_Phasic"].values
+
+    # Plot tonic
     @savefig p_eda_phasic1.png scale=100%
-    fig = data.plot()
+    nk.signal_plot([t1, t2, t3], labels=["SmoothMedian", "Highpass", "Sparse"])
     @suppress
     plt.close()

+    # Plot phasic
+    @savefig p_eda_phasic2.png scale=100%
+    nk.signal_plot([p1, p2, p3], labels=["SmoothMedian", "Highpass", "Sparse"])
+    @suppress
+    plt.close()

     **Example 2**: Real data.
@@ -86,19 +113,28 @@
     * Greco, A., Valenza, G., Lanata, A., Scilingo, E. P., & Citi, L. (2016). cvxEDA: A convex
       optimization approach to electrodermal activity processing. IEEE Transactions on Biomedical
       Engineering, 63(4), 797-804.
+    * Hernando-Gallego, F., Luengo, D., & Artés-Rodríguez, A. (2017). Feature extraction of
+      galvanic skin responses by nonnegative sparse deconvolution. IEEE journal of biomedical and
+      health informatics, 22(5), 1385-1394.
""" method = method.lower() # remove capitalised letters - if method == "cvxeda": - data = _eda_phasic_cvxeda(eda_signal, sampling_rate) + if method in ["cvxeda", "convex"]: + tonic, phasic = _eda_phasic_cvxeda(eda_signal, sampling_rate) + elif method in ["sparse", "sparseda"]: + tonic, phasic = _eda_phasic_sparsEDA(eda_signal, sampling_rate) elif method in ["median", "smoothmedian"]: - data = _eda_phasic_mediansmooth(eda_signal, sampling_rate) - elif method in ["highpass", "biopac", "acqknowledge"]: - data = _eda_phasic_highpass(eda_signal, sampling_rate) + tonic, phasic = _eda_phasic_mediansmooth(eda_signal, sampling_rate, **kwargs) + elif method in ["neurokit", "highpass", "biopac", "acqknowledge"]: + tonic, phasic = _eda_phasic_highpass(eda_signal, sampling_rate, **kwargs) else: - raise ValueError("NeuroKit error: eda_clean(): 'method' should be one of 'biosppy'.") + raise ValueError( + "NeuroKit error: eda_phasic(): 'method' should be one of " + "'cvxeda', 'median', 'smoothmedian', 'neurokit', 'highpass', " + "'biopac', 'acqknowledge'." + ) - return data + return pd.DataFrame({"EDA_Tonic": tonic, "EDA_Phasic": phasic}) # ============================================================================= @@ -111,24 +147,20 @@ def _eda_phasic_mediansmooth(eda_signal, sampling_rate=1000, smoothing_factor=4) tonic = signal_smooth(eda_signal, kernel="median", size=size) phasic = eda_signal - tonic - out = pd.DataFrame({"EDA_Tonic": np.array(tonic), "EDA_Phasic": np.array(phasic)}) + return np.array(tonic), np.array(phasic) - return out - -def _eda_phasic_highpass(eda_signal, sampling_rate=1000): +def _eda_phasic_highpass(eda_signal, sampling_rate=1000, cutoff=0.05): """One of the two methods available in biopac's acqknowledge (https://www.biopac.com/knowledge-base/phasic-eda- issue/)""" - phasic = signal_filter(eda_signal, sampling_rate=sampling_rate, lowcut=0.05, method="butter") - tonic = signal_filter(eda_signal, sampling_rate=sampling_rate, highcut=0.05, method="butter") - - out = pd.DataFrame({"EDA_Tonic": np.array(tonic), "EDA_Phasic": np.array(phasic)}) + phasic = signal_filter(eda_signal, sampling_rate=sampling_rate, lowcut=cutoff, method="butter") + tonic = signal_filter(eda_signal, sampling_rate=sampling_rate, highcut=cutoff, method="butter") - return out + return tonic, phasic # ============================================================================= -# cvxEDA +# cvxEDA (Greco et al., 2016) # ============================================================================= def _eda_phasic_cvxeda( eda_signal, @@ -197,10 +229,10 @@ def _cvx(m, n): ar = np.array( [ (a1 * frequency + 2.0) * (a0 * frequency + 2.0), - 2.0 * a1 * a0 * frequency ** 2 - 8.0, + 2.0 * a1 * a0 * frequency**2 - 8.0, (a1 * frequency - 2.0) * (a0 * frequency - 2.0), ] - ) / ((a1 - a0) * frequency ** 2) + ) / ((a1 - a0) * frequency**2) ma = np.array([1.0, 2.0, 1.0]) # matrices for ARMA model @@ -214,7 +246,10 @@ def _cvx(m, n): spl = np.convolve(spl, spl, "full") spl /= max(spl) # matrix of spline regressors - i = np.c_[np.arange(-(len(spl) // 2), (len(spl) + 1) // 2)] + np.r_[np.arange(0, n, delta_knot_s)] + i = ( + np.c_[np.arange(-(len(spl) // 2), (len(spl) + 1) // 2)] + + np.r_[np.arange(0, n, delta_knot_s)] + ) nB = i.shape[1] j = np.tile(np.arange(nB), (len(spl), 1)) p = np.tile(spl, (nB, 1)).T @@ -242,7 +277,9 @@ def _cvx(m, n): ] ) h = cvxopt.matrix([_cvx(n, 1), 0.5, 0.5, eda, 0.5, 0.5, _cvx(nB, 1)]) - c = cvxopt.matrix([(cvxopt.matrix(alpha, (1, n)) * A).T, _cvx(nC, 1), 1, gamma, _cvx(nB, 1)]) + c = 
cvxopt.matrix(
+            [(cvxopt.matrix(alpha, (1, n)) * A).T, _cvx(nC, 1), 1, gamma, _cvx(nB, 1)]
+        )
         res = cvxopt.solvers.conelp(c, G, h, dims={"l": n, "q": [n + 2, nB + 2], "s": []})
     else:
         # Use qp
@@ -254,9 +291,15 @@
                 [Mt * B, Ct * B, Bt * B + gamma * cvxopt.spmatrix(1.0, range(nB), range(nB))],
             ]
         )
-        f = cvxopt.matrix([(cvxopt.matrix(alpha, (1, n)) * A).T - Mt * eda, -(Ct * eda), -(Bt * eda)])
+        f = cvxopt.matrix(
+            [(cvxopt.matrix(alpha, (1, n)) * A).T - Mt * eda, -(Ct * eda), -(Bt * eda)]
+        )
         res = cvxopt.solvers.qp(
-            H, f, cvxopt.spmatrix(-A.V, A.I, A.J, (n, len(f))), cvxopt.matrix(0.0, (n, 1)), kktsolver='chol2'
+            H,
+            f,
+            cvxopt.spmatrix(-A.V, A.I, A.J, (n, len(f))),
+            cvxopt.matrix(0.0, (n, 1)),
+            kktsolver="chol2",
         )
     cvxopt.solvers.options.clear()
@@ -266,47 +309,453 @@
     q = res["x"][:n]
     phasic = M * q

-    out = pd.DataFrame({"EDA_Tonic": np.array(tonic)[:, 0], "EDA_Phasic": np.array(phasic)[:, 0]})
-
-    return out
+    # Return tonic and phasic components
+    return np.array(tonic)[:, 0], np.array(phasic)[:, 0]


 # =============================================================================
-# pyphysio
+# sparsEDA (Hernando-Gallego et al., 2017)
 # =============================================================================
-# def _eda_phasic_pyphysio(eda_signal, sampling_rate=1000):
-#     """
-#     Try to implement this: https://github.com/MPBA/pyphysio/blob/master/pyphysio/estimators/Estimators.py#L190
-#
-#     Examples
-#     ---------
-#     import neurokit2 as nk
-#     >>>
-#     eda_signal = nk.data("bio_eventrelated_100hz")["EDA"].values
-#     sampling_rate = 100
-#     """
-#     bateman = _eda_simulate_bateman(sampling_rate=sampling_rate)
-#     bateman_peak = np.argmax(bateman)
-#
-#     # Prepare the input signal to avoid starting/ending peaks in the driver
-#     bateman_first_half = bateman[0:bateman_peak + 1]
-#     bateman_first_half = eda_signal[0] * (bateman_first_half - np.min(bateman_first_half)) / (
-#         np.max(bateman_first_half) - np.min(bateman_first_half))
-#
-#     bateman_second_half = bateman[bateman_peak:]
-#     bateman_second_half = eda_signal[-1] * (bateman_second_half - np.min(bateman_second_half)) / (
-#         np.max(bateman_second_half) - np.min(bateman_second_half))
-#
-#     signal = np.r_[bateman_first_half, eda_signal, bateman_second_half]
-#
-#     def deconvolve(signal, irf):
-#         # normalize:
-#         irf = irf / np.sum(irf)
-#         # FFT method
-#         fft_signal = np.fft.fft(signal, n=len(signal))
-#         fft_irf = np.fft.fft(irf, n=len(signal))
-#         out = np.fft.ifft(fft_signal / fft_irf)
-#         return out
-#
-#     out = deconvolve(signal, bateman)
-#     nk.signal_plot(out)
+
+
+def _eda_phasic_sparsEDA(
+    eda_signal, sampling_rate=8, epsilon=0.0001, Kmax=40, Nmin=5 / 4, rho=0.025
+):
+    """Sparse non-negative deconvolution of the EDA signal (sparsEDA).
+
+    Credits go to:
+    - https://github.com/fhernandogallego/sparsEDA (Matlab original implementation)
+    - https://github.com/yskong224/SparsEDA-python (Python implementation)
+
+    Parameters
+    ----------
+    epsilon
+        Step remainder tolerance.
+    Kmax
+        Maximum number of LARS iterations.
+    Nmin
+        Minimum distance between sparse reactions, in seconds (converted internally to
+        samples as ``dmin = Nmin * sampling_rate``).
+    rho
+        Minimum threshold of sparse reactions.
+
+    Returns
+    -------
+    driver
+        Driver responses (phasic component).
+    SCL
+        Low (tonic) component.
+    MSE
+        Remainder of the signal fitting.
+    """
+
+    dmin = Nmin * sampling_rate
+    original_length = len(eda_signal)  # Used for resampling at the end
+
+    # Exceptions
+    # if len(eda_signal) / sampling_rate < 80:
+    #     raise AssertionError("Signal too short; must be 
longer than 80 seconds") + + if np.sum(np.isnan(eda_signal)) > 0: + raise AssertionError("Signal contains NaN") + + # Preprocessing + signalAdd = np.zeros(len(eda_signal) + (20 * sampling_rate) + (60 * sampling_rate)) + signalAdd[0 : 20 * sampling_rate] = eda_signal[0] + signalAdd[20 * sampling_rate : 20 * sampling_rate + len(eda_signal)] = eda_signal + signalAdd[20 * sampling_rate + len(eda_signal) :] = eda_signal[-1] + + # Resample to 8 Hz + eda_signal = signal_resample(eda_signal, sampling_rate=sampling_rate, desired_sampling_rate=8) + new_sr = 8 + + Nss = len(eda_signal) + Ns = len(signalAdd) + b0 = 0 + + pointerS = 20 * new_sr + pointerE = pointerS + Nss + # signalRs = signalAdd[pointerS:pointerE] + + # overlap Save + durationR = 70 + Lreg = int(20 * new_sr * 3) + L = 10 * new_sr + N = durationR * new_sr + T = 6 + + Rzeros = np.zeros([N + L, Lreg * 5]) + srF = new_sr * np.array([0.5, 0.75, 1, 1.25, 1.5]) + + for j in range(0, len(srF)): + t_rf = np.arange(0, 10 + 1e-10, 1 / srF[j]) # 10 sec + taus = np.array([0.5, 2, 1]) + rf_biexp = np.exp(-t_rf / taus[1]) - np.exp(-t_rf / taus[0]) + rf_est = taus[2] * rf_biexp + rf_est = rf_est / np.sqrt(np.sum(rf_est**2)) + + rf_est_zeropad = np.zeros(len(rf_est) + (N - len(rf_est))) + rf_est_zeropad[: len(rf_est)] = rf_est + Rzeros[0:N, j * Lreg : (j + 1) * Lreg] = scipy.linalg.toeplitz( + rf_est_zeropad, np.zeros(Lreg) + ) + + R0 = Rzeros[0:N, 0 : 5 * Lreg] + R = np.zeros([N, T + Lreg * 5]) + R[0:N, T:] = R0 + + # SCL + R[0:Lreg, 0] = np.linspace(0, 1, Lreg) + R[0:Lreg, 1] = -np.linspace(0, 1, Lreg) + R[int(Lreg / 3) : Lreg, 2] = np.linspace(0, 2 / 3, int((2 * Lreg) / 3)) + R[int(Lreg / 3) : Lreg, 3] = -np.linspace(0, 2 / 3, int((2 * Lreg) / 3)) + R[int(2 * Lreg / 3) : Lreg, 4] = np.linspace(0, 1 / 3, int(Lreg / 3)) + R[int(2 * Lreg / 3) : Lreg, 5] = -np.linspace(0, 1 / 3, int(Lreg / 3)) + Cte = np.sum(R[:, 0] ** 2) + R[:, 0:6] = R[:, 0:6] / np.sqrt(Cte) + + # Loop + cutS = 0 + cutE = N + slcAux = np.zeros(Ns) + driverAux = np.zeros(Ns) + resAux = np.zeros(Ns) + aux = 0 + + while cutE < Ns: + aux = aux + 1 + signalCut = signalAdd[cutS:cutE] + + if b0 == 0: + b0 = signalCut[0] + + signalCutIn = signalCut - b0 + beta, _, _, _, _, _ = lasso(R, signalCutIn, sampling_rate, Kmax, epsilon) + + signalEst = (np.matmul(R, beta) + b0).reshape(-1) + + # remAout = (signalCut - signalEst).^2; + # res2 = sum(remAout(20*sampling_rate+1:(40*sampling_rate))); + # res3 = sum(remAout(40*sampling_rate+1:(60*sampling_rate))); + + remAout = (signalCut - signalEst) ** 2 + res2 = np.sum(remAout[20 * sampling_rate : 40 * sampling_rate]) + res3 = np.sum(remAout[40 * sampling_rate : 60 * sampling_rate]) + + jump = 1 + if res2 < 1: + jump = 2 + if res3 < 1: + jump = 3 + + SCL = np.matmul(R[:, 0:6], beta[0:6, :]) + b0 + + SCRline = beta[6:, :] + + SCRaux = np.zeros([Lreg, 5]) + SCRaux[:] = SCRline.reshape([5, Lreg]).transpose() + driver = SCRaux.sum(axis=1) + + b0 = np.matmul(R[jump * 20 * sampling_rate - 1, 0:6], beta[0:6, :]) + b0 + + driverAux[cutS : cutS + (jump * 20 * sampling_rate)] = driver[0 : jump * sampling_rate * 20] + slcAux[cutS : cutS + (jump * 20 * sampling_rate)] = SCL[ + 0 : jump * sampling_rate * 20 + ].reshape(-1) + resAux[cutS : cutS + (jump * 20 * sampling_rate)] = remAout[0 : jump * sampling_rate * 20] + cutS = cutS + jump * 20 * sampling_rate + cutE = cutS + N + + SCRaux = driverAux[pointerS:pointerE] + SCL = slcAux[pointerS:pointerE] + MSE = resAux[pointerS:pointerE] + + # PP + ind = np.argwhere(SCRaux > 0).reshape(-1) + scr_temp = SCRaux[ind] + ind2 = 
np.argsort(scr_temp)[::-1] + scr_ord = scr_temp[ind2] + scr_fin = [scr_ord[0]] + ind_fin = [ind[ind2[0]]] + + for i in range(1, len(ind2)): + if np.all(np.abs(ind[ind2[i]] - ind_fin) >= dmin): + scr_fin.append(scr_ord[i]) + ind_fin.append(ind[ind2[i]]) + + driver = np.zeros(len(SCRaux)) + driver[np.array(ind_fin)] = np.array(scr_fin) + + scr_max = scr_fin[0] + threshold = rho * scr_max + driver[driver < threshold] = 0 + + # Resample + SCL = signal_resample(SCL, desired_length=original_length) + MSE = signal_resample(MSE, desired_length=original_length) + + return driver, SCL, MSE + + +def lasso(R, s, sampling_rate, maxIters, epsilon): + N = len(s) + W = R.shape[1] + + OptTol = -10 + solFreq = 0 + resStop2 = 0.0005 + lmbdaStop = 0 + zeroTol = 1e-5 + + x = np.zeros(W) + x_old = np.zeros(W) + iter = 0 + + c = np.matmul(R.transpose(), s.reshape(-1, 1)).reshape(-1) + + lmbda = np.max(c) + + if lmbda < 0: + raise Exception( + "y is not expressible as a non-negative linear combination of the columns of X" + ) + + newIndices = np.argwhere(np.abs(c - lmbda) < zeroTol).flatten() + + collinearIndices = [] + beta = [] + duals = [] + res = s + + if (lmbdaStop > 0 and lmbda < lmbdaStop) or ((epsilon > 0) and (np.linalg.norm(res) < epsilon)): + activationHist = [] + numIters = 0 + + R_I = [] + activeSet = [] + + for j in range(0, len(newIndices)): + iter = iter + 1 + R_I, flag = updateChol(R_I, N, W, R, 1, activeSet, newIndices[j], zeroTol) + activeSet.append(newIndices[j]) + activationHist = activeSet.copy() + + # Loop + done = 0 + while done == 0: + if len(activationHist) == 4: + lmbda = np.max(c) + newIndices = np.argwhere(np.abs(c - lmbda) < zeroTol).flatten() + activeSet = [] + for j in range(0, len(newIndices)): + iter = iter + 1 + R_I, flag = updateChol(R_I, N, W, R, 1, activeSet, newIndices[j], zeroTol) + activeSet.append(newIndices[j]) + [activationHist.append(ele) for ele in activeSet] + else: + lmbda = c[activeSet[0]] + + dx = np.zeros(W) + + if len(np.array([R_I]).flatten()) == 1: + z = scipy.linalg.solve( + R_I.reshape([-1, 1]), + np.sign(c[np.array(activeSet).flatten()].reshape(-1, 1)), + transposed=True, + lower=False, + ) + else: + z = scipy.linalg.solve( + R_I, + np.sign(c[np.array(activeSet).flatten()].reshape(-1, 1)), + transposed=True, + lower=False, + ) + + if len(np.array([R_I]).flatten()) == 1: + dx[np.array(activeSet).flatten()] = scipy.linalg.solve( + R_I.reshape([-1, 1]), z, transposed=False, lower=False + ) + else: + dx[np.array(activeSet).flatten()] = scipy.linalg.solve( + R_I, z, transposed=False, lower=False + ).flatten() + + v = np.matmul( + R[:, np.array(activeSet).flatten()], dx[np.array(activeSet).flatten()].reshape(-1, 1) + ) + ATv = np.matmul(R.transpose(), v).flatten() + + gammaI = np.Inf + removeIndices = [] + + inactiveSet = np.arange(0, W) + if len(np.array(activeSet).flatten()) > 0: + inactiveSet[np.array(activeSet).flatten()] = -1 + + if len(np.array(collinearIndices).flatten()) > 0: + inactiveSet[np.array(collinearIndices).flatten()] = -1 + + inactiveSet = np.argwhere(inactiveSet >= 0).flatten() + + if len(inactiveSet) == 0: + gammaIc = 1 + newIndices = [] + else: + epsilon = 1e-12 + gammaArr = (lmbda - c[inactiveSet]) / (1 - ATv[inactiveSet] + epsilon) + + gammaArr[gammaArr < zeroTol] = np.Inf + gammaIc = np.min(gammaArr) + # Imin = np.argmin(gammaArr) + newIndices = inactiveSet[(np.abs(gammaArr - gammaIc) < zeroTol)] + + gammaMin = min(gammaIc, gammaI) + + x = x + gammaMin * dx + res = res - gammaMin * v.flatten() + c = c - gammaMin * ATv + + if ( + ((lmbda - 
gammaMin) < OptTol) + or ((lmbdaStop > 0) and (lmbda <= lmbdaStop)) + or ((epsilon > 0) and (np.linalg.norm(res) <= epsilon)) + ): + newIndices = [] + removeIndices = [] + done = 1 + + if (lmbda - gammaMin) < OptTol: + # print(lmbda-gammaMin) + pass + if np.linalg.norm(res[0 : sampling_rate * 20]) <= resStop2: + done = 1 + if np.linalg.norm(res[sampling_rate * 20 : sampling_rate * 40]) <= resStop2: + done = 1 + if np.linalg.norm(res[sampling_rate * 40 : sampling_rate * 60]) <= resStop2: + done = 1 + + if gammaIc <= gammaI and len(newIndices) > 0: + for j in range(0, len(newIndices)): + iter = iter + 1 + R_I, flag = updateChol( + R_I, N, W, R, 1, np.array(activeSet).flatten(), newIndices[j], zeroTol + ) + + if flag: + collinearIndices.append(newIndices[j]) + else: + activeSet.append(newIndices[j]) + activationHist.append(newIndices[j]) + + if gammaI <= gammaIc: + for j in range(0, len(removeIndices)): + iter = iter + 1 + col = np.argwhere(np.array(activeSet).flatten() == removeIndices[j]).flatten() + + R_I = downdateChol(R_I, col) + activeSet.pop(col) + collinearIndices = [] + + x[np.array(removeIndices).flatten()] = 0 + activationHist.append(-removeIndices) + if iter >= maxIters: + done = 1 + + if len(np.argwhere(x < 0).flatten()) > 0: + x = x_old.copy() + done = 1 + else: + x_old = x.copy() + + if done or ((solFreq > 0) and not (iter % solFreq)): + beta.append(x) + duals.append(v) + numIters = iter + return np.array(beta).reshape(-1, 1), numIters, activationHist, duals, lmbda, res + + +def updateChol(R_I, n, N, R, explicitA, activeSet, newIndex, zeroTol): + # global opts_tr, zeroTol + + flag = 0 + + newVec = R[:, newIndex] + + if len(activeSet) == 0: + R_I0 = np.sqrt(np.sum(newVec**2)) + else: + if explicitA: + if len(np.array([R_I]).flatten()) == 1: + p = scipy.linalg.solve( + np.array(R_I).reshape(-1, 1), + np.matmul(R[:, activeSet].transpose(), R[:, newIndex]), + transposed=True, + lower=False, + ) + else: + p = scipy.linalg.solve( + R_I, + np.matmul(R[:, activeSet].transpose(), R[:, newIndex]), + transposed=True, + lower=False, + ) + + else: + # Original matlab code: + + # Global var for linsolve functions.. 
+ # optsUT = True + # opts_trUT = True + # opts_trTRANSA = True + # AnewVec = feval(R,2,n,length(activeSet),newVec,activeSet,N); + # p = linsolve(R_I,AnewVec,opts_tr); + + # Translation by chatGPT-3, might be wrong + AnewVec = np.zeros((n, 1)) + for i in range(len(activeSet)): + AnewVec += R[2, :, activeSet[i]] * newVec[i] + p = scipy.linalg.solve(R_I, AnewVec, transposed=True, lower=False) + + q = np.sum(newVec**2) - np.sum(p**2) + if q <= zeroTol: + flag = 1 + R_I0 = R_I.copy() + else: + if len(np.array([R_I]).shape) == 1: + R_I = np.array([R_I]).reshape(-1, 1) + # print(R_I) + R_I0 = np.zeros([np.array(R_I).shape[0] + 1, R_I.shape[1] + 1]) + R_I0[0 : R_I.shape[0], 0 : R_I.shape[1]] = R_I + R_I0[0 : R_I.shape[0], -1] = p + R_I0[-1, -1] = np.sqrt(q) + + return R_I0, flag + + +def downdateChol(R, j): + # global opts_tr, zeroTol + + def planerot(x): + # http://statweb.stanford.edu/~susan/courses/b494/index/node30.html + if x[1] != 0: + r = np.linalg.norm(x) + G = np.zeros(len(x) + 2) + G[: len(x)] = x / r + G[-2] = -x[1] / r + G[-1] = x[0] / r + else: + G = np.eye(2) + return G, x + + R1 = np.zeros([R.shape[0], R.shape[1] - 1]) + R1[:, :j] = R[:, :j] + R1[:, j:] = R[:, j + 1 :] + # m = R1.shape[0] + n = R1.shape[1] + + for k in range(j, n): + p = np.array([k, k + 1]) + G, R[p, k] = planerot(R[p, k]) + if k < n: + R[p, k + 1 : n] = G * R[p, k + 1 : n] + + return R[:n, :] diff --git a/neurokit2/eda/eda_plot.py b/neurokit2/eda/eda_plot.py index c8340312ca..e0a5fb4536 100644 --- a/neurokit2/eda/eda_plot.py +++ b/neurokit2/eda/eda_plot.py @@ -4,10 +4,8 @@ import numpy as np import pandas as pd -from ..misc import find_closest - -def eda_plot(eda_signals, sampling_rate=None): +def eda_plot(eda_signals, sampling_rate=None, static=True): """**Visualize electrodermal activity (EDA) data** Parameters @@ -16,6 +14,10 @@ def eda_plot(eda_signals, sampling_rate=None): DataFrame obtained from :func:`eda_process()`. sampling_rate : int The desired sampling rate (in Hz, i.e., samples/second). Defaults to None. + static : bool + If True, a static plot will be generated with matplotlib. + If False, an interactive plot will be generated with plotly. + Defaults to True. Returns ------- @@ -45,130 +47,300 @@ def eda_plot(eda_signals, sampling_rate=None): onsets = np.where(eda_signals["SCR_Onsets"] == 1)[0] half_recovery = np.where(eda_signals["SCR_Recovery"] == 1)[0] - fig, (ax0, ax1, ax2) = plt.subplots(nrows=3, ncols=1, sharex=True) - # Determine unit of x-axis. - last_ax = fig.get_axes()[-1] if sampling_rate is not None: - last_ax.set_xlabel("Seconds") + x_label = "Seconds" x_axis = np.linspace(0, len(eda_signals) / sampling_rate, len(eda_signals)) else: - last_ax.set_xlabel("Samples") + x_label = "Samples" x_axis = np.arange(0, len(eda_signals)) - plt.tight_layout(h_pad=0.2) + if static: + fig, (ax0, ax1, ax2) = plt.subplots(nrows=3, ncols=1, sharex=True) - # Plot cleaned and raw respiration as well as peaks and troughs. - ax0.set_title("Raw and Cleaned Signal") - fig.suptitle("Electrodermal Activity (EDA)", fontweight="bold") + last_ax = fig.get_axes()[-1] + last_ax.set_xlabel(x_label) + plt.tight_layout(h_pad=0.2) - ax0.plot(x_axis, eda_signals["EDA_Raw"], color="#B0BEC5", label="Raw", zorder=1) - ax0.plot( - x_axis, eda_signals["EDA_Clean"], color="#9C27B0", label="Cleaned", linewidth=1.5, zorder=1 - ) - ax0.legend(loc="upper right") - - # Plot skin cnoductance response. - ax1.set_title("Skin Conductance Response (SCR)") - - # Plot Phasic. 
- ax1.plot( - x_axis, - eda_signals["EDA_Phasic"], - color="#E91E63", - label="Phasic Component", - linewidth=1.5, - zorder=1, - ) + # Plot cleaned and raw electrodermal activity. + ax0.set_title("Raw and Cleaned Signal") + fig.suptitle("Electrodermal Activity (EDA)", fontweight="bold") - # Mark segments. - risetime_coord, amplitude_coord, halfr_coord = _eda_plot_dashedsegments( - eda_signals, ax1, x_axis, onsets, peaks, half_recovery - ) + ax0.plot(x_axis, eda_signals["EDA_Raw"], color="#B0BEC5", label="Raw", zorder=1) + ax0.plot( + x_axis, + eda_signals["EDA_Clean"], + color="#9C27B0", + label="Cleaned", + linewidth=1.5, + zorder=1, + ) + ax0.legend(loc="upper right") - risetime = matplotlib.collections.LineCollection( - risetime_coord, colors="#FFA726", linewidths=1, linestyle="dashed" - ) - ax1.add_collection(risetime) + # Plot skin conductance response. + ax1.set_title("Skin Conductance Response (SCR)") - amplitude = matplotlib.collections.LineCollection( - amplitude_coord, colors="#1976D2", linewidths=1, linestyle="solid" - ) - ax1.add_collection(amplitude) + # Plot Phasic. + ax1.plot( + x_axis, + eda_signals["EDA_Phasic"], + color="#E91E63", + label="Phasic Component", + linewidth=1.5, + zorder=1, + ) - halfr = matplotlib.collections.LineCollection( - halfr_coord, colors="#FDD835", linewidths=1, linestyle="dashed" - ) - ax1.add_collection(halfr) - ax1.legend(loc="upper right") + # Mark segments. + risetime_coord, amplitude_coord, halfr_coord = _eda_plot_dashedsegments( + eda_signals, ax1, x_axis, onsets, peaks, half_recovery + ) - # Plot Tonic. - ax2.set_title("Skin Conductance Level (SCL)") - ax2.plot( - x_axis, eda_signals["EDA_Tonic"], color="#673AB7", label="Tonic Component", linewidth=1.5 - ) - ax2.legend(loc="upper right") + risetime = matplotlib.collections.LineCollection( + risetime_coord, colors="#FFA726", linewidths=1, linestyle="dashed" + ) + ax1.add_collection(risetime) + + amplitude = matplotlib.collections.LineCollection( + amplitude_coord, colors="#1976D2", linewidths=1, linestyle="solid" + ) + ax1.add_collection(amplitude) + + halfr = matplotlib.collections.LineCollection( + halfr_coord, colors="#FDD835", linewidths=1, linestyle="dashed" + ) + ax1.add_collection(halfr) + ax1.legend(loc="upper right") + + # Plot Tonic. + ax2.set_title("Skin Conductance Level (SCL)") + ax2.plot( + x_axis, + eda_signals["EDA_Tonic"], + color="#673AB7", + label="Tonic Component", + linewidth=1.5, + ) + ax2.legend(loc="upper right") + return fig + else: + # Create interactive plot with plotly. + try: + import plotly.graph_objects as go + from plotly.subplots import make_subplots + + except ImportError as e: + raise ImportError( + "NeuroKit error: ppg_plot(): the 'plotly'", + " module is required when 'static' is False.", + " Please install it first (`pip install plotly`).", + ) from e + + fig = make_subplots( + rows=3, + cols=1, + shared_xaxes=True, + vertical_spacing=0.05, + subplot_titles=( + "Raw and Cleaned Signal", + "Skin Conductance Response (SCR)", + "Skin Conductance Level (SCL)", + ), + ) + + # Plot cleaned and raw electrodermal activity. + fig.add_trace( + go.Scatter( + x=x_axis, + y=eda_signals["EDA_Raw"], + mode="lines", + name="Raw", + line=dict(color="#B0BEC5"), + showlegend=True, + ), + row=1, + col=1, + ) + + fig.add_trace( + go.Scatter( + x=x_axis, + y=eda_signals["EDA_Clean"], + mode="lines", + name="Cleaned", + line=dict(color="#9C27B0"), + showlegend=True, + ), + row=1, + col=1, + ) + + # Plot skin conductance response. 
+ fig.add_trace( + go.Scatter( + x=x_axis, + y=eda_signals["EDA_Phasic"], + mode="lines", + name="Phasic Component", + line=dict(color="#E91E63"), + showlegend=True, + ), + row=2, + col=1, + ) + + # Mark segments. + _, _, _ = _eda_plot_dashedsegments( + eda_signals, fig, x_axis, onsets, peaks, half_recovery, static=static + ) + + # TODO add dashed segments to plotly version + + # Plot skin conductance level. + fig.add_trace( + go.Scatter( + x=x_axis, + y=eda_signals["EDA_Tonic"], + mode="lines", + name="Tonic Component", + line=dict(color="#673AB7"), + showlegend=True, + ), + row=3, + col=1, + ) + + # Add title to entire figure. + fig.update_layout(title_text="Electrodermal Activity (EDA)", title_x=0.5) + + return fig # ============================================================================= # Internals # ============================================================================= -def _eda_plot_dashedsegments(eda_signals, ax, x_axis, onsets, peaks, half_recovery): +def _eda_plot_dashedsegments( + eda_signals, ax, x_axis, onsets, peaks, half_recovery, static=True +): # Mark onsets, peaks, and half-recovery. - scat_onset = ax.scatter( - x_axis[onsets], - eda_signals["EDA_Phasic"][onsets], - color="#FFA726", - label="SCR - Onsets", - zorder=2, - ) - scat_peak = ax.scatter( - x_axis[peaks], - eda_signals["EDA_Phasic"][peaks], - color="#1976D2", - label="SCR - Peaks", - zorder=2, - ) - scat_halfr = ax.scatter( - x_axis[half_recovery], - eda_signals["EDA_Phasic"][half_recovery], - color="#FDD835", - label="SCR - Half recovery", - zorder=2, - ) + onset_x_values = x_axis[onsets] + onset_y_values = eda_signals["EDA_Phasic"][onsets].values + peak_x_values = x_axis[peaks] + peak_y_values = eda_signals["EDA_Phasic"][peaks].values + halfr_x_values = x_axis[half_recovery] + halfr_y_values = eda_signals["EDA_Phasic"][half_recovery].values + end_onset = pd.Series( eda_signals["EDA_Phasic"][onsets].values, eda_signals["EDA_Phasic"][peaks].index ) - scat_endonset = ax.scatter(x_axis[end_onset.index], end_onset.values, alpha=0) - # Rise time. - risetime_start = scat_onset.get_offsets() - risetime_end = scat_endonset.get_offsets() - risetime_coord = [(risetime_start[i], risetime_end[i]) for i in range(0, len(onsets))] + risetime_coord = [] + amplitude_coord = [] + halfr_coord = [] + + for i in range(len(onsets)): + # Rise time. + start = (onset_x_values[i], onset_y_values[i]) + end = (peak_x_values[i], onset_y_values[i]) + risetime_coord.append((start, end)) - # SCR Amplitude. - peak_top = scat_peak.get_offsets() - amplitude_coord = [(peak_top[i], risetime_end[i]) for i in range(0, len(onsets))] + for i in range(len(peaks)): + # SCR Amplitude. + start = (peak_x_values[i], onset_y_values[i]) + end = (peak_x_values[i], peak_y_values[i]) + amplitude_coord.append((start, end)) - # Half recovery. - peak_x_values = peak_top.data[:, 0] - recovery_x_values = x_axis[half_recovery] + for i in range(len(half_recovery)): + # Half recovery. + end = (halfr_x_values[i], halfr_y_values[i]) + peak_x_idx = np.where(peak_x_values < halfr_x_values[i])[0][-1] + start = (peak_x_values[peak_x_idx], halfr_y_values[i]) + halfr_coord.append((start, end)) - peak_list = [] - for i, index in enumerate(half_recovery): - value = find_closest( - recovery_x_values[i], peak_x_values, direction="smaller", strictly=False + if static: + # Plot with matplotlib. + # Mark onsets, peaks, and half-recovery. 
+ ax.scatter( + x_axis[onsets], + eda_signals["EDA_Phasic"][onsets], + color="#FFA726", + label="SCR - Onsets", + zorder=2, + ) + ax.scatter( + x_axis[peaks], + eda_signals["EDA_Phasic"][peaks], + color="#1976D2", + label="SCR - Peaks", + zorder=2, + ) + ax.scatter( + x_axis[half_recovery], + eda_signals["EDA_Phasic"][half_recovery], + color="#FDD835", + label="SCR - Half recovery", + zorder=2, ) - peak_list.append(value) - peak_index = [] - for i in np.array(peak_list): - index = np.where(i == peak_x_values)[0][0] - peak_index.append(index) + ax.scatter(x_axis[end_onset.index], end_onset.values, alpha=0) + else: + # Create interactive plot with plotly. + try: + import plotly.graph_objects as go - halfr_index = list(range(0, len(half_recovery))) - halfr_end = scat_halfr.get_offsets() - halfr_start = [(peak_top[i, 0], halfr_end[x, 1]) for i, x in zip(peak_index, halfr_index)] - halfr_coord = [(halfr_start[i], halfr_end[i]) for i in halfr_index] + except ImportError as e: + raise ImportError( + "NeuroKit error: ppg_plot(): the 'plotly'", + " module is required when 'static' is False.", + " Please install it first (`pip install plotly`).", + ) from e + # Plot with plotly. + # Mark onsets, peaks, and half-recovery. + ax.add_trace( + go.Scatter( + x=x_axis[onsets], + y=eda_signals["EDA_Phasic"][onsets], + mode="markers", + name="SCR - Onsets", + marker=dict(color="#FFA726"), + showlegend=True, + ), + row=2, + col=1, + ) + ax.add_trace( + go.Scatter( + x=x_axis[peaks], + y=eda_signals["EDA_Phasic"][peaks], + mode="markers", + name="SCR - Peaks", + marker=dict(color="#1976D2"), + showlegend=True, + ), + row=2, + col=1, + ) + ax.add_trace( + go.Scatter( + x=x_axis[half_recovery], + y=eda_signals["EDA_Phasic"][half_recovery], + mode="markers", + name="SCR - Half recovery", + marker=dict(color="#FDD835"), + showlegend=True, + ), + row=2, + col=1, + ) + ax.add_trace( + go.Scatter( + x=x_axis[end_onset.index], + y=end_onset.values, + mode="markers", + marker=dict(color="#FDD835", opacity=0), + showlegend=False, + ), + row=2, + col=1, + ) return risetime_coord, amplitude_coord, halfr_coord diff --git a/neurokit2/eda/eda_process.py b/neurokit2/eda/eda_process.py index 3df5c47781..e978dbb676 100644 --- a/neurokit2/eda/eda_process.py +++ b/neurokit2/eda/eda_process.py @@ -1,13 +1,16 @@ # -*- coding: utf-8 -*- import pandas as pd +from ..misc.report import create_report from ..signal import signal_sanitize from .eda_clean import eda_clean from .eda_peaks import eda_peaks from .eda_phasic import eda_phasic +from .eda_methods import eda_methods +from .eda_plot import eda_plot -def eda_process(eda_signal, sampling_rate=1000, method="neurokit"): +def eda_process(eda_signal, sampling_rate=1000, method="neurokit", report=None, **kwargs): """**Process Electrodermal Activity (EDA)** Convenience function that automatically processes electrodermal activity (EDA) signal. @@ -20,6 +23,14 @@ def eda_process(eda_signal, sampling_rate=1000, method="neurokit"): The sampling frequency of ``"rsp_signal"`` (in Hz, i.e., samples/second). method : str The processing pipeline to apply. Can be one of ``"biosppy"`` or ``"neurokit"`` (default). + report : str + The filename of a report containing description and figures of processing + (e.g. ``"myreport.html"``). Needs to be supplied if a report file + should be generated. Defaults to ``None``. Can also be ``"text"`` to + just print the text in the console without saving anything. + **kwargs + Other arguments to be passed to specific methods. 
For more information, + see :func:`.rsp_methods`. Returns ------- @@ -76,17 +87,29 @@ def eda_process(eda_signal, sampling_rate=1000, method="neurokit"): """ # Sanitize input eda_signal = signal_sanitize(eda_signal) + methods = eda_methods(sampling_rate=sampling_rate, method=method, **kwargs) # Preprocess - eda_cleaned = eda_clean(eda_signal, sampling_rate=sampling_rate, method=method) - eda_decomposed = eda_phasic(eda_cleaned, sampling_rate=sampling_rate) + # Clean signal + eda_cleaned = eda_clean(eda_signal, + sampling_rate=sampling_rate, + method=methods["method_cleaning"], + **methods["kwargs_cleaning"]) + if methods["method_phasic"] is None or methods["method_phasic"].lower() == "none": + eda_decomposed = pd.DataFrame({"EDA_Phasic": eda_cleaned}) + else: + eda_decomposed = eda_phasic(eda_cleaned, + sampling_rate=sampling_rate, + method=methods["method_phasic"], + **methods["kwargs_phasic"]) # Find peaks peak_signal, info = eda_peaks( eda_decomposed["EDA_Phasic"].values, sampling_rate=sampling_rate, - method=method, + method=methods["method_peaks"], amplitude_min=0.1, + **methods["kwargs_peaks"], ) info["sampling_rate"] = sampling_rate # Add sampling rate in dict info @@ -95,4 +118,12 @@ def eda_process(eda_signal, sampling_rate=1000, method="neurokit"): signals = pd.concat([signals, eda_decomposed, peak_signal], axis=1) + if report is not None: + # Generate report containing description and figures of processing + if ".html" in str(report): + fig = eda_plot(signals, sampling_rate=sampling_rate, static=False) + else: + fig = None + create_report(file=report, signals=signals, info=methods, fig=fig) + return signals, info diff --git a/neurokit2/eda/eda_simulate.py b/neurokit2/eda/eda_simulate.py index 3f8db8d2b2..e07d6645aa 100644 --- a/neurokit2/eda/eda_simulate.py +++ b/neurokit2/eda/eda_simulate.py @@ -1,11 +1,19 @@ # -*- coding: utf-8 -*- import numpy as np +from ..misc import check_random_state, check_random_state_children from ..signal import signal_distort, signal_merge def eda_simulate( - duration=10, length=None, sampling_rate=1000, noise=0.01, scr_number=1, drift=-0.01, random_state=None + duration=10, + length=None, + sampling_rate=1000, + noise=0.01, + scr_number=1, + drift=-0.01, + random_state=None, + random_state_distort="spawn", ): """**Simulate Electrodermal Activity (EDA) signal** @@ -25,8 +33,14 @@ def eda_simulate( Desired number of skin conductance responses (SCRs), i.e., peaks. Defaults to 1. drift : float or list The slope of a linear drift of the signal. Defaults to -0.01. - random_state : int - Seed for the random number generator. Defaults to None. + random_state : None, int, numpy.random.RandomState or numpy.random.Generator + Seed for the random number generator. See for ``misc.check_random_state`` for further information. + random_state_distort : {'legacy', 'spawn'}, None, int, numpy.random.RandomState or numpy.random.Generator + Random state to be used to distort the signal. If ``"legacy"``, use the same random state used to + generate the signal (discouraged as it creates dependent random streams). If ``"spawn"``, spawn + independent children random number generators from the random_state argument. If any of the other types, + generate independent children random number generators from the random_state_distort provided (this + allows generating multiple version of the same signal distorted by different random noise realizations). 
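As a quick illustration of the two seeding arguments (a minimal sketch, with arbitrary parameter values), fixing ``random_state`` while varying ``random_state_distort`` reproduces the same underlying SCR train under two different noise realizations:

```python
import neurokit2 as nk

# Same seed for the signal itself, two different distortion streams
eda_a = nk.eda_simulate(duration=10, scr_number=3, random_state=42, random_state_distort=1)
eda_b = nk.eda_simulate(duration=10, scr_number=3, random_state=42, random_state_distort=2)
# eda_a and eda_b share the same SCR timings and amplitudes but differ in the added noise
```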
Returns ---------- @@ -59,7 +73,8 @@ def eda_simulate( """ # Seed the random generator for reproducible results - np.random.seed(random_state) + rng = check_random_state(random_state) + random_state_distort = check_random_state_children(random_state, random_state_distort, n_children=1) # Generate number of samples automatically if length is unspecified if length is None: @@ -72,7 +87,7 @@ def eda_simulate( start_peaks = np.linspace(0, duration, scr_number, endpoint=False) for start_peak in start_peaks: - relative_time_peak = np.abs(np.random.normal(0, 5, size=1)) + 3.0745 + relative_time_peak = np.abs(rng.normal(0, 5, size=1)) + 3.0745 scr = _eda_simulate_scr(sampling_rate=sampling_rate, time_peak=relative_time_peak) time_scr = [start_peak, start_peak + 9] if time_scr[0] < 0: @@ -93,9 +108,9 @@ def eda_simulate( noise_frequency=[5, 10, 100], noise_shape="laplace", silent=True, + random_state=random_state_distort[0], ) - # Reset random seed (so it doesn't affect global) - np.random.seed(None) + return eda diff --git a/neurokit2/eda/eda_sympathetic.py b/neurokit2/eda/eda_sympathetic.py index 03768a6118..3a9f311d0a 100644 --- a/neurokit2/eda/eda_sympathetic.py +++ b/neurokit2/eda/eda_sympathetic.py @@ -1,8 +1,11 @@ # -*- coding: utf-8 -*- +from warnings import warn + import numpy as np import pandas as pd import scipy +from ..misc import NeuroKitWarning from ..signal import signal_filter, signal_resample, signal_timefrequency from ..signal.signal_power import _signal_power_instant_compute from ..signal.signal_psd import _signal_psd_welch @@ -15,7 +18,8 @@ def eda_sympathetic( """**Sympathetic Nervous System Index from Electrodermal activity (EDA)** Derived from Posada-Quintero et al. (2016), who argue that dynamics of the sympathetic component - of EDA signal is represented in the frequency band of 0.045-0.25Hz. + of EDA signal is represented in the frequency band of 0.045-0.25Hz. Note that the Posada method + requires a signal of a least 64 seconds. Parameters ---------- @@ -39,8 +43,9 @@ def eda_sympathetic( Returns ------- dict - A dictionary containing the EDA symptathetic indexes, accessible by keys ``"EDA_Symp"`` and - ``"EDA_SympN"`` (normalized, obtained by dividing EDA_Symp by total power). + A dictionary containing the EDA sympathetic indexes, accessible by keys + ``"EDA_Sympathetic"`` and ``"EDA_SympatheticN"`` (normalized, obtained by dividing EDA_Symp + by total power). 
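Because this release renames the output keys (``EDA_Symp`` becomes ``EDA_Sympathetic``), downstream code that indexes the returned dictionary needs a matching update; a minimal sketch:

```python
import neurokit2 as nk

eda = nk.data("bio_resting_8min_100hz")["EDA"]  # 8 min recording, well over the 64 s minimum
results = nk.eda_sympathetic(eda, sampling_rate=100, method="posada")
sympathetic = results["EDA_Sympathetic"]   # was results["EDA_Symp"]
normalized = results["EDA_SympatheticN"]   # was results["EDA_SympN"]
```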
Examples -------- @@ -49,8 +54,14 @@ def eda_sympathetic( import neurokit2 as nk eda = nk.data('bio_resting_8min_100hz')['EDA'] - indexes_posada = nk.eda_sympathetic(eda, sampling_rate=100, method='posada', show=True) - indexes_ghiasi = nk.eda_sympathetic(eda, sampling_rate=100, method='ghiasi', show=True) + + @savefig p_eda_sympathetic1.png scale=100% + nk.eda_sympathetic(eda, sampling_rate=100, method='posada', show=True) + @suppress + plt.close() + + results = nk.eda_sympathetic(eda, sampling_rate=100, method='ghiasi') + results References ---------- @@ -67,12 +78,14 @@ def eda_sympathetic( out = {} - if method.lower() in ["ghiasi"]: + if method.lower() in ["ghiasi", "ghiasi2018"]: out = _eda_sympathetic_ghiasi( eda_signal, sampling_rate=sampling_rate, frequency_band=frequency_band, show=show ) - elif method.lower() in ["posada", "posada-quintero", "quintero"]: - out = _eda_sympathetic_posada(eda_signal, frequency_band=frequency_band, show=show) + elif method.lower() in ["posada", "posada-quintero", "quintero", "posada2016"]: + out = _eda_sympathetic_posada( + eda_signal, sampling_rate=sampling_rate, frequency_band=frequency_band, show=show + ) else: raise ValueError( "NeuroKit error: eda_sympathetic(): 'method' should be " "one of 'ghiasi', 'posada'." @@ -86,10 +99,30 @@ def eda_sympathetic( # ============================================================================= -def _eda_sympathetic_posada(eda_signal, frequency_band=[0.045, 0.25], show=True, out={}): +def _eda_sympathetic_posada( + eda_signal, frequency_band=[0.045, 0.25], sampling_rate=1000, show=True, out={} +): + + # This method assumes signal longer than 64 s + if len(eda_signal) <= sampling_rate * 64: + warn( + "The 'posada2016' method requires a signal of length > 60 s. Try with" + + " `method='ghiasi2018'`. 
Returning NaN values for now.", + category=NeuroKitWarning, + ) + return {"EDA_Sympathetic": np.nan, "EDA_SympatheticN": np.nan} + + # Resample the eda signal before calculate the synpathetic index based on Posada (2016) + eda_signal_400hz = signal_resample( + eda_signal, sampling_rate=sampling_rate, desired_sampling_rate=400 + ) + + # 8-th order Chebyshev Type I low-pass filter + sos = scipy.signal.cheby1(8, 1, 0.8, "lowpass", fs=400, output="sos") + eda_signal_filtered = scipy.signal.sosfilt(sos, eda_signal_400hz) # First step of downsampling - downsampled_1 = scipy.signal.decimate(eda_signal, q=10, n=8) # Keep every 10th sample + downsampled_1 = scipy.signal.decimate(eda_signal_filtered, q=10, n=8) # Keep every 10th sample downsampled_2 = scipy.signal.decimate(downsampled_1, q=20, n=8) # Keep every 20th sample # High pass filter @@ -118,10 +151,10 @@ def _eda_sympathetic_posada(eda_signal, frequency_band=[0.045, 0.25], show=True, ] if show is True: - ax = psd_plot.plot(x="Frequency", y="Power", title="EDA Power Spectral Density (ms^2/Hz)") + ax = psd_plot.plot(x="Frequency", y="Power", title="EDA Power Spectral Density (us^2/Hz)") ax.set(xlabel="Frequency (Hz)", ylabel="Spectrum") - out = {"EDA_Symp": eda_symp, "EDA_SympN": eda_symp_normalized} + out = {"EDA_Sympathetic": eda_symp, "EDA_SympatheticN": eda_symp_normalized} return out @@ -129,7 +162,6 @@ def _eda_sympathetic_posada(eda_signal, frequency_band=[0.045, 0.25], show=True, def _eda_sympathetic_ghiasi( eda_signal, sampling_rate=1000, frequency_band=[0.045, 0.25], show=True, out={} ): - min_frequency = frequency_band[0] max_frequency = frequency_band[1] @@ -150,6 +182,7 @@ def _eda_sympathetic_ghiasi( # Divide the signal into segments and obtain the timefrequency representation overlap = 59 * 50 # overlap of 59s in samples + # TODO: the plot should be improved for this specific case _, _, bins = signal_timefrequency( filtered, sampling_rate=desired_sampling_rate, @@ -165,6 +198,6 @@ def _eda_sympathetic_ghiasi( eda_symp = np.mean(bins) eda_symp_normalized = eda_symp / np.max(bins) - out = {"EDA_Symp": eda_symp, "EDA_SympN": eda_symp_normalized} + out = {"EDA_Sympathetic": eda_symp, "EDA_SympatheticN": eda_symp_normalized} return out diff --git a/neurokit2/eeg/eeg_simulate.py b/neurokit2/eeg/eeg_simulate.py index 426072b812..e5dc111e8c 100644 --- a/neurokit2/eeg/eeg_simulate.py +++ b/neurokit2/eeg/eeg_simulate.py @@ -1,7 +1,9 @@ import numpy as np +from ..misc import check_random_state -def eeg_simulate(duration=1, length=None, sampling_rate=1000, noise=0.1): + +def eeg_simulate(duration=1, length=None, sampling_rate=1000, noise=0.1, random_state=None): """**EEG Signal Simulation** Simulate an artificial EEG signal. This is a crude implementation based on the MNE-Python raw @@ -17,6 +19,8 @@ def eeg_simulate(duration=1, length=None, sampling_rate=1000, noise=0.1): The desired sampling rate (in Hz, i.e., samples/second). noise : float Noise level. + random_state : None, int, numpy.random.RandomState or numpy.random.Generator + Seed for the random number generator. See for ``misc.check_random_state`` for further information. 
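A sketch of the newly seedable simulation (this assumes ``mne`` and its sample dataset are available, which the function requires anyway): two calls with the same seed should return identical signals.

```python
import numpy as np
import neurokit2 as nk

eeg_a = nk.eeg_simulate(duration=2, sampling_rate=250, noise=0.1, random_state=123)
eeg_b = nk.eeg_simulate(duration=2, sampling_rate=250, noise=0.1, random_state=123)
np.allclose(eeg_a, eeg_b)  # True: same seed, same signal
```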
Examples ---------- @@ -44,6 +48,9 @@ def eeg_simulate(duration=1, length=None, sampling_rate=1000, noise=0.1): "Please install it first (`pip install mne`).", ) from e + # Seed the random generator for reproducible results + rng = check_random_state(random_state) + # Generate number of samples automatically if length is unspecified if length is None: length = duration * sampling_rate @@ -76,15 +83,17 @@ def data_fun(times, n_dipoles=4): times = raw.times[: int(raw.info["sfreq"] * 2)] fwd = mne.read_forward_solution(fwd_file, verbose=False) stc = mne.simulation.simulate_sparse_stc( - fwd["src"], n_dipoles=n_dipoles, times=times, data_fun=data_fun + fwd["src"], + n_dipoles=n_dipoles, + times=times, + data_fun=data_fun, + random_state=rng, ) # Repeat the source activation multiple times. - raw_sim = mne.simulation.simulate_raw( - raw.info, [stc] * int(np.ceil(duration / 2)), forward=fwd, verbose=False - ) + raw_sim = mne.simulation.simulate_raw(raw.info, [stc] * int(np.ceil(duration / 2)), forward=fwd, verbose=False) cov = mne.make_ad_hoc_cov(raw_sim.info, std=noise / 1000000) - raw_sim = mne.simulation.add_noise(raw_sim, cov, iir_filter=[0.2, -0.2, 0.04], verbose=False) + raw_sim = mne.simulation.add_noise(raw_sim, cov, iir_filter=[0.2, -0.2, 0.04], verbose=False, random_state=rng) # Resample raw_sim = raw_sim.resample(sampling_rate, verbose=False) diff --git a/neurokit2/emg/emg_simulate.py b/neurokit2/emg/emg_simulate.py index 908d8f1b60..126acd0df4 100644 --- a/neurokit2/emg/emg_simulate.py +++ b/neurokit2/emg/emg_simulate.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- import numpy as np +from ..misc import check_random_state from ..signal import signal_resample @@ -11,7 +12,7 @@ def emg_simulate( noise=0.01, burst_number=1, burst_duration=1.0, - random_state=42, + random_state=None, ): """**Simulate an EMG signal** @@ -32,8 +33,8 @@ def emg_simulate( burst_duration : float or list Duration of the bursts. Can be a float (each burst will have the same duration) or a list of durations for each bursts. - random_state : int - Seed for the random number generator. + random_state : None, int, numpy.random.RandomState or numpy.random.Generator + Seed for the random number generator. See for ``misc.check_random_state`` for further information. 
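Note the behaviour change: ``random_state`` previously defaulted to the fixed seed 42, so repeated calls silently returned identical signals; it now defaults to ``None``. A minimal sketch of both behaviours:

```python
import neurokit2 as nk

emg_varying = nk.emg_simulate(duration=10, burst_number=3)  # new default: fresh noise each call
emg_repeatable = nk.emg_simulate(duration=10, burst_number=3, random_state=42)  # old behaviour
```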
Returns ---------- @@ -65,7 +66,7 @@ def emg_simulate( """ # Seed the random generator for reproducible results - np.random.seed(random_state) + rng = check_random_state(random_state) # Generate number of samples automatically if length is unspecified if length is None: @@ -89,14 +90,14 @@ def emg_simulate( # Generate bursts bursts = [] for burst in range(burst_number): - bursts += [list(np.random.uniform(-1, 1, size=int(1000 * burst_duration[burst])) + 0.08)] + bursts += [list(rng.uniform(-1, 1, size=int(1000 * burst_duration[burst])) + 0.08)] # Generate quiet n_quiet = burst_number + 1 # number of quiet periods (in between bursts) duration_quiet = (duration - total_duration_bursts) / n_quiet # duration of each quiet period quiets = [] for quiet in range(n_quiet): # pylint: disable=W0612 - quiets += [list(np.random.uniform(-0.05, 0.05, size=int(1000 * duration_quiet)) + 0.08)] + quiets += [list(rng.uniform(-0.05, 0.05, size=int(1000 * duration_quiet)) + 0.08)] # Merge the two emg = [] @@ -107,7 +108,7 @@ def emg_simulate( emg = np.array(emg) # Add random (gaussian distributed) noise - emg += np.random.normal(0, noise, len(emg)) + emg += rng.normal(0, noise, len(emg)) # Resample emg = signal_resample( diff --git a/neurokit2/hrv/hrv_frequency.py b/neurokit2/hrv/hrv_frequency.py index 1c3ebc1ae3..d56667bc2d 100644 --- a/neurokit2/hrv/hrv_frequency.py +++ b/neurokit2/hrv/hrv_frequency.py @@ -265,12 +265,19 @@ def _hrv_frequency_show( __, ax = plt.subplots() frequency_band = [ulf, vlf, lf, hf, vhf] + + # Compute sampling rate for plot windows + if sampling_rate is None: + med_sampling_rate = np.median(np.diff(t)) # This is just for visualization purposes (#800) + else: + med_sampling_rate = sampling_rate + for i in range(len(frequency_band)): # pylint: disable=C0200 min_frequency = frequency_band[i][0] if min_frequency == 0: min_frequency = 0.001 # sanitize lowest frequency - window_length = int((2 / min_frequency) * sampling_rate) + window_length = int((2 / min_frequency) * med_sampling_rate) if window_length <= len(rri) / 2: break diff --git a/neurokit2/markov/markov_simulate.py b/neurokit2/markov/markov_simulate.py index 03d1599178..eb836a0246 100644 --- a/neurokit2/markov/markov_simulate.py +++ b/neurokit2/markov/markov_simulate.py @@ -1,11 +1,11 @@ # -*- coding: utf-8 -*- import numpy as np -import scipy.stats +from ..misc import check_random_state from .transition_matrix import _sanitize_tm_input -def markov_simulate(tm, n=10): +def markov_simulate(tm, n=10, random_state=None): """**Markov Chain Simulation** Given a :func:`transition_matrix`, this function simulates the corresponding sequence of states @@ -17,6 +17,8 @@ def markov_simulate(tm, n=10): A probability matrix obtained from :func:`transition_matrix`. n : int Length of the simulated sequence. + random_state : None, int, numpy.random.RandomState or numpy.random.Generator + Seed for the random number generator. See for ``misc.check_random_state`` for further information. 
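The sampling step now draws directly from the seeded generator (``rng.choice(len(_ps), p=_ps)``) rather than from ``scipy.stats.multinomial`` with ad-hoc integer seeds, so a single ``random_state`` makes the whole simulated sequence reproducible. A sketch with a toy sequence:

```python
import neurokit2 as nk

sequence = ["A", "A", "B", "B", "A", "C", "C", "A", "B", "A"]
tm, _ = nk.transition_matrix(sequence)
states = nk.markov_simulate(tm, n=20, random_state=42)  # same seed, same state sequence
```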
Returns ------- @@ -51,13 +53,13 @@ def markov_simulate(tm, n=10): seq = np.zeros(n, dtype=int) seq[0] = _start - # random seeds - random_states = np.random.randint(0, n, n) + # Seed the random generator for reproducible results + rng = check_random_state(random_state) # simulation procedure for i in range(1, n): _ps = tm.values[seq[i - 1]] - _sample = np.argmax(scipy.stats.multinomial.rvs(1, _ps, 1, random_state=random_states[i])) + _sample = rng.choice(len(_ps), p=_ps) seq[i] = _sample return states[seq] diff --git a/neurokit2/microstates/microstates_segment.py b/neurokit2/microstates/microstates_segment.py index 8f19a95a24..62eb176f3d 100644 --- a/neurokit2/microstates/microstates_segment.py +++ b/neurokit2/microstates/microstates_segment.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- import numpy as np +from ..misc import check_random_state from ..stats import cluster from ..stats.cluster_quality import _cluster_quality_gev from .microstates_classify import microstates_classify @@ -186,12 +187,11 @@ def microstates_segment( # Run clustering algorithm if method in ["kmods", "kmod", "kmeans modified", "modified kmeans"]: - # If no random state specified, generate a random state - if not isinstance(random_state, np.random.RandomState): - random_state = np.random.RandomState(random_state) + # Seed the random generator for reproducible results + rng = check_random_state(random_state) # Generate one random integer for each run - random_state = random_state.choice(range(n_runs * 1000), n_runs, replace=False) + random_state = rng.choice(n_runs * 1000, n_runs, replace=False) # Initialize values gev = 0 @@ -244,11 +244,7 @@ def microstates_segment( else: # Run clustering algorithm on subset _, microstates, info = cluster( - data[:, indices].T, - method=method, - n_clusters=n_microstates, - random_state=random_state, - **kwargs + data[:, indices].T, method=method, n_clusters=n_microstates, random_state=random_state, **kwargs ) # Run segmentation on the whole dataset diff --git a/neurokit2/misc/__init__.py b/neurokit2/misc/__init__.py index 365f475eb5..0a49dfcf9a 100644 --- a/neurokit2/misc/__init__.py +++ b/neurokit2/misc/__init__.py @@ -1,6 +1,11 @@ -"""Submodule for NeuroKit.""" +"""Submodule for NeuroKit. + +isort:skip_file (since isort-ing the imports generates circular imports) + +""" from ._warnings import NeuroKitWarning +from .random import check_random_state, check_random_state_children, spawn_random_state from .check_type import check_type from .copyfunction import copyfunction from .expspace import expspace @@ -15,6 +20,8 @@ from .progress_bar import progress_bar from .replace import replace from .type_converters import as_vector +from .report import create_report + __all__ = [ "listify", @@ -32,4 +39,8 @@ "progress_bar", "find_plateau", "copyfunction", + "check_random_state", + "check_random_state_children", + "spawn_random_state", + "create_report", ] diff --git a/neurokit2/misc/random.py b/neurokit2/misc/random.py new file mode 100644 index 0000000000..98fc9b85bb --- /dev/null +++ b/neurokit2/misc/random.py @@ -0,0 +1,122 @@ +import copy +import numbers + +import numpy as np + + +def check_random_state(seed=None): + """**Turn seed into a random number generator** + + Parameters + ---------- + seed : None, int, numpy.random.RandomState or numpy.random.Generator + Seed for the random number generator. If seed is None, a numpy.random.Generator is created with fresh, + unpredictable entropy. If seed is an int, a new numpy.random.RandomState instance is created, seeded with + seed. 
If seed is already a Generator or RandomState instance then that instance is used. + The main difference between the legacy RandomState class and the new Generator class is that the former + has better reproducibility and compatibility guarantees (it is effectively frozen from NumPy v1.16) + while the latter has better statistical "randomness" properties and lower computational cost. + See: https://numpy.org/doc/stable/reference/random/legacy.html for further information. + Note: to initialise the new Generator class with an integer seed, use, e.g.: + ``check_random_state(np.random.SeedSequence(123))``. + + Returns + ------- + rng: numpy.random.Generator or numpy.random.RandomState + Random number generator. + """ + # If seed is an integer, use the legacy RandomState class + if isinstance(seed, numbers.Integral): + return np.random.RandomState(seed) + # If seed is already a random number generator class return it as it is + if isinstance(seed, (np.random.Generator, np.random.RandomState)): + return seed + # If seed is something else, use the new Generator class + return np.random.default_rng(seed) + + +def spawn_random_state(rng, n_children=1): + """**Create new independent children random number generators from parent generator/seed** + + Parameters + ---------- + rng : None, int, numpy.random.RandomState or numpy.random.Generator + Random number generator to be spawned (numpy.random.RandomState or numpy.random.Generator). If it is None + or an int seed, then a parent random number generator is first created with ``misc.check_random_state``. + n_children : int + Number of children generators to be spawned. + + Returns + ------- + children_generators : list of generators + List of children random number generators. + + Examples + ---------- + * **Example 1**: Simulate data for a cohort of participants + + .. ipython:: python + + import neurokit2 as nk + + master_seed = 42 + n_participants = 8 + participants_RNGs = nk.misc.spawn_random_state(master_seed, n_children=n_participants) + PPGs = [] + for i in range(n_participants): + PPGs.append(nk.ppg_simulate(random_state=participants_RNGs[i])) + """ + rng = check_random_state(rng) + + try: + # Try to spawn the rng by using the new API + return rng.spawn(n_children) + except AttributeError: + # It looks like this version of numpy does not implement rng.spawn(), so we do its job + # manually; see: https://github.com/numpy/numpy/pull/23195 + if rng._bit_generator._seed_seq is not None: + rng_class = type(rng) + bit_generator_class = type(rng._bit_generator) + return [rng_class(bit_generator_class(seed=s)) for s in rng._bit_generator._seed_seq.spawn(n_children)] + except TypeError: + # The rng does not support spawning through SeedSequence, see below + pass + + # Implement a rudimentary but reproducible substitute for spawning rng's that also works for + # RandomState with the legacy MT19937 bit generator + # NOTE: Spawning the same generator multiple times is not supported (may lead to mutually + # dependent spawned generators). Spawning the children (in a tree structure) is allowed.
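+    # How the fallback works: `jumped()` returns a copy of the bit generator advanced as if a
+    # very large number of draws had already been made, so raw integers taken from that copy
+    # can seed the children without consuming or overlapping the parent generator's stream.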
+ + # Start by creating an rng to sample integers (to be used as seeds for the children) without + # advancing the original rng + temp_rng = rng._bit_generator.jumped() + # Generate and return children initialised with the seeds obtained from temp_rng + return [np.random.RandomState(seed=s) for s in temp_rng.random_raw(n_children)] + + +def check_random_state_children(random_state_parent, random_state_children, n_children=1): + """**Create new independent children random number generators to be used in sub-functions** + + Parameters + ---------- + random_state_parent : None, int, numpy.random.RandomState or numpy.random.Generator + Parent's random state (see ``misc.check_random_state``). + random_state_children : {'legacy', 'spawn'}, None, int, numpy.random.RandomState or numpy.random.Generator + If ``"legacy"``, use the same random state as the parent (discouraged as it generates dependent random + streams). If ``"spawn"``, spawn independent children random number generators from the parent random + state. If any of the other types, generate independent children random number generators from the + random_state_children provided. + n_children : int + Number of children generators to be spawned. + + Returns + ------- + children_generators : list of generators + List of children random number generators. + """ + if random_state_children == "legacy": + return [copy.copy(random_state_parent) for _ in range(n_children)] + elif random_state_children == "spawn": + return spawn_random_state(random_state_parent, n_children) + else: + return spawn_random_state(random_state_children, n_children) diff --git a/neurokit2/misc/report.py b/neurokit2/misc/report.py index 96d89fd33f..444a92cc47 100644 --- a/neurokit2/misc/report.py +++ b/neurokit2/misc/report.py @@ -1,6 +1,100 @@ # -*- coding: utf-8 -*- import inspect +import matplotlib +import numpy as np +import pandas as pd + + +def create_report(file="myreport.html", signals=None, info={"sampling_rate": 1000}, fig=None): + """**Reports** + + Create report containing description and figures of processing. + This function is meant to be used via the :func:`.rsp_process` or :func:`.ppg_process` + functions. + + Parameters + ---------- + file : str + Name of the file to save the report to. Can also be ``"text"`` to simply print the text in + the console. + signals : pd.DataFrame + A DataFrame of signals. Usually obtained from :func:`.rsp_process` or :func:`.ppg_process` + info : dict + A dictionary containing the information of peaks and the signals' sampling rate. Usually + obtained from :func:`.rsp_process` or :func:`.ppg_process`. + fig : matplotlib.figure.Figure or plotly.graph_objects.Figure + A figure containing the processed signals. Usually obtained from :func:`.rsp_plot` or + :func:`.ppg_plot`. + + Returns + ------- + str + The report as a string. + + See Also + -------- + rsp_process, ppg_process + + Examples + -------- + .. ipython:: python + + import neurokit2 as nk + + rsp = nk.rsp_simulate(duration=30, sampling_rate=200, random_state=0) + signals, info = nk.rsp_process(rsp, sampling_rate=200, report="text") + + """ + + description, ref = text_combine(info) + table_html, table_md = summarize_table(signals) + + # Print text in the console + for key in [k for k in info.keys() if "text_" in k]: + print(info[key] + "\n") + + print(table_md) + + print("\nReferences") + for s in info["references"]: + print("- " + s) + + # Save report + if ".html" in file: + # Make figures + fig_html = '

<h2>Visualization</h2>

' + fig_html += fig_to_html(fig) + print(f"The report has been saved to {file}") + contents = [description, table_html, fig_html, ref] + html_save(contents=contents, file=file) + + +def summarize_table(signals): + """Create table to summarize statistics of a signal.""" + + # TODO: add more features + summary = {} + + rate_cols = [col for col in signals.columns if "Rate" in col] + if len(rate_cols) > 0: + rate_col = rate_cols[0] + summary[rate_col + "_Mean"] = np.mean(signals[rate_col]) + summary[rate_col + "_SD"] = np.std(signals[rate_col]) + summary_table = pd.DataFrame(summary, index=[0]) + # Make HTML and Markdown versions + html = '

<h2>Summary table</h2>

' + summary_table.to_html( index=None ) + + try: + md = summary_table.to_markdown(index=None) + except ImportError: + md = summary_table # in case printing markdown export fails + return html, md + else: + return "", "" + def text_combine(info): """Reformat dictionary describing processing methods as strings to be inserted into HTML file.""" @@ -17,6 +111,32 @@ def text_combine(info): return preprocessing, ref +def fig_to_html(fig): + """Convert a figure to HTML.""" + if isinstance(fig, str): + return fig + elif isinstance(fig, matplotlib.pyplot.Figure): + # https://stackoverflow.com/questions/48717794/matplotlib-embed-figures-in-auto-generated-html + import base64 + from io import BytesIO + + temp_file = BytesIO() + fig.savefig(temp_file, format="png") + encoded = base64.b64encode(temp_file.getvalue()).decode("utf-8") + return '<img src="data:image/png;base64,{}">'.format(encoded) + else: + try: + import plotly + + if isinstance(fig, plotly.graph_objs._figure.Figure): + # https://stackoverflow.com/questions/59868987/plotly-saving-multiple-plots-into-a-single-html + return fig.to_html().split("<body>")[1].split("</body>")[0] + else: + return "" + except ImportError: + return "" + + def html_save(contents=[], file="myreport.html"): """Combine figures and text in a single HTML document.""" # https://stackoverflow.com/questions/59868987/plotly-saving-multiple-plots-into-a-single-html @@ -66,7 +186,11 @@ def get_default_args(func): """Get the default values of a function's arguments.""" # https://stackoverflow.com/questions/12627118/get-a-function-arguments-default-value signature = inspect.signature(func) - return {k: v.default for k, v in signature.parameters.items() if v.default is not inspect.Parameter.empty} + return { + k: v.default + for k, v in signature.parameters.items() + if v.default is not inspect.Parameter.empty + } def get_kwargs(report_info, func): diff --git a/neurokit2/ppg/ppg_clean.py b/neurokit2/ppg/ppg_clean.py index 3a0d88600a..0d9f14eb24 100644 --- a/neurokit2/ppg/ppg_clean.py +++ b/neurokit2/ppg/ppg_clean.py @@ -82,6 +82,8 @@ def ppg_clean(ppg_signal, sampling_rate=1000, heart_rate=None, method="elgendi") clean = _ppg_clean_elgendi(ppg_signal, sampling_rate) elif method in ["nabian2018"]: clean = _ppg_clean_nabian2018(ppg_signal, sampling_rate, heart_rate=heart_rate) + elif method is None or method == "none": + clean = ppg_signal else: raise ValueError("`method` not found. Must be one of 'elgendi' or 'nabian2018'.") diff --git a/neurokit2/ppg/ppg_methods.py b/neurokit2/ppg/ppg_methods.py index a248e5ebac..8b4e778516 100644 --- a/neurokit2/ppg/ppg_methods.py +++ b/neurokit2/ppg/ppg_methods.py @@ -120,7 +120,7 @@ def ppg_methods( An open-source feature extraction tool for the analysis of peripheral physiological data. IEEE Journal of Translational Engineering in Health and Medicine, 6, 1-11.""" ) - elif method_cleaning == "none": + elif method_cleaning in ["none"]: report_info["text_cleaning"] += " was directly used for peak detection without preprocessing."
else: # just in case more methods are added diff --git a/neurokit2/ppg/ppg_plot.py b/neurokit2/ppg/ppg_plot.py index 004f01e652..5dfdcd3e0f 100644 --- a/neurokit2/ppg/ppg_plot.py +++ b/neurokit2/ppg/ppg_plot.py @@ -110,6 +110,7 @@ def ppg_plot(ppg_signals, sampling_rate=None, static=True): ) ax1.axhline(y=ppg_rate_mean, label="Mean", linestyle="--", color="#FBB41C") ax1.legend(loc="upper right") + return fig else: try: import plotly.graph_objects as go diff --git a/neurokit2/ppg/ppg_process.py b/neurokit2/ppg/ppg_process.py index c4500dd1d0..a7ffee2c42 100644 --- a/neurokit2/ppg/ppg_process.py +++ b/neurokit2/ppg/ppg_process.py @@ -2,13 +2,13 @@ import pandas as pd from ..misc import as_vector +from ..misc.report import create_report from ..signal import signal_rate from ..signal.signal_formatpeaks import _signal_from_indices from .ppg_clean import ppg_clean from .ppg_findpeaks import ppg_findpeaks from .ppg_methods import ppg_methods -from .ppg_report import ppg_report - +from .ppg_plot import ppg_plot def ppg_process(ppg_signal, sampling_rate=1000, method="elgendi", report=None, **kwargs): """**Process a photoplethysmogram (PPG) signal** @@ -68,16 +68,13 @@ def ppg_process(ppg_signal, sampling_rate=1000, method="elgendi", report=None, * ppg_signal = as_vector(ppg_signal) methods = ppg_methods(sampling_rate=sampling_rate, method=method, **kwargs) - if methods["method_cleaning"] is None or methods["method_cleaning"].lower() == "none": - ppg_cleaned = ppg_signal - else: - # Clean signal - ppg_cleaned = ppg_clean( - ppg_signal, - sampling_rate=sampling_rate, - method=methods["method_cleaning"], - **methods["kwargs_cleaning"] - ) + # Clean signal + ppg_cleaned = ppg_clean( + ppg_signal, + sampling_rate=sampling_rate, + method=methods["method_cleaning"], + **methods["kwargs_cleaning"] + ) # Find peaks info = ppg_findpeaks( @@ -109,6 +106,10 @@ def ppg_process(ppg_signal, sampling_rate=1000, method="elgendi", report=None, * if report is not None: # Generate report containing description and figures of processing - ppg_report(file=report, signals=signals, info=methods) + if ".html" in str(report): + fig = ppg_plot(signals, sampling_rate=sampling_rate) + else: + fig = None + create_report(file=report, signals=signals, info=methods, fig=fig) return signals, info diff --git a/neurokit2/ppg/ppg_report.py b/neurokit2/ppg/ppg_report.py deleted file mode 100644 index 3c88231cb0..0000000000 --- a/neurokit2/ppg/ppg_report.py +++ /dev/null @@ -1,96 +0,0 @@ -import numpy as np -import pandas as pd - -from ..misc.report import html_save, text_combine -from .ppg_plot import ppg_plot - - -def ppg_report(file="myreport.html", signals=None, info={"sampling_rate": 1000}): - """**PPG Reports** - - Create report containing description and figures of processing. - This function is meant to be used via the `ppg_process()` function. - - Parameters - ---------- - file : str - Name of the file to save the report to. Can also be ``"text"`` to simply print the text in - the console. - signals : pd.DataFrame - A DataFrame of signals. Usually obtained from :func:`.ppg_process`. - info : dict - A dictionary containing the information of peaks and the signals' sampling rate. Usually - obtained from :func:`.ppg_process`. - - Returns - ------- - str - The report as a string. - - See Also - -------- - ppg_process - - Examples - -------- - .. 
ipython:: python - - import neurokit2 as nk - - ppg = nk.ppg_simulate(duration=10, sampling_rate=200, heart_rate=70) - signals, info = nk.ppg_process(ppg, sampling_rate=200, report="console_only") - - """ - - description, ref = text_combine(info) - table_html, table_md = ppg_table(signals) - - # Print text in the console - for key in ["text_cleaning", "text_peaks"]: - print(info[key] + "\n") - - print(table_md) - - print("\nReferences") - for s in info["references"]: - print("- " + s) - - # Make figures - fig = '
<h2>Visualization</h2>' - fig += ( - ppg_plot(signals, sampling_rate=info["sampling_rate"], static=False) - .to_html() - .split("<body>")[1] - .split("</body>")[0] - ) - - # Save report - if ".html" in file: - print(f"The report has been saved to {file}") - contents = [description, table_html, fig, ref] - html_save(contents=contents, file=file) - - -# ============================================================================= -# Internals -# ============================================================================= -def ppg_table(signals): - """Create table to summarize statistics of a PPG signal.""" - - # TODO: add more features - summary = {} - - summary["PPG_Rate_Mean"] = np.mean(signals["PPG_Rate"]) - summary["PPG_Rate_SD"] = np.std(signals["PPG_Rate"]) - summary_table = pd.DataFrame(summary, index=[0]) # .transpose() - - # Make HTML and Markdown versions - html = '<h2>Summary table</h2>
' + summary_table.to_html( - index=None - ) - - try: - md = summary_table.to_markdown(index=None) - except ImportError: - md = summary_table # in case printing markdown export fails - return html, md diff --git a/neurokit2/ppg/ppg_simulate.py b/neurokit2/ppg/ppg_simulate.py index e913fa250b..b30aeaad27 100644 --- a/neurokit2/ppg/ppg_simulate.py +++ b/neurokit2/ppg/ppg_simulate.py @@ -4,6 +4,7 @@ import numpy as np import scipy.interpolate +from ..misc import check_random_state, check_random_state_children from ..signal import signal_distort @@ -19,6 +20,7 @@ def ppg_simulate( burst_number=0, burst_amplitude=1, random_state=None, + random_state_distort="spawn", show=False, ): """**Simulate a photoplethysmogram (PPG) signal** @@ -62,8 +64,14 @@ Determines how many high frequency burst artifacts occur. The default is 0. show : bool If ``True``, returns a plot of the landmarks and interpolated PPG. Useful for debugging. - random_state : int - Seed for the random number generator. Keep it fixed for reproducible results. + random_state : None, int, numpy.random.RandomState or numpy.random.Generator + Seed for the random number generator. See ``misc.check_random_state`` for further information. + random_state_distort : {'legacy', 'spawn'}, None, int, numpy.random.RandomState or numpy.random.Generator + Random state to be used to distort the signal. If ``"legacy"``, use the same random state used to + generate the signal (discouraged as it creates dependent random streams). If ``"spawn"``, spawn + independent children random number generators from the random_state argument. If any of the other types, + generate independent children random number generators from the random_state_distort provided (this + allows generating multiple versions of the same signal distorted by different random noise realizations). Returns ------- @@ -83,7 +91,9 @@ ppg = nk.ppg_simulate(duration=40, sampling_rate=500, heart_rate=75, random_state=42) """ - np.random.seed(random_state) + # Seed the random generator for reproducible results + rng = check_random_state(random_state) + random_state_distort = check_random_state_children(random_state, random_state_distort, n_children=4) # At the requested sampling rate, how long is a period at the requested # heart-rate and how often does that period fit into the requested @@ -104,24 +114,24 @@ ) # Randomly modulate duration of waves by subtracting a random value between # 0 and ibi_randomness% of the wave duration (see function definition). - x_onset = _random_x_offset(x_onset, ibi_randomness) + x_onset = _random_x_offset(x_onset, ibi_randomness, rng) # Corresponding signal amplitudes. - y_onset = np.random.normal(0, 0.1, n_period) + y_onset = rng.normal(0, 0.1, n_period) # Seconds at which the systolic peaks occur within the waves. - x_sys = x_onset + np.random.normal(0.175, 0.01, n_period) * periods + x_sys = x_onset + rng.normal(0.175, 0.01, n_period) * periods # Corresponding signal amplitudes. - y_sys = y_onset + np.random.normal(1.5, 0.15, n_period) + y_sys = y_onset + rng.normal(1.5, 0.15, n_period) # Seconds at which the dicrotic notches occur within the waves. - x_notch = x_onset + np.random.normal(0.4, 0.001, n_period) * periods + x_notch = x_onset + rng.normal(0.4, 0.001, n_period) * periods # Corresponding signal amplitudes (percentage of systolic peak height).
- y_notch = y_sys * np.random.normal(0.49, 0.01, n_period) + y_notch = y_sys * rng.normal(0.49, 0.01, n_period) # Seconds at which the diastolic peaks occur within the waves. - x_dia = x_onset + np.random.normal(0.45, 0.001, n_period) * periods + x_dia = x_onset + rng.normal(0.45, 0.001, n_period) * periods # Corresponding signal amplitudes (percentage of systolic peak height). - y_dia = y_sys * np.random.normal(0.51, 0.01, n_period) + y_dia = y_sys * rng.normal(0.51, 0.01, n_period) x_all = np.concatenate((x_onset, x_sys, x_notch, x_dia)) x_all.sort(kind="mergesort") @@ -158,7 +168,7 @@ def ppg_simulate( sampling_rate=sampling_rate, noise_amplitude=drift, noise_frequency=drift_freq, - random_state=random_state, + random_state=random_state_distort[0], silent=True, ) # Add motion artifacts. @@ -169,7 +179,7 @@ def ppg_simulate( sampling_rate=sampling_rate, noise_amplitude=motion_amplitude, noise_frequency=motion_freq, - random_state=random_state, + random_state=random_state_distort[1], silent=True, ) # Add high frequency bursts. @@ -180,7 +190,7 @@ def ppg_simulate( artifacts_amplitude=burst_amplitude, artifacts_frequency=100, artifacts_number=burst_number, - random_state=random_state, + random_state=random_state_distort[2], silent=True, ) # Add powerline noise. @@ -190,7 +200,7 @@ def ppg_simulate( sampling_rate=sampling_rate, powerline_amplitude=powerline_amplitude, powerline_frequency=50, - random_state=random_state, + random_state=random_state_distort[3], silent=True, ) @@ -240,7 +250,7 @@ def _frequency_modulation(periods, seconds, modulation_frequency, modulation_str return periods_modulated, seconds_modulated -def _random_x_offset(x, offset_weight): +def _random_x_offset(x, offset_weight, rng): """From each wave onset xi subtract offset_weight * (xi - xi-1) where xi-1 is the wave onset preceding xi. offset_weight must be between 0 and 1. """ @@ -261,7 +271,7 @@ def _random_x_offset(x, offset_weight): return x max_offsets = offset_weight * x_diff - offsets = [np.random.uniform(0, i) for i in max_offsets] + offsets = [rng.uniform(0, i) for i in max_offsets] x_offset = x.copy() x_offset[1:] -= offsets diff --git a/neurokit2/rsp/rsp_amplitude.py b/neurokit2/rsp/rsp_amplitude.py index 6c49b3278a..f2de9caab4 100644 --- a/neurokit2/rsp/rsp_amplitude.py +++ b/neurokit2/rsp/rsp_amplitude.py @@ -78,7 +78,7 @@ def rsp_amplitude( # Format input. peaks, troughs = _rsp_fixpeaks_retrieve(peaks, troughs) - # To consistenty calculate amplitude, peaks and troughs must have the same + # To consistently calculate amplitude, peaks and troughs must have the same # number of elements, and the first trough must precede the first peak. if (peaks.size != troughs.size) or (peaks[0] <= troughs[0]): raise TypeError( diff --git a/neurokit2/rsp/rsp_clean.py b/neurokit2/rsp/rsp_clean.py index 033c532a13..0289fa99c9 100644 --- a/neurokit2/rsp/rsp_clean.py +++ b/neurokit2/rsp/rsp_clean.py @@ -95,6 +95,8 @@ def rsp_clean(rsp_signal, sampling_rate=1000, method="khodadad2018", **kwargs): rsp_signal, **kwargs, ) + elif method is None or method == "none": + clean = rsp_signal else: raise ValueError( "NeuroKit error: rsp_clean(): 'method' should be one of 'khodadad2018', 'biosppy' or 'hampel'." 
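To illustrate the new ``random_state_distort`` mechanism introduced in ``ppg_simulate()`` above (and mirrored in ``rsp_simulate()`` further below), here is a minimal usage sketch; the parameter values are arbitrary illustrations and not part of the diff:

.. code-block:: python

    import numpy as np
    import neurokit2 as nk

    # Same seed for the underlying waveform, different integer seeds for the
    # distortions: the two signals share landmarks but carry different noise.
    ppg_a = nk.ppg_simulate(duration=10, sampling_rate=100, heart_rate=70,
                            random_state=42, random_state_distort=1)
    ppg_b = nk.ppg_simulate(duration=10, sampling_rate=100, heart_rate=70,
                            random_state=42, random_state_distort=2)

    # With the default "spawn", children generators are derived deterministically
    # from random_state, so repeated calls remain fully reproducible.
    assert np.allclose(
        nk.ppg_simulate(duration=10, sampling_rate=100, random_state=42),
        nk.ppg_simulate(duration=10, sampling_rate=100, random_state=42),
    )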
diff --git a/neurokit2/rsp/rsp_findpeaks.py b/neurokit2/rsp/rsp_findpeaks.py index 50e53dd33b..d6409860e4 100644 --- a/neurokit2/rsp/rsp_findpeaks.py +++ b/neurokit2/rsp/rsp_findpeaks.py @@ -141,6 +141,12 @@ def _rsp_findpeaks_scipy(rsp_cleaned, sampling_rate, peak_distance=0.8, peak_pro -rsp_cleaned, distance=peak_distance, prominence=peak_prominence ) + # Combine peaks and troughs and sort them. + extrema = np.sort(np.concatenate((peaks, troughs))) + # Sanitize. + extrema, amplitudes = _rsp_findpeaks_outliers(rsp_cleaned, extrema, amplitude_min=0) + peaks, troughs = _rsp_findpeaks_sanitize(extrema, amplitudes) + info = {"RSP_Peaks": peaks, "RSP_Troughs": troughs} return info diff --git a/neurokit2/rsp/rsp_methods.py b/neurokit2/rsp/rsp_methods.py index 4919e22e0b..169dd35c22 100644 --- a/neurokit2/rsp/rsp_methods.py +++ b/neurokit2/rsp/rsp_methods.py @@ -3,7 +3,7 @@ from ..misc.report import get_kwargs from .rsp_clean import rsp_clean -from .rsp_findpeaks import rsp_findpeaks +from .rsp_peaks import rsp_peaks from .rsp_rvt import rsp_rvt @@ -39,14 +39,14 @@ def rsp_methods( The method used to find peaks. If ``"default"``, will be set to the value of ``"method"``. Defaults to ``"default"``. For more information, see the ``"method"`` argument - of :func:`.rsp_findpeaks`. + of :func:`.rsp_peaks`. method_rvt: str The method used to compute respiratory volume per time. Defaults to ``"harrison"``. For more information, see the ``"method"`` argument of :func:`.rsp_rvt`. **kwargs - Other arguments to be passed to :func:`.rsp_clean` and - :func:`.rsp_findpeaks`. + Other arguments to be passed to :func:`.rsp_clean`, + :func:`.rsp_peaks`, and :func:`.rsp_rvt`. Returns ------- @@ -87,7 +87,7 @@ def rsp_methods( # Get arguments to be passed to cleaning and peak finding functions kwargs_cleaning, report_info = get_kwargs(report_info, rsp_clean) - kwargs_peaks, report_info = get_kwargs(report_info, rsp_findpeaks) + kwargs_peaks, report_info = get_kwargs(report_info, rsp_peaks) kwargs_rvt, report_info = get_kwargs(report_info, rsp_rvt) # Save keyword arguments in dictionary @@ -125,12 +125,12 @@ def rsp_methods( including systematic changes and “missed” deep breaths. NeuroImage, Volume 204, 116234""" ) - elif method_cleaning in ["biossppy"]: + elif method_cleaning in ["biosppy"]: report_info["text_cleaning"] += ( " was preprocessed using a second order 0.1-0.35 Hz bandpass " + "Butterworth filter followed by a constant detrending." ) - elif method_cleaning is None or method_cleaning == "none": + elif method_cleaning in ["none"]: report_info[ "text_cleaning" ] += "was directly used for peak detection without preprocessing." diff --git a/neurokit2/rsp/rsp_plot.py b/neurokit2/rsp/rsp_plot.py index e421b932b6..7c7378b151 100644 --- a/neurokit2/rsp/rsp_plot.py +++ b/neurokit2/rsp/rsp_plot.py @@ -4,7 +4,7 @@ import pandas as pd -def rsp_plot(rsp_signals, sampling_rate=None, figsize=(10, 10)): +def rsp_plot(rsp_signals, sampling_rate=None, figsize=(10, 10), static=True): """**Visualize respiration (RSP) data** Parameters @@ -15,6 +15,10 @@ def rsp_plot(rsp_signals, sampling_rate=None, figsize=(10, 10)): The desired sampling rate (in Hz, i.e., samples/second). figsize : tuple The size of the figure (width, height) in inches. + static : bool + If True, a static plot will be generated with matplotlib. + If False, an interactive plot will be generated with plotly. + Defaults to True. 
See Also -------- @@ -22,13 +26,8 @@ def rsp_plot(rsp_signals, sampling_rate=None, figsize=(10, 10)): Returns ------- - Though the function returns nothing, the figure can be retrieved and saved as follows: - - .. code-block:: console - - # To be run after rsp_plot() - fig = plt.gcf() - fig.savefig("myfig.png") + fig + Figure representing a plot of the processed RSP signals. Examples -------- @@ -56,122 +55,322 @@ def rsp_plot(rsp_signals, sampling_rate=None, figsize=(10, 10)): exhale = np.where(rsp_signals["RSP_Phase"] == 0)[0] nrow = 2 + + # Determine mean rate. + rate_mean = np.mean(rsp_signals["RSP_Rate"]) + if "RSP_Amplitude" in list(rsp_signals.columns): nrow += 1 + # Determine mean amplitude. + amplitude_mean = np.mean(rsp_signals["RSP_Amplitude"]) if "RSP_RVT" in list(rsp_signals.columns): nrow += 1 + # Determine mean RVT. + rvt_mean = np.mean(rsp_signals["RSP_RVT"]) if "RSP_Symmetry_PeakTrough" in list(rsp_signals.columns): nrow += 1 - fig, ax = plt.subplots(nrows=nrow, ncols=1, sharex=True, figsize=figsize) + # Get signals marking inspiration and expiration. + exhale_signal, inhale_signal = _rsp_plot_phase(rsp_signals, troughs, peaks) # Determine unit of x-axis. - last_ax = fig.get_axes()[-1] if sampling_rate is not None: - last_ax.set_xlabel("Time (seconds)") + x_label = "Time (seconds)" x_axis = np.linspace(0, len(rsp_signals) / sampling_rate, len(rsp_signals)) else: - last_ax.set_xlabel("Samples") + x_label = "Samples" x_axis = np.arange(0, len(rsp_signals)) - # Plot cleaned and raw respiration as well as peaks and troughs. - ax[0].set_title("Raw and Cleaned Signal") - fig.suptitle("Respiration (RSP)", fontweight="bold") - - ax[0].plot(x_axis, rsp_signals["RSP_Raw"], color="#B0BEC5", label="Raw", zorder=1) - ax[0].plot( - x_axis, rsp_signals["RSP_Clean"], color="#2196F3", label="Cleaned", zorder=2, linewidth=1.5 - ) - - ax[0].scatter( - x_axis[peaks], - rsp_signals["RSP_Clean"][peaks], - color="red", - label="Exhalation Onsets", - zorder=3, - ) - ax[0].scatter( - x_axis[troughs], - rsp_signals["RSP_Clean"][troughs], - color="orange", - label="Inhalation Onsets", - zorder=4, - ) - - # Shade region to mark inspiration and expiration. - exhale_signal, inhale_signal = _rsp_plot_phase(rsp_signals, troughs, peaks) + if static: + fig, ax = plt.subplots(nrows=nrow, ncols=1, sharex=True, figsize=figsize) - ax[0].fill_between( - x_axis[exhale], - exhale_signal[exhale], - rsp_signals["RSP_Clean"][exhale], - where=rsp_signals["RSP_Clean"][exhale] > exhale_signal[exhale], - color="#CFD8DC", - linestyle="None", - label="exhalation", - ) - ax[0].fill_between( - x_axis[inhale], - inhale_signal[inhale], - rsp_signals["RSP_Clean"][inhale], - where=rsp_signals["RSP_Clean"][inhale] > inhale_signal[inhale], - color="#ECEFF1", - linestyle="None", - label="inhalation", - ) - - ax[0].legend(loc="upper right") - - # Plot rate and optionally amplitude. - ax[1].set_title("Breathing Rate") - ax[1].plot(x_axis, rsp_signals["RSP_Rate"], color="#4CAF50", label="Rate", linewidth=1.5) - rate_mean = np.mean(rsp_signals["RSP_Rate"]) - ax[1].axhline(y=rate_mean, label="Mean", linestyle="--", color="#4CAF50") - ax[1].legend(loc="upper right") + last_ax = fig.get_axes()[-1] + last_ax.set_xlabel(x_label) - if "RSP_Amplitude" in list(rsp_signals.columns): - ax[2].set_title("Breathing Amplitude") + # Plot cleaned and raw respiration as well as peaks and troughs. 
+ ax[0].set_title("Raw and Cleaned Signal") + fig.suptitle("Respiration (RSP)", fontweight="bold") - ax[2].plot( - x_axis, rsp_signals["RSP_Amplitude"], color="#009688", label="Amplitude", linewidth=1.5 + ax[0].plot( + x_axis, rsp_signals["RSP_Raw"], color="#B0BEC5", label="Raw", zorder=1 + ) + ax[0].plot( + x_axis, + rsp_signals["RSP_Clean"], + color="#2196F3", + label="Cleaned", + zorder=2, + linewidth=1.5, ) - amplitude_mean = np.mean(rsp_signals["RSP_Amplitude"]) - ax[2].axhline(y=amplitude_mean, label="Mean", linestyle="--", color="#009688") - ax[2].legend(loc="upper right") - if "RSP_RVT" in list(rsp_signals.columns): - ax[3].set_title("Respiratory Volume per Time") + ax[0].scatter( + x_axis[peaks], + rsp_signals["RSP_Clean"][peaks], + color="red", + label="Exhalation Onsets", + zorder=3, + ) + ax[0].scatter( + x_axis[troughs], + rsp_signals["RSP_Clean"][troughs], + color="orange", + label="Inhalation Onsets", + zorder=4, + ) - ax[3].plot(x_axis, rsp_signals["RSP_RVT"], color="#00BCD4", label="RVT", linewidth=1.5) - rvt_mean = np.mean(rsp_signals["RSP_RVT"]) - ax[3].axhline(y=rvt_mean, label="Mean", linestyle="--", color="#009688") - ax[3].legend(loc="upper right") + # Shade region to mark inspiration and expiration. + ax[0].fill_between( + x_axis[exhale], + exhale_signal[exhale], + rsp_signals["RSP_Clean"][exhale], + where=rsp_signals["RSP_Clean"][exhale] > exhale_signal[exhale], + color="#CFD8DC", + linestyle="None", + label="exhalation", + ) + ax[0].fill_between( + x_axis[inhale], + inhale_signal[inhale], + rsp_signals["RSP_Clean"][inhale], + where=rsp_signals["RSP_Clean"][inhale] > inhale_signal[inhale], + color="#ECEFF1", + linestyle="None", + label="inhalation", + ) - if "RSP_Symmetry_PeakTrough" in list(rsp_signals.columns): - ax[4].set_title("Cycle Symmetry") + ax[0].legend(loc="upper right") - ax[4].plot( + # Plot rate and optionally amplitude. + ax[1].set_title("Breathing Rate") + ax[1].plot( x_axis, - rsp_signals["RSP_Symmetry_PeakTrough"], - color="green", - label="Peak-Trough Symmetry", + rsp_signals["RSP_Rate"], + color="#4CAF50", + label="Rate", linewidth=1.5, ) - ax[4].plot( - x_axis, - rsp_signals["RSP_Symmetry_RiseDecay"], - color="purple", - label="Rise-Decay Symmetry", - linewidth=1.5, + ax[1].axhline(y=rate_mean, label="Mean", linestyle="--", color="#4CAF50") + ax[1].legend(loc="upper right") + + if "RSP_Amplitude" in list(rsp_signals.columns): + ax[2].set_title("Breathing Amplitude") + + ax[2].plot( + x_axis, + rsp_signals["RSP_Amplitude"], + color="#009688", + label="Amplitude", + linewidth=1.5, + ) + ax[2].axhline( + y=amplitude_mean, label="Mean", linestyle="--", color="#009688" + ) + ax[2].legend(loc="upper right") + + if "RSP_RVT" in list(rsp_signals.columns): + ax[3].set_title("Respiratory Volume per Time") + + ax[3].plot( + x_axis, + rsp_signals["RSP_RVT"], + color="#00BCD4", + label="RVT", + linewidth=1.5, + ) + ax[3].axhline(y=rvt_mean, label="Mean", linestyle="--", color="#009688") + ax[3].legend(loc="upper right") + + if "RSP_Symmetry_PeakTrough" in list(rsp_signals.columns): + ax[4].set_title("Cycle Symmetry") + + ax[4].plot( + x_axis, + rsp_signals["RSP_Symmetry_PeakTrough"], + color="green", + label="Peak-Trough Symmetry", + linewidth=1.5, + ) + ax[4].plot( + x_axis, + rsp_signals["RSP_Symmetry_RiseDecay"], + color="purple", + label="Rise-Decay Symmetry", + linewidth=1.5, + ) + ax[4].legend(loc="upper right") + return fig + else: + # Generate interactive plot with plotly. 
+ try: + import plotly.graph_objects as go + from plotly.subplots import make_subplots + + except ImportError as e: + raise ImportError( + "NeuroKit error: rsp_plot(): the 'plotly'", + " module is required when 'static' is False.", + " Please install it first (`pip install plotly`).", + ) from e + + subplot_titles = ["Raw and Cleaned Signal", "Breathing Rate"] + if "RSP_Amplitude" in list(rsp_signals.columns): + subplot_titles.append("Breathing Amplitude") + if "RSP_RVT" in list(rsp_signals.columns): + subplot_titles.append("Respiratory Volume per Time") + if "RSP_Symmetry_PeakTrough" in list(rsp_signals.columns): + subplot_titles.append("Cycle Symmetry") + subplot_titles = tuple(subplot_titles) + fig = make_subplots( + rows=nrow, + cols=1, + shared_xaxes=True, + subplot_titles=subplot_titles, + ) + + # Plot cleaned and raw RSP + fig.add_trace( + go.Scatter( + x=x_axis, y=rsp_signals["RSP_Raw"], name="Raw", marker_color="#B0BEC5" + ), + row=1, + col=1, + ) + fig.add_trace( + go.Scatter( + x=x_axis, + y=rsp_signals["RSP_Clean"], + name="Cleaned", + marker_color="#2196F3", + ), + row=1, + col=1, ) - ax[4].legend(loc="upper right") + + # Plot peaks and troughs. + fig.add_trace( + go.Scatter( + x=x_axis[peaks], + y=rsp_signals["RSP_Clean"][peaks], + name="Exhalation Onsets", + marker_color="red", + mode="markers", + ), + row=1, + col=1, + ) + fig.add_trace( + go.Scatter( + x=x_axis[troughs], + y=rsp_signals["RSP_Clean"][troughs], + name="Inhalation Onsets", + marker_color="orange", + mode="markers", + ), + row=1, + col=1, + ) + + # TODO: Shade region to mark inspiration and expiration. + + # Plot rate and optionally amplitude. + fig.add_trace( + go.Scatter( + x=x_axis, y=rsp_signals["RSP_Rate"], name="Rate", marker_color="#4CAF50" + ), + row=2, + col=1, + ) + fig.add_trace( + go.Scatter( + x=x_axis, + y=[rate_mean] * len(x_axis), + name="Mean Rate", + marker_color="#4CAF50", + line=dict(dash="dash"), + ), + row=2, + col=1, + ) + + if "RSP_Amplitude" in list(rsp_signals.columns): + fig.add_trace( + go.Scatter( + x=x_axis, + y=rsp_signals["RSP_Amplitude"], + name="Amplitude", + marker_color="#009688", + ), + row=3, + col=1, + ) + fig.add_trace( + go.Scatter( + x=x_axis, + y=[amplitude_mean] * len(x_axis), + name="Mean Amplitude", + marker_color="#009688", + line=dict(dash="dash"), + ), + row=3, + col=1, + ) + + if "RSP_RVT" in list(rsp_signals.columns): + fig.add_trace( + go.Scatter( + x=x_axis, + y=rsp_signals["RSP_RVT"], + name="RVT", + marker_color="#00BCD4", + ), + row=4, + col=1, + ) + fig.add_trace( + go.Scatter( + x=x_axis, + y=[rvt_mean] * len(x_axis), + name="Mean RVT", + marker_color="#00BCD4", + line=dict(dash="dash"), + ), + row=4, + col=1, + ) + + if "RSP_Symmetry_PeakTrough" in list(rsp_signals.columns): + fig.add_trace( + go.Scatter( + x=x_axis, + y=rsp_signals["RSP_Symmetry_PeakTrough"], + name="Peak-Trough Symmetry", + marker_color="green", + ), + row=5, + col=1, + ) + fig.add_trace( + go.Scatter( + x=x_axis, + y=rsp_signals["RSP_Symmetry_RiseDecay"], + name="Rise-Decay Symmetry", + marker_color="purple", + ), + row=5, + col=1, + ) + + fig.update_layout(title_text="Respiration (RSP)", height=1250, width=750) + for i in range(1, nrow + 1): + fig.update_xaxes(title_text=x_label, row=i, col=1) + + return fig # ============================================================================= # Internals # ============================================================================= def _rsp_plot_phase(rsp_signals, troughs, peaks): - exhale_signal = pd.Series(np.full(len(rsp_signals), 
np.nan)) exhale_signal[troughs] = rsp_signals["RSP_Clean"][troughs].values exhale_signal[peaks] = rsp_signals["RSP_Clean"][peaks].values diff --git a/neurokit2/rsp/rsp_process.py b/neurokit2/rsp/rsp_process.py index 0056a1f52a..3de69c8854 100644 --- a/neurokit2/rsp/rsp_process.py +++ b/neurokit2/rsp/rsp_process.py @@ -2,12 +2,14 @@ import pandas as pd from ..misc import as_vector +from ..misc.report import create_report from ..signal import signal_rate from .rsp_amplitude import rsp_amplitude from .rsp_clean import rsp_clean from .rsp_methods import rsp_methods from .rsp_peaks import rsp_peaks from .rsp_phase import rsp_phase +from .rsp_plot import rsp_plot from .rsp_rvt import rsp_rvt from .rsp_symmetry import rsp_symmetry @@ -17,6 +19,7 @@ def rsp_process( sampling_rate=1000, method="khodadad2018", method_rvt="harrison2021", + report=None, **kwargs ): """**Process a respiration (RSP) signal** @@ -40,6 +43,11 @@ method_rvt : str The RVT method to apply. Can be one of ``"harrison2021"`` (default), ``"birn2006"`` or ``"power2020"``. + report : str + The filename of a report containing description and figures of processing + (e.g. ``"myreport.html"``). Needs to be supplied if a report file + should be generated. Defaults to ``None``. Can also be ``"text"`` to + just print the text in the console without saving anything. **kwargs Other arguments to be passed to specific methods. For more information, see :func:`.rsp_methods`. @@ -77,7 +85,7 @@ import neurokit2 as nk rsp = nk.rsp_simulate(duration=90, respiratory_rate=15) - signals, info = nk.rsp_process(rsp, sampling_rate=1000) + signals, info = nk.rsp_process(rsp, sampling_rate=1000, report="text") @savefig p_rsp_process_1.png scale=100% fig = nk.rsp_plot(signals, sampling_rate=1000) @@ -92,19 +100,12 @@ ) # Clean signal - if ( - methods["method_cleaning"] is None - or methods["method_cleaning"].lower() == "none" - ): - rsp_cleaned = rsp_signal - else: - # Clean signal - rsp_cleaned = rsp_clean( - rsp_signal, - sampling_rate=sampling_rate, - method=methods["method_cleaning"], - **methods["kwargs_cleaning"], - ) + rsp_cleaned = rsp_clean( + rsp_signal, + sampling_rate=sampling_rate, + method=methods["method_cleaning"], + **methods["kwargs_cleaning"], + ) # Extract, fix and format peaks peak_signal, info = rsp_peaks( @@ -142,4 +143,12 @@ ) signals = pd.concat([signals, phase, symmetry, peak_signal], axis=1) + if report is not None: + # Generate report containing description and figures of processing + if ".html" in str(report): + fig = rsp_plot(signals, sampling_rate=sampling_rate) + else: + fig = None + create_report(file=report, signals=signals, info=methods, fig=fig) + return signals, info diff --git a/neurokit2/rsp/rsp_simulate.py b/neurokit2/rsp/rsp_simulate.py index 856ebeb3cd..caaacd45f0 100644 --- a/neurokit2/rsp/rsp_simulate.py +++ b/neurokit2/rsp/rsp_simulate.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- import numpy as np +from ..misc import check_random_state, check_random_state_children from ..signal import signal_distort, signal_simulate, signal_smooth @@ -12,6 +13,7 @@ def rsp_simulate( respiratory_rate=15, method="breathmetrics", random_state=None, + random_state_distort="spawn", ): """**Simulate a respiratory signal** @@ -35,8 +37,14 @@ trigonometric sine wave that roughly approximates a single respiratory cycle. If ``"breathmetrics"`` (default), will use an advanced model described by `Noto, et al. (2018) `_. - random_state : int - Seed for the random number generator. + random_state : None, int, numpy.random.RandomState or numpy.random.Generator + Seed for the random number generator. See ``misc.check_random_state`` for further information. + random_state_distort : {'legacy', 'spawn'}, None, int, numpy.random.RandomState or numpy.random.Generator + Random state to be used to distort the signal. If ``"legacy"``, use the same random state used to + generate the signal (discouraged as it creates dependent random streams). If ``"spawn"``, spawn + independent children random number generators from the random_state argument. If any of the other types, + generate independent children random number generators from the random_state_distort provided (this + allows generating multiple versions of the same signal distorted by different random noise realizations). See Also -------- @@ -69,7 +77,8 @@ """ # Seed the random generator for reproducible results - np.random.seed(random_state) + rng = check_random_state(random_state) + random_state_distort = check_random_state_children(random_state, random_state_distort, n_children=1) # Generate number of samples automatically if length is unspecified if length is None: @@ -81,7 +90,10 @@ ) else: rsp = _rsp_simulate_breathmetrics( - duration=duration, sampling_rate=sampling_rate, respiratory_rate=respiratory_rate + duration=duration, + sampling_rate=sampling_rate, + respiratory_rate=respiratory_rate, + rng=rng, ) rsp = rsp[0:length] @@ -93,12 +105,10 @@ noise_amplitude=noise, noise_frequency=[5, 10, 100], noise_shape="laplace", - random_state=random_state, + random_state=random_state_distort[0], silent=True, ) - # Reset random seed (so it doesn't affect global) - np.random.seed(None) return rsp @@ -138,6 +148,7 @@ def _rsp_simulate_breathmetrics_original( pause_amplitude=0.1, pause_amplitude_variance=0.2, signal_noise=0.1, + rng=None, ): """Simulates a recording of human airflow data by appending individually constructed sine waves and pauses in sequence. This is translated from the matlab code available `here.
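The new ``report`` argument of ``rsp_process()`` follows the same pattern as ``ppg_process()``: an ``.html`` filename triggers figure generation via ``rsp_plot()``, anything else prints the description to the console. A minimal usage sketch, with placeholder filenames:

.. code-block:: python

    import neurokit2 as nk

    rsp = nk.rsp_simulate(duration=60, sampling_rate=100, respiratory_rate=15, random_state=3)

    # Save an HTML report with description, summary table, and figure
    signals, info = nk.rsp_process(rsp, sampling_rate=100, report="myreport.html")

    # Or only print the processing description to the console
    signals, info = nk.rsp_process(rsp, sampling_rate=100, report="text")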
@@ -190,25 +201,23 @@ def _rsp_simulate_breathmetrics_original( # Normalize variance by average breath amplitude amplitude_variance_normed = average_amplitude * amplitude_variance - amplitudes_with_noise = np.random.randn(nCycles) * amplitude_variance_normed + average_amplitude + amplitudes_with_noise = rng.standard_normal(nCycles) * amplitude_variance_normed + average_amplitude amplitudes_with_noise[amplitudes_with_noise < 0] = 0 # Normalize phase by average breath length phase_variance_normed = phase_variance * sample_phase - phases_with_noise = np.round( - np.random.randn(nCycles) * phase_variance_normed + sample_phase - ).astype(int) + phases_with_noise = np.round(rng.standard_normal(nCycles) * phase_variance_normed + sample_phase).astype(int) phases_with_noise[phases_with_noise < 0] = 0 # Normalize pause lengths by phase and variation inhale_pauseLength_variance_normed = inhale_pause_phase * inhale_pauseLength_variance inhale_pauseLengths_with_noise = np.round( - np.random.randn(nCycles) * inhale_pauseLength_variance_normed + inhale_pause_phase + rng.standard_normal(nCycles) * inhale_pauseLength_variance_normed + inhale_pause_phase ).astype(int) inhale_pauseLengths_with_noise[inhale_pauseLengths_with_noise < 0] = 0 exhale_pauseLength_variance_normed = exhale_pause_phase * exhale_pauseLength_variance exhale_pauseLengths_with_noise = np.round( - np.random.randn(nCycles) * exhale_pauseLength_variance_normed + inhale_pause_phase + rng.standard_normal(nCycles) * exhale_pauseLength_variance_normed + inhale_pause_phase ).astype(int) # why inhale pause phase? @@ -238,22 +247,18 @@ def _rsp_simulate_breathmetrics_original( i = 1 for c in range(nCycles): # Determine length of inhale pause for this cycle - if np.random.rand() < inhale_pause_percent: + if rng.uniform() < inhale_pause_percent: this_inhale_pauseLength = inhale_pauseLengths_with_noise[c] - this_inhale_pause = ( - np.random.randn(this_inhale_pauseLength) * pause_amplitude_variance_normed - ) + this_inhale_pause = rng.standard_normal(this_inhale_pauseLength) * pause_amplitude_variance_normed this_inhale_pause[this_inhale_pause < 0] = 0 else: this_inhale_pauseLength = 0 this_inhale_pause = [] # Determine length of exhale pause for this cycle - if np.random.rand() < exhale_pause_percent: + if rng.uniform() < exhale_pause_percent: this_exhale_pauseLength = exhale_pauseLengths_with_noise[c] - this_exhale_pause = ( - np.random.randn(this_exhale_pauseLength) * pause_amplitude_variance_normed - ) + this_exhale_pause = rng.standard_normal(this_exhale_pauseLength) * pause_amplitude_variance_normed this_exhale_pause[this_exhale_pause < 0] = 0 else: this_exhale_pauseLength = 0 @@ -325,7 +330,7 @@ def _rsp_simulate_breathmetrics_original( if signal_noise == 0: signal_noise = 0.0001 - noise_vector = np.random.rand(*simulated_respiration.shape) * average_amplitude + noise_vector = rng.uniform(size=simulated_respiration.shape) * average_amplitude simulated_respiration = simulated_respiration * (1 - signal_noise) + noise_vector * signal_noise raw_features = { "Inhale Onsets": inhale_onsets, @@ -361,7 +366,7 @@ def _rsp_simulate_breathmetrics_original( return simulated_respiration, raw_features, feature_stats -def _rsp_simulate_breathmetrics(duration=10, sampling_rate=1000, respiratory_rate=15): +def _rsp_simulate_breathmetrics(duration=10, sampling_rate=1000, respiratory_rate=15, rng=None): n_cycles = int(respiratory_rate / 60 * duration) @@ -374,5 +379,6 @@ def _rsp_simulate_breathmetrics(duration=10, sampling_rate=1000, respiratory_rat 
sampling_rate=sampling_rate, breathing_rate=respiratory_rate / 60, signal_noise=0, + rng=rng, ) return rsp diff --git a/neurokit2/signal/signal_distort.py b/neurokit2/signal/signal_distort.py index d8ca59deb2..530cb7bc1d 100644 --- a/neurokit2/signal/signal_distort.py +++ b/neurokit2/signal/signal_distort.py @@ -3,7 +3,7 @@ import numpy as np -from ..misc import NeuroKitWarning, listify +from ..misc import NeuroKitWarning, check_random_state, listify from .signal_resample import signal_resample from .signal_simulate import signal_simulate @@ -56,9 +56,8 @@ def signal_distort( between 1 and 10% of the signal duration. linear_drift : bool Whether or not to add linear drift to the signal. - random_state : int - Seed for the random number generator. Keep it fixed for reproducible - results. + random_state : None, int, numpy.random.RandomState or numpy.random.Generator + Seed for the random number generator. See ``misc.check_random_state`` for further information. silent : bool Whether or not to display warning messages. @@ -103,7 +102,7 @@ """ # Seed the random generator for reproducible results. - np.random.seed(random_state) + rng = check_random_state(random_state) # Make sure that noise_amplitude is a list. if isinstance(noise_amplitude, (int, float)): @@ -125,6 +124,7 @@ noise_frequency=noise_frequency, noise_shape=noise_shape, silent=silent, + rng=rng, ) # Powerline noise. @@ -148,6 +148,7 @@ artifacts_amplitude=artifacts_amplitude, artifacts_number=artifacts_number, silent=silent, + rng=rng, ) if linear_drift: @@ -155,9 +156,6 @@ distorted = signal + noise - # Reset random seed (so it doesn't affect global) - np.random.seed(None) - return distorted @@ -183,6 +181,7 @@ def _signal_distort_artifacts( artifacts_number=5, artifacts_shape="laplace", silent=False, + rng=None, ): # Generate artifact burst with random onset and random duration. 
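Threading a single ``rng`` created by ``check_random_state()`` through all the noise helpers means a fixed seed now yields reproducible distortions without mutating NumPy's global random state. A quick sketch, with arbitrary parameter values:

.. code-block:: python

    import numpy as np
    import neurokit2 as nk

    signal = nk.signal_simulate(duration=5, sampling_rate=100, frequency=1)

    # Same seed, same distortion; np.random's global state is left untouched.
    d1 = nk.signal_distort(signal, sampling_rate=100, noise_amplitude=0.1, random_state=123)
    d2 = nk.signal_distort(signal, sampling_rate=100, noise_amplitude=0.1, random_state=123)
    assert np.allclose(d1, d2)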
@@ -193,15 +192,16 @@ def _signal_distort_artifacts( noise_amplitude=artifacts_amplitude, noise_shape=artifacts_shape, silent=silent, + rng=rng, ) if artifacts.sum() == 0: return artifacts min_duration = int(np.rint(len(artifacts) * 0.001)) max_duration = int(np.rint(len(artifacts) * 0.01)) - artifact_durations = np.random.randint(min_duration, max_duration, artifacts_number) + artifact_durations = rng.choice(range(min_duration, max_duration), size=artifacts_number) - artifact_onsets = np.random.randint(0, len(artifacts) - max_duration, artifacts_number) + artifact_onsets = rng.choice(len(artifacts) - max_duration, size=artifacts_number) artifact_offsets = artifact_onsets + artifact_durations artifact_idcs = np.array([False] * len(artifacts)) @@ -251,6 +251,7 @@ def _signal_distort_noise_multifrequency( noise_frequency=100, noise_shape="laplace", silent=False, + rng=None, ): base_noise = np.zeros(len(signal)) params = listify( @@ -274,6 +275,7 @@ def _signal_distort_noise_multifrequency( noise_amplitude=amp, noise_shape=shape, silent=silent, + rng=rng, ) base_noise += _base_noise @@ -287,6 +289,7 @@ def _signal_distort_noise( noise_amplitude=0.1, noise_shape="laplace", silent=False, + rng=None, ): _noise = np.zeros(n_samples) @@ -323,9 +326,9 @@ def _signal_distort_noise( noise_duration = int(duration * noise_frequency) if noise_shape in ["normal", "gaussian"]: - _noise = np.random.normal(0, noise_amplitude, noise_duration) + _noise = rng.normal(0, noise_amplitude, noise_duration) elif noise_shape == "laplace": - _noise = np.random.laplace(0, noise_amplitude, noise_duration) + _noise = rng.laplace(0, noise_amplitude, noise_duration) else: raise ValueError( "NeuroKit error: signal_distort(): 'noise_shape' should be one of 'gaussian' or 'laplace'." diff --git a/neurokit2/signal/signal_interpolate.py b/neurokit2/signal/signal_interpolate.py index 76f586a8ef..291b259ff5 100644 --- a/neurokit2/signal/signal_interpolate.py +++ b/neurokit2/signal/signal_interpolate.py @@ -97,10 +97,16 @@ def signal_interpolate(x_values, y_values=None, x_new=None, method="quadratic", if isinstance(x_new, int): if len(x_values) == x_new: return y_values + x_new = np.linspace(x_values[0], x_values[-1], x_new) else: # if x_values is identical to x_new, no need for interpolation if np.all(x_values == x_new): return y_values + + # If only one value, return a constant signal + if len(x_values) == 1: + return np.ones(len(x_new)) * y_values[0] + if method == "monotone_cubic": interpolation_function = scipy.interpolate.PchipInterpolator( x_values, y_values, extrapolate=True @@ -115,8 +121,7 @@ def signal_interpolate(x_values, y_values=None, x_new=None, method="quadratic", bounds_error=False, fill_value=fill_value, ) - if isinstance(x_new, int): - x_new = np.linspace(x_values[0], x_values[-1], x_new) + interpolated = interpolation_function(x_new) if method == "monotone_cubic" and fill_value != "extrapolate": diff --git a/neurokit2/signal/signal_noise.py b/neurokit2/signal/signal_noise.py index db83afecda..82f071b66e 100644 --- a/neurokit2/signal/signal_noise.py +++ b/neurokit2/signal/signal_noise.py @@ -1,7 +1,9 @@ import numpy as np +from ..misc import check_random_state -def signal_noise(duration=10, sampling_rate=1000, beta=1): + +def signal_noise(duration=10, sampling_rate=1000, beta=1, random_state=None): """**Simulate noise** This function generates pure Gaussian ``(1/f)**beta`` noise. 
The power-spectrum of the generated @@ -22,7 +24,8 @@ The desired sampling rate (in Hz, i.e., samples/second). beta : float The noise exponent. - + random_state : None, int, numpy.random.RandomState or numpy.random.Generator + Seed for the random number generator. See ``misc.check_random_state`` for further information. Returns ------- @@ -77,6 +80,8 @@ plt.close() """ + # Seed the random generator for reproducible results + rng = check_random_state(random_state) # The number of samples in the time series n = int(duration * sampling_rate) @@ -97,8 +102,8 @@ # Generate scaled random power + phase, adjusting size to # generate one Fourier component per frequency - sr = np.random.normal(scale=f, size=len(f)) - si = np.random.normal(scale=f, size=len(f)) + sr = rng.normal(scale=f, size=len(f)) + si = rng.normal(scale=f, size=len(f)) # If the signal length is even, frequencies +/- 0.5 are equal # so the coefficient must be real. diff --git a/neurokit2/signal/signal_psd.py b/neurokit2/signal/signal_psd.py index 3b905f8352..85c134c18e 100644 --- a/neurokit2/signal/signal_psd.py +++ b/neurokit2/signal/signal_psd.py @@ -137,7 +137,11 @@ def signal_psd( # Sanitize min_frequency N = len(signal) if isinstance(min_frequency, str): - min_frequency = (2 * sampling_rate) / (N / 2) # for high frequency resolution + if sampling_rate is None: + # This is to compute min_frequency if both min_frequency and sampling_rate are not provided (#800) + min_frequency = (2 * np.median(np.diff(t))) / (N / 2) # for high frequency resolution + else: + min_frequency = (2 * sampling_rate) / (N / 2) # for high frequency resolution # MNE if method in ["multitaper", "multitapers", "mne"]: diff --git a/neurokit2/signal/signal_simulate.py b/neurokit2/signal/signal_simulate.py index 22e1865464..57513b102f 100644 --- a/neurokit2/signal/signal_simulate.py +++ b/neurokit2/signal/signal_simulate.py @@ -3,11 +3,17 @@ import numpy as np -from ..misc import NeuroKitWarning, listify +from ..misc import NeuroKitWarning, check_random_state, listify def signal_simulate( - duration=10, sampling_rate=1000, frequency=1, amplitude=0.5, noise=0, silent=False + duration=10, + sampling_rate=1000, + frequency=1, + amplitude=0.5, + noise=0, + silent=False, + random_state=None, ): """**Simulate a continuous signal** @@ -25,6 +31,8 @@ Noise level (amplitude of the laplace noise). silent : bool If ``False`` (default), might print warnings if impossible frequencies are queried. + random_state : None, int, numpy.random.RandomState or numpy.random.Generator + Seed for the random number generator. See ``misc.check_random_state`` for further information. 
Returns ------- @@ -91,7 +99,8 @@ signal += _signal_simulate_sinusoidal(x=seconds, frequency=freq, amplitude=amp) # Add random noise if noise > 0: - signal += np.random.laplace(0, noise, len(signal)) + rng = check_random_state(random_state) + signal += rng.laplace(0, noise, len(signal)) return signal diff --git a/neurokit2/signal/signal_surrogate.py b/neurokit2/signal/signal_surrogate.py index 5930b7bd44..d94b225e70 100644 --- a/neurokit2/signal/signal_surrogate.py +++ b/neurokit2/signal/signal_surrogate.py @@ -1,7 +1,9 @@ import numpy as np +from ..misc import check_random_state -def signal_surrogate(signal, method="IAAFT", **kwargs): + +def signal_surrogate(signal, method="IAAFT", random_state=None, **kwargs): """**Create Signal Surrogates** Generate a surrogate version of a signal. Different methods are available, such as: @@ -20,6 +22,8 @@ The signal (i.e., a time series) in the form of a vector of values. method : str Can be ``"random"`` or ``"IAAFT"``. + random_state : None, int, numpy.random.RandomState or numpy.random.Generator + Seed for the random number generator. See ``misc.check_random_state`` for further information. **kwargs Other keyword arguments, such as ``max_iter`` (by default 1000). @@ -85,16 +89,19 @@ # https://github.com/Frederic-vW/eeg_microstates/blob/eeg_microstates3.py#L861 # Or markov_simulate() + # Seed the random generator for reproducible results + rng = check_random_state(random_state) + method = method.lower() if method == "random": - surrogate = np.random.permutation(signal) + surrogate = rng.permutation(signal) elif method == "iaaft": - surrogate, _, _ = _signal_surrogate_iaaft(signal, **kwargs) + surrogate, _, _ = _signal_surrogate_iaaft(signal, rng=rng, **kwargs) return surrogate -def _signal_surrogate_iaaft(signal, max_iter=1000, atol=1e-8, rtol=1e-10, **kwargs): +def _signal_surrogate_iaaft(signal, max_iter=1000, atol=1e-8, rtol=1e-10, rng=None): """IAAFT max_iter : int Maximum iterations to be performed while checking for convergence. 
Convergence can be @@ -125,7 +132,7 @@ def _signal_surrogate_iaaft(signal, max_iter=1000, atol=1e-8, rtol=1e-10, **kwar previous_error, current_error = (-1, 1) # Start with a random permutation - t = np.fft.rfft(np.random.permutation(signal)) + t = np.fft.rfft(rng.permutation(signal)) for i in range(max_iter): # Match power spectrum diff --git a/neurokit2/stats/cluster.py b/neurokit2/stats/cluster.py index 4e115564aa..64563d9ba8 100644 --- a/neurokit2/stats/cluster.py +++ b/neurokit2/stats/cluster.py @@ -10,6 +10,7 @@ import sklearn.decomposition import sklearn.mixture +from ..misc import check_random_state from .cluster_quality import _cluster_quality_distance @@ -182,12 +183,12 @@ # ============================================================================= # Kmeans # ============================================================================= -def _cluster_kmeans(data, n_clusters=2, random_state=None, **kwargs): +def _cluster_kmeans(data, n_clusters=2, random_state=None, n_init="auto", **kwargs): """K-means clustering algorithm""" # Initialize clustering function clustering_model = sklearn.cluster.KMeans( - n_clusters=n_clusters, random_state=random_state, n_init="auto", **kwargs + n_clusters=n_clusters, random_state=random_state, n_init=n_init, **kwargs ) # Fit @@ -203,7 +204,7 @@ # Copy function with given parameters clustering_function = functools.partial( - _cluster_kmeans, n_clusters=n_clusters, random_state=random_state, **kwargs + _cluster_kmeans, n_clusters=n_clusters, random_state=random_state, n_init=n_init, **kwargs ) # Info dump @@ -234,9 +235,8 @@ def _cluster_kmedoids(data, n_clusters=2, max_iterations=1000, random_state=None n_samples = data.shape[0] # Step 1: Initialize random medoids - if not isinstance(random_state, np.random.RandomState): - random_state = np.random.RandomState(random_state) - ids_of_medoids = np.random.choice(n_samples, n_clusters, replace=False) + rng = check_random_state(random_state) + ids_of_medoids = rng.choice(n_samples, n_clusters, replace=False) # Find distance between objects to their medoids, can be euclidean or manhattan def find_distance(x, y, dist_method="euclidean"): @@ -254,12 +254,10 @@ def find_distance(x, y, dist_method="euclidean"): # Step 2: Update medoids for i in range(max_iterations): - # Find new random medoids + # Find new medoids ids_of_medoids = np.full(n_clusters, -1, dtype=int) - subset = np.random.choice(n_samples, n_samples, replace=False) - for i in range(n_clusters): - indices = np.intersect1d(np.where(segmentation == i)[0], subset) + indices = np.where(segmentation == i)[0] distances = find_distance(data[indices, None, :], data[None, indices, :]).sum(axis=0) ids_of_medoids[i] = indices[np.argmin(distances)] @@ -355,9 +353,8 @@ def _cluster_kmod( data_sum_sq = np.sum(data**2) # Select random timepoints for our initial topographic maps - if not isinstance(random_state, np.random.RandomState): - random_state = np.random.RandomState(random_state) - init_times = random_state.choice(n_samples, size=n_clusters, replace=False) + rng = check_random_state(random_state) + init_times = rng.choice(n_samples, size=n_clusters, replace=False) # Initialize random cluster centroids clusters = data[init_times, :] diff --git a/neurokit2/stats/cluster_quality.py b/neurokit2/stats/cluster_quality.py index 4b689374ca..740c31d194 100644 --- a/neurokit2/stats/cluster_quality.py +++ 
b/neurokit2/stats/cluster_quality.py @@ -7,8 +7,10 @@ import sklearn.mixture import sklearn.model_selection +from ..misc import check_random_state -def cluster_quality(data, clustering, clusters=None, info=None, n_random=10, **kwargs): + +def cluster_quality(data, clustering, clusters=None, info=None, n_random=10, random_state=None, **kwargs): """**Assess Clustering Quality** Compute quality of the clustering using several metrics. @@ -29,6 +31,8 @@ def cluster_quality(data, clustering, clusters=None, info=None, n_random=10, **k n_random : int The number of random initializations to cluster random data for calculating the GAP statistic. + random_state : None, int, numpy.random.RandomState or numpy.random.Generator + Seed for the random number generator. See ``misc.check_random_state`` for further information. **kwargs Other arguments to be passed on, for instance ``GFP`` as ``'sd'`` in microstates. @@ -65,6 +69,9 @@ definitions with and without logarithm function. arXiv preprint arXiv:1103.4767. """ + # Seed the random generator for reproducible results + rng = check_random_state(random_state) + # Sanity checks if isinstance(clustering, tuple): clustering, clusters, info = clustering @@ -92,7 +99,7 @@ general["Dispersion"] = _cluster_quality_dispersion(data, clustering, **kwargs) # Gap statistic - general.update(_cluster_quality_gap(data, clusters, clustering, info, n_random=n_random)) + general.update(_cluster_quality_gap(data, clusters, clustering, info, n_random=n_random, rng=rng)) # Mixture models if "sklearn_model" in info: @@ -183,7 +190,7 @@ def _cluster_quality_variance(data, clusters, clustering): return (sum_squares_total - sum_squares_within) / sum_squares_total -def _cluster_quality_gap(data, clusters, clustering, info, n_random=10): +def _cluster_quality_gap(data, clusters, clustering, info, n_random=10, rng=None): """GAP statistic and modified GAP statistic by Mohajer (2011). 
The GAP statistic compares the total within intra-cluster variation for different values of k @@ -197,7 +204,7 @@ def _cluster_quality_gap(data, clusters, clustering, info, n_random=10): for i in range(n_random): # Random data - random_data = np.random.random_sample(size=data.shape) + random_data = rng.uniform(size=data.shape) # Rescale random m = (maxs - mins) / (np.max(random_data, axis=0) - np.min(random_data, axis=0)) diff --git a/tests/tests_eda.py b/tests/tests_eda.py index f9a2b7827b..7cac122f50 100644 --- a/tests/tests_eda.py +++ b/tests/tests_eda.py @@ -14,7 +14,6 @@ def test_eda_simulate(): - eda1 = nk.eda_simulate(duration=10, length=None, scr_number=1, random_state=333) assert len(nk.signal_findpeaks(eda1, height_min=0.6)["Peaks"]) == 1 @@ -28,7 +27,6 @@ def test_eda_simulate(): def test_eda_clean(): - sampling_rate = 1000 eda = nk.eda_simulate( duration=30, @@ -63,11 +61,10 @@ def test_eda_clean(): def test_eda_phasic(): - - sampling_rate = 1000 + sr = 100 eda = nk.eda_simulate( duration=30, - sampling_rate=sampling_rate, + sampling_rate=sr, scr_number=6, noise=0.01, drift=0.01, @@ -75,23 +72,26 @@ def test_eda_phasic(): ) if platform.system() == "Linux": - cvxEDA = nk.eda_phasic(nk.standardize(eda), method="cvxeda") + cvxEDA = nk.eda_phasic(eda, sampling_rate=sr, method="cvxeda") assert len(cvxEDA) == len(eda) - smoothMedian = nk.eda_phasic(nk.standardize(eda), method="smoothmedian") + smoothMedian = nk.eda_phasic(eda, sampling_rate=sr, method="smoothmedian") assert len(smoothMedian) == len(eda) - highpass = nk.eda_phasic(nk.standardize(eda), method="highpass") + highpass = nk.eda_phasic(eda, sampling_rate=sr, method="highpass") assert len(highpass) == len(eda) + # This fails unfortunately... need to fix the sparsEDA algorithm + # sparsEDA = nk.eda_phasic(eda, sampling_rate=sr, method="sparsEDA") + # assert len(highpass) == len(eda) -def test_eda_peaks(): +def test_eda_peaks(): sampling_rate = 1000 eda = nk.eda_simulate( - duration=30*20, + duration=30 * 20, sampling_rate=sampling_rate, - scr_number=6*20, + scr_number=6 * 20, noise=0, drift=0.01, random_state=42, @@ -117,7 +117,6 @@ def test_eda_peaks(): def test_eda_process(): - eda = nk.eda_simulate(duration=30, scr_number=5, drift=0.1, noise=0, sampling_rate=250) signals, info = nk.eda_process(eda, sampling_rate=250) @@ -149,7 +148,6 @@ def test_eda_process(): def test_eda_plot(): - sampling_rate = 1000 eda = nk.eda_simulate( duration=30, @@ -171,7 +169,7 @@ def test_eda_plot(): "Skin Conductance Response (SCR)", "Skin Conductance Level (SCL)", ] - for (ax, title) in zip(fig.get_axes(), titles): + for ax, title in zip(fig.get_axes(), titles): assert ax.get_title() == title assert fig.get_axes()[2].get_xlabel() == "Samples" np.testing.assert_array_equal( @@ -187,7 +185,6 @@ def test_eda_plot(): def test_eda_eventrelated(): - eda = nk.eda_simulate(duration=15, scr_number=3) eda_signals, _ = nk.eda_process(eda, sampling_rate=1000) epochs = nk.epochs_create( @@ -206,21 +203,81 @@ def test_eda_eventrelated(): def test_eda_intervalrelated(): - data = nk.data("bio_resting_8min_100hz") - df, info = nk.eda_process(data["EDA"], sampling_rate=100) + df, _ = nk.eda_process(data["EDA"], sampling_rate=100) columns = ["SCR_Peaks_N", "SCR_Peaks_Amplitude_Mean"] # Test with signal dataframe - features_df = nk.eda_intervalrelated(df) + rez = nk.eda_intervalrelated(df) - assert all(elem in columns for elem in np.array(features_df.columns.values, dtype=str)) - assert features_df.shape[0] == 1 # Number of rows + assert all([i in 
rez.columns.values for i in columns]) + assert rez.shape[0] == 1 # Number of rows # Test with dict columns.append("Label") epochs = nk.epochs_create(df, events=[0, 25300], sampling_rate=100, epochs_end=20) - features_dict = nk.eda_intervalrelated(epochs) + rez = nk.eda_intervalrelated(epochs) + + assert all([i in rez.columns.values for i in columns]) + assert rez.shape[0] == 2 # Number of rows + + +def test_eda_sympathetic(): + eda_signal = nk.data("bio_eventrelated_100hz")["EDA"] + indexes_posada = nk.eda_sympathetic(eda_signal, sampling_rate=100, method="posada") + # Test value is float + assert isinstance(indexes_posada["EDA_Sympathetic"], float) + assert isinstance(indexes_posada["EDA_SympatheticN"], float) + + +def test_eda_findpeaks(): + eda_signal = nk.data("bio_eventrelated_100hz")["EDA"] + eda_cleaned = nk.eda_clean(eda_signal) + eda = nk.eda_phasic(eda_cleaned) + eda_phasic = eda["EDA_Phasic"].values + + # Find peaks + nabian2018 = nk.eda_findpeaks(eda_phasic, sampling_rate=100, method="nabian2018") + assert len(nabian2018["SCR_Peaks"]) == 9 + + vanhalem2020 = nk.eda_findpeaks(eda_phasic, sampling_rate=100, method="vanhalem2020") + min_n_peaks = min(len(vanhalem2020), len(nabian2018)) + assert any( + nabian2018["SCR_Peaks"][:min_n_peaks] - vanhalem2020["SCR_Peaks"][:min_n_peaks] + ) < np.mean(eda_signal) + + +@pytest.mark.parametrize( + "method_cleaning, method_phasic, method_peaks", + [ + ("none", "cvxeda", "gamboa2008"), + ("neurokit", "median", "nabian2018"), + ], +) +def test_eda_report(tmp_path, method_cleaning, method_phasic, method_peaks): + + sampling_rate = 100 + + eda = nk.eda_simulate( + duration=30, + sampling_rate=sampling_rate, + scr_number=6, + noise=0, + drift=0.01, + random_state=0, + ) + + d = tmp_path / "sub" + d.mkdir() + p = d / "myreport.html" + + signals, _ = nk.eda_process( + eda, + sampling_rate=sampling_rate, + method_cleaning=method_cleaning, + method_phasic=method_phasic, + method_peaks=method_peaks, + report=str(p), + ) - assert all(elem in columns for elem in np.array(features_dict.columns.values, dtype=str)) - assert features_dict.shape[0] == 2 # Number of rows + assert p.is_file() diff --git a/tests/tests_hrv.py b/tests/tests_hrv.py index c224a47a8b..5faeb62e39 100644 --- a/tests/tests_hrv.py +++ b/tests/tests_hrv.py @@ -3,7 +3,7 @@ import pytest import neurokit2 as nk -import neurokit2.misc as misc +from neurokit2 import misc def test_hrv_time(): @@ -125,6 +125,7 @@ def test_hrv_interpolated_rri(interpolation_rate): def test_hrv_missing(): random_state = 42 + rng = misc.check_random_state(random_state) # Download data data = nk.data("bio_resting_5min_100hz") sampling_rate = 100 @@ -137,8 +138,7 @@ def test_hrv_missing(): rri_time = peaks[1:] / sampling_rate # remove some intervals and their corresponding timestamps - np.random.seed(random_state) - missing = np.random.randint(0, len(rri), size=int(len(rri) / 5)) + missing = rng.choice(len(rri), size=int(len(rri) / 5)) rri_missing = rri[np.array([i for i in range(len(rri)) if i not in missing])] rri_time_missing = rri_time[np.array([i for i in range(len(rri_time)) if i not in missing])] diff --git a/tests/tests_ppg.py b/tests/tests_ppg.py index d701d797f3..5e5069139f 100644 --- a/tests/tests_ppg.py +++ b/tests/tests_ppg.py @@ -7,6 +7,7 @@ import neurokit2 as nk + durations = (20, 200) sampling_rates = (50, 500) heart_rates = (50, 120) @@ -80,6 +81,31 @@ def test_ppg_simulate_ibi(ibi_randomness, std_heart_rate): # TODO: test influence of different noise configurations +def 
test_ppg_simulate_legacy_rng(): + ppg = nk.ppg_simulate( + duration=30, + sampling_rate=250, + heart_rate=70, + frequency_modulation=0.2, + ibi_randomness=0.1, + drift=0.1, + motion_amplitude=0.1, + powerline_amplitude=0.01, + random_state=654, + random_state_distort="legacy", + show=False, + ) + + # Run simple checks to verify that the signal is the same as that generated with version 0.2.3 + # before the introduction of the new random number generation approach + assert np.allclose(np.mean(ppg), 0.6598246992405254) + assert np.allclose(np.std(ppg), 0.4542274696384863) + assert np.allclose( + np.mean(np.reshape(ppg, (-1, 1500)), axis=1), + [0.630608661400, 0.63061887029, 0.60807993168, 0.65731025466, 0.77250577818], + ) + + def test_ppg_clean(): sampling_rate = 500 @@ -138,7 +164,7 @@ def test_ppg_findpeaks(): peaks = info_elgendi["PPG_Peaks"] assert peaks.size == 29 - assert peaks.sum() == 219764 + assert np.abs(peaks.sum() - 219764) < 5 # off by no more than 5 samples in total # Test MSPTD method info_msptd = nk.ppg_findpeaks(ppg, sampling_rate=sampling_rate, method="bishop", show=True) @@ -146,7 +172,7 @@ def test_ppg_findpeaks(): peaks = info_msptd["PPG_Peaks"] assert peaks.size == 29 - assert peaks.sum() == 219665 + assert np.abs(peaks.sum() - 219665) < 30 # off by no more than 30 samples in total @pytest.mark.parametrize( diff --git a/tests/tests_rsp.py b/tests/tests_rsp.py index 1dbe0e1ffd..52e74515b7 100644 --- a/tests/tests_rsp.py +++ b/tests/tests_rsp.py @@ -9,6 +9,7 @@ import neurokit2 as nk + random.seed(a=13, version=2) @@ -31,6 +32,60 @@ def test_rsp_simulate(): ) +def test_rsp_simulate_legacy_rng(): + + rsp = nk.rsp_simulate( + duration=10, + sampling_rate=100, + noise=0.03, + respiratory_rate=12, + method="breathmetrics", + random_state=123, + random_state_distort="legacy", + ) + + # Run simple checks to verify that the signal is the same as that generated with version 0.2.3 + # before the introduction of the new random number generation approach + assert np.allclose(np.mean(rsp), 0.03869389548166346) + assert np.allclose(np.std(rsp), 0.3140022628657376) + assert np.allclose( + np.mean(np.reshape(rsp, (-1, 200)), axis=1), + [0.2948574728, -0.2835745073, 0.2717568165, -0.2474764970, 0.1579061923], + ) + + +@pytest.mark.parametrize( + "random_state, random_state_distort", + [ + (13579, "legacy"), + (13579, "spawn"), + (13579, 24680), + (13579, None), + (np.random.RandomState(33), "spawn"), + (np.random.SeedSequence(33), "spawn"), + (np.random.Generator(np.random.Philox(33)), "spawn"), + (None, "spawn"), + ], +) +def test_rsp_simulate_all_rng_types(random_state, random_state_distort): + + # Run rsp_simulate to test for errors (e.g. using methods like randint that are only + # implemented for RandomState but not Generator, or vice versa) + rsp = nk.rsp_simulate( + duration=10, + sampling_rate=100, + noise=0.03, + respiratory_rate=12, + method="breathmetrics", + random_state=random_state, + random_state_distort=random_state_distort, + ) + + # Double check the signal is finite and of the right length + assert np.all(np.isfinite(rsp)) + assert len(rsp) == 10 * 100 + + def test_rsp_clean(): sampling_rate = 100 @@ -43,7 +98,7 @@ def test_rsp_clean(): random_state=42, ) # Add linear drift (to test baseline removal). 
diff --git a/tests/tests_rsp.py b/tests/tests_rsp.py
index 1dbe0e1ffd..52e74515b7 100644
--- a/tests/tests_rsp.py
+++ b/tests/tests_rsp.py
@@ -9,6 +9,7 @@
 import neurokit2 as nk
 
+
 random.seed(a=13, version=2)
 
 
@@ -31,6 +32,60 @@ def test_rsp_simulate():
     )
 
 
+def test_rsp_simulate_legacy_rng():
+
+    rsp = nk.rsp_simulate(
+        duration=10,
+        sampling_rate=100,
+        noise=0.03,
+        respiratory_rate=12,
+        method="breathmetrics",
+        random_state=123,
+        random_state_distort="legacy",
+    )
+
+    # Run simple checks to verify that the signal is the same as that generated with version 0.2.3
+    # before the introduction of the new random number generation approach
+    assert np.allclose(np.mean(rsp), 0.03869389548166346)
+    assert np.allclose(np.std(rsp), 0.3140022628657376)
+    assert np.allclose(
+        np.mean(np.reshape(rsp, (-1, 200)), axis=1),
+        [0.2948574728, -0.2835745073, 0.2717568165, -0.2474764970, 0.1579061923],
+    )
+
+
+@pytest.mark.parametrize(
+    "random_state, random_state_distort",
+    [
+        (13579, "legacy"),
+        (13579, "spawn"),
+        (13579, 24680),
+        (13579, None),
+        (np.random.RandomState(33), "spawn"),
+        (np.random.SeedSequence(33), "spawn"),
+        (np.random.Generator(np.random.Philox(33)), "spawn"),
+        (None, "spawn"),
+    ],
+)
+def test_rsp_simulate_all_rng_types(random_state, random_state_distort):
+
+    # Run rsp_simulate to test for errors (e.g. using methods like randint that are only
+    # implemented for RandomState but not Generator, or vice versa)
+    rsp = nk.rsp_simulate(
+        duration=10,
+        sampling_rate=100,
+        noise=0.03,
+        respiratory_rate=12,
+        method="breathmetrics",
+        random_state=random_state,
+        random_state_distort=random_state_distort,
+    )
+
+    # Double check the signal is finite and of the right length
+    assert np.all(np.isfinite(rsp))
+    assert len(rsp) == 10 * 100
+
+
 def test_rsp_clean():
 
     sampling_rate = 100
@@ -43,7 +98,7 @@
         random_state=42,
     )
     # Add linear drift (to test baseline removal).
-    rsp += nk.signal_distort(rsp, sampling_rate=sampling_rate, linear_drift=True)
+    rsp += nk.signal_distort(rsp, sampling_rate=sampling_rate, linear_drift=True, random_state=42)
 
     for method in ["khodadad2018", "biosppy", "hampel"]:
         cleaned = nk.rsp_clean(rsp, sampling_rate=sampling_rate, method=method)
@@ -109,8 +164,8 @@ def test_rsp_peaks():
     assert signals["RSP_Troughs"].sum() in [28, 29]
     assert info["RSP_Peaks"].shape[0] in [28, 29]
     assert info["RSP_Troughs"].shape[0] in [28, 29]
-    assert info["RSP_Peaks"].sum() in [1643836, 1646425, 1762134]
-    assert info["RSP_Troughs"].sum() in [1586580, 1596825, 1702508]
+    assert 4010 < np.median(np.diff(info["RSP_Peaks"])) < 4070
+    assert 3800 < np.median(np.diff(info["RSP_Troughs"])) < 4010
     assert info["RSP_Peaks"][0] > info["RSP_Troughs"][0]
     assert info["RSP_Peaks"][-1] > info["RSP_Troughs"][-1]
@@ -287,3 +342,36 @@ def test_rsp_rvt():
     assert len(rsp20) == len(rvt20)
     assert min(rvt10[~np.isnan(rvt10)]) >= 0
     assert min(rvt20[~np.isnan(rvt20)]) >= 0
+
+
+@pytest.mark.parametrize(
+    "method_cleaning, method_peaks, method_rvt",
+    [
+        ("none", "scipy", "power2020"),
+        ("biosppy", "biosppy", "power2020"),
+        ("khodadad2018", "khodadad2018", "birn2006"),
+        ("power2020", "scipy", "harrison2021"),
+    ],
+)
+def test_rsp_report(tmp_path, method_cleaning, method_peaks, method_rvt):
+
+    sampling_rate = 100
+
+    rsp = nk.rsp_simulate(
+        duration=30,
+        sampling_rate=sampling_rate,
+        random_state=0,
+    )
+
+    d = tmp_path / "sub"
+    d.mkdir()
+    p = d / "myreport.html"
+
+    signals, _ = nk.rsp_process(
+        rsp,
+        sampling_rate=sampling_rate,
+        report=str(p),
+        method_cleaning=method_cleaning,
+        method_peaks=method_peaks,
+        method_rvt=method_rvt,
+    )
+    assert p.is_file()
diff --git a/tests/tests_signal.py b/tests/tests_signal.py
index 39a7d5fc8c..1a7f2926e9 100644
--- a/tests/tests_signal.py
+++ b/tests/tests_signal.py
@@ -8,6 +8,7 @@
 import neurokit2 as nk
 
+
 # =============================================================================
 # Signal
 # =============================================================================
@@ -393,3 +394,29 @@ def test_signal_distort():
         nk.signal_distort(signal, noise_amplitude=1, noise_frequency=0.1, silent=False)
 
     signal2 = nk.signal_simulate(duration=10, frequency=0.5, sampling_rate=10)
+
+
+def test_signal_surrogate():
+    # Logistic map
+    r = 3.95
+    x = np.empty(450)
+    x[0] = 0.5
+    for i in range(1, len(x)):
+        x[i] = r * x[i - 1] * (1 - x[i - 1])
+    x = x[50:]
+    # Create surrogate
+    surrogate = nk.signal_surrogate(x, method="IAAFT", random_state=127)
+    # Check mean and variance
+    assert np.allclose(np.mean(x), np.mean(surrogate))
+    assert np.allclose(np.var(x), np.var(surrogate))
+    # Check distribution
+    assert np.allclose(
+        np.histogram(x, 10, (0, 1))[0],
+        np.histogram(surrogate, 10, (0, 1))[0],
+        atol=1
+    )
+    # Check spectrum
+    assert (
+        np.mean(np.abs(np.abs(np.fft.rfft(surrogate - np.mean(surrogate)))
+                       - np.abs(np.fft.rfft(x - np.mean(x))))) < 0.1
+    )
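``test_signal_surrogate()`` checks that an IAAFT surrogate of a chaotic logistic map keeps the original's amplitude distribution and, approximately, its power spectrum. For context, a sketch of how such surrogates are commonly used, building a null distribution for a nonlinearity statistic; the time-asymmetry statistic below is an arbitrary illustrative choice, not something the test itself computes:

.. code-block:: python

    import numpy as np
    import neurokit2 as nk

    # Logistic map in its chaotic regime (r = 3.95), discarding the transient
    x = np.empty(450)
    x[0] = 0.5
    for i in range(1, x.size):
        x[i] = 3.95 * x[i - 1] * (1 - x[i - 1])
    x = x[50:]

    def asymmetry(s):
        # Skewness of the increments; linear Gaussian processes keep this near zero
        d = np.diff(s)
        return np.mean(d ** 3) / np.mean(d ** 2) ** 1.5

    # Surrogates share x's histogram and (approximately) its spectrum, so they
    # provide a null distribution under the "linear process" hypothesis
    null = [asymmetry(nk.signal_surrogate(x, method="IAAFT", random_state=i))
            for i in range(19)]
    print(asymmetry(x), min(null), max(null))  # original outside the null range suggests nonlinearity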
diff --git a/tests/tests_stats.py b/tests/tests_stats.py
index 9e871dbe0c..d36e59222b 100644
--- a/tests/tests_stats.py
+++ b/tests/tests_stats.py
@@ -42,3 +42,63 @@ def test_mad():
     negative_wikipedia_example = -wikipedia_example
 
     assert nk.mad(negative_wikipedia_example, constant=constant) == constant
+
+
+def create_sample_cluster_data(random_state):
+
+    rng = nk.misc.check_random_state(random_state)
+
+    # generate simple sample data
+    K = 5
+    points = np.array([[0., 0.], [-0.3, -0.3], [0.3, -0.3], [0.3, 0.3], [-0.3, 0.3]])
+    centres = np.column_stack((rng.choice(K, size=K, replace=False), rng.choice(K, size=K, replace=False)))
+    angles = rng.uniform(0, 2 * np.pi, size=K)
+    offset = rng.uniform(size=2)
+
+    # place a cluster at each centre
+    data = []
+    for i in range(K):
+        rotation = np.array([[np.cos(angles[i]), np.sin(angles[i])], [-np.sin(angles[i]), np.cos(angles[i])]])
+        data.extend(centres[i] + points @ rotation)
+    rng.shuffle(data)
+
+    # shift both data and target centres
+    data = np.vstack(data) + offset
+    centres = centres + offset
+
+    return data, centres
+
+
+def test_kmedoids():
+
+    # set random state for reproducible results
+    random_state_data = 33
+    random_state_clustering = 77
+
+    # create sample data
+    data, centres = create_sample_cluster_data(random_state_data)
+    K = len(centres)
+
+    # run kmedoids
+    res = nk.cluster(data, method='kmedoids', n_clusters=K, random_state=random_state_clustering)
+
+    # check results (sort, then compare rows of res[1] and centres)
+    assert np.allclose(res[1][np.lexsort(res[1].T)], centres[np.lexsort(centres.T)])
+
+
+def test_kmeans():
+
+    # set random state for reproducible results
+    random_state_data = 54
+    random_state_clustering = 76
+
+    # create sample data
+    data, centres = create_sample_cluster_data(random_state_data)
+    K = len(centres)
+
+    # run kmeans
+    res = nk.cluster(data, method='kmeans', n_clusters=K, n_init=1, random_state=random_state_clustering)
+
+    # check results (sort, then compare rows of res[1] and centres)
+    assert np.allclose(res[1][np.lexsort(res[1].T)], centres[np.lexsort(centres.T)])
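One detail worth noting in these cluster tests: recovered centres come back from ``nk.cluster()`` in arbitrary order, so both arrays are row-sorted with ``np.lexsort`` before comparison. A minimal, self-contained illustration of that trick (the arrays below are toy values):

.. code-block:: python

    import numpy as np

    # Two arrays containing the same rows in different orders
    found = np.array([[1.3, 0.2], [0.3, 0.7], [1.3, 0.9]])
    truth = np.array([[0.3, 0.7], [1.3, 0.9], [1.3, 0.2]])

    # np.lexsort(a.T) sorts rows lexicographically (the last column acts as
    # the primary key); applying it to both sides makes row order irrelevant
    assert np.allclose(found[np.lexsort(found.T)], truth[np.lexsort(truth.T)])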