Source code for maad.features.temporal

#!/usr/bin/env python
"""
Collection of functions to extract features from audio signals
License: New BSD License
"""

# =============================================================================
# Load the modules
# =============================================================================
# Import external modules
import numpy as np
import pandas as pd
from scipy import interpolate
from scipy.optimize import root

# Import internal modules
from maad.util import moments
from maad.sound import envelope, trim
from maad import sound
from numpy.lib.function_base import _quantile_is_valid

#%%
# =============================================================================
# public functions
# =============================================================================
#%%

[docs]
def temporal_moments(s, fs=None, roi=None):
    """
    Computes the first 4th moments of an audio signal, mean, variance, skewness, kurtosis.

    Parameters
    ----------
    s : 1D array
        Audio to process
    fs : float, optional
        Sampling frequency of audio signal
        The default is None
    roi : pandas.Series, optional
        Region of interest where peak frequency will be computed.
        Series must have a valid input format with index: min_t, min_f, max_t, max_f.
        The default is None.        

    Returns
    -------
    mean : float
        mean of the audio
    var : float
        variance  of the audio
    skew : float
        skewness of the audio
    kurt : float
        kurtosis of the audio

    Examples
    --------
    >>> from maad import sound, features
    >>> s, fs = sound.load('../data/spinetail.wav')
    >>> sm, sv, ss, sk = features.temporal_moments (s, fs)
    >>> print('mean: %2.2f / var: %2.5f / skewness: %2.4f / kurtosis: %2.2f' % (sm, sv, ss, sk))
    mean: -0.00 / var: 0.00117 / skewness: -0.0065 / kurtosis: 24.71

    """
    # force s to be ndarray
    s = np.asarray(s)

    if (roi is not None):
        if (fs is not None) :
            s = trim(s, fs, min_t=roi.min_t, max_t=roi.max_t)
        else : 
            raise ValueError("If 'roi' is not None, 'fs' must be defined")     
        
    return moments(s)


#%%

[docs]
def zero_crossing_rate(s, fs, roi=None):
    """
    Compute the zero crossing rate feature of an audio signal.

    The zero-crossing rate is the rate of sign-changes along a signal, i.e.,
    the rate at which the signal changes from positive to zero to negative or
    from negative to zero to positive. This feature has been used widely
    in speech recognition and music information retrieval,
    being a key feature to classify percussive sounds [1]_.

    Parameters
    ----------
    s : 1D array
        Audio to process (wav)
    fs : float
        Sampling frequency of audio signal
    roi : pandas.Series, optional
        Region of interest where peak frequency will be computed.
        Series must have a valid input format with index: min_t, min_f, max_t, max_f.
        The default is None.        

    Returns
    -------
    zcr : float
        number of zero crossing /s

    References
    ----------
    .. [1] https://en.wikipedia.org/wiki/Zero-crossing_rate

    Examples
    --------
    >>> from maad import sound, features
    >>> s, fs = sound.load('../data/spinetail.wav')
    >>> features.zero_crossing_rate(s,fs)
    10500.397192384766

    """

    if (roi is not None):
        s = trim(s, fs, min_t=roi.min_t, max_t=roi.max_t)

    zero_crosses = np.nonzero(np.diff(s > 0))[0]
    duration = len(s) / fs
    zcr = 1/duration * len(zero_crosses)

    return zcr


#%%

[docs]
def temporal_quantile(s, fs, q=[0.05, 0.25, 0.5, 0.75, 0.95], nperseg=1024, roi=None, mode="spectrum",
                      env_mode="fast", as_pandas=False, amp=False, **kwargs):
    """
    Compute the q-th temporal quantile of the waveform or spectrum. If a
    region of interest with time and spectral limits is provided, the q-th
    temporal quantile is computed on the selection.

    Parameters
    ----------
    s : 1D array
        Input audio signal
    fs : float
        Sampling frequency of audio signal
    q : array or float, optional
        Quantile or sequence of quantiles to compute, which must be between 0 and 1
        inclusive.
        The defaul is [0.05, 0.25, 0.5, 0.75, 0.95].
    nperseg : int, optional
        Length of segment to compute the FFT when mode is spectrum. The default is 1024.
        Size of each frame to compute the envelope. The largest, the highest is
        the approximation. The default is 5000.
    roi : pandas.Series, optional
        Region of interest where peak frequency will be computed.
        Series must have a valid input format with index: min_t, min_f, max_t, max_f.
        The default is None.
    mode : str, optional, default is 'spectrum'
        - 'spectrum' : The quantile is calculated in the espectrum.
        - 'envelope' : The quantile is calculated in the sound wave.
    env_mode : str, optional, default is `fast`
        - `fast` : The sound is first divided into frames (2d) using the
            function wave2timeframes(s), then the max of each frame gives a
            good approximation of the envelope.
        - `Hilbert` : estimation of the envelope from the Hilbert transform.
            The method is slow
    as_pandas: bool
        Return data as a pandas.Series. This is usefull when computing multiple features
        over a signal. Default is False.
    amp: bool, default is False
        Return the quantiles with its amplitude.

    Returns
    -------
    quantiles: pandas Series/DataFrame or Numpy array
        Temporal quantiles of waveform and its amplitude (optional).

    Examples
    --------
    >>> from maad import features, sound
    >>> s, fs = sound.load('../data/spinetail.wav')

    Compute the q-th temporal quantile in the spectrum

    >>> qt = features.temporal_quantile(s, fs, [0.05, 0.25, 0.5, 0.75, 0.95], as_pandas=True)
    >>> print(qt)
    0.05     1.219048
    0.25     5.712109
    0.50    11.818957
    0.75    16.555828
    0.95    17.751655
    dtype: float64

    Compute the q-th temporal quantile in the waveform, using the envelope

    >>> qt = features.temporal_quantile(s, fs, [0.05, 0.25, 0.5, 0.75, 0.95], mode="envelope", as_pandas=True)
    >>> print(qt)
    0.05     1.208300
    0.25     5.716188
    0.50    11.804161
    0.75    15.731135
    0.95    17.752714
    dtype: float64

    """
    q = np.asanyarray(q)
    if not _quantile_is_valid(q):
        raise ValueError("Percentiles must be in the range [0, 1]")

    # compute quantiles in the time amp
    if mode=="envelope":
        if roi is None:
            min_t = 0
        elif fs is not None:
            s = sound.trim(s, fs, min_t=roi.min_t, max_t=roi.max_t)
            min_t = roi.min_t
        else :
            raise ValueError("If 'roi' is not None, 'fs' must be defined")

        env = sound.envelope(s**2, env_mode, nperseg)
        t = min_t+np.arange(0,len(env),1)*len(s)/fs/len(env)
        energy = pd.Series(env, index=t)

        # Compute temporal quantile
        norm_cumsum = energy.cumsum()/energy.sum()
        spec_quantile = []
        for quantile in q:
            spec_quantile.append(energy.index[np.where(norm_cumsum>=quantile)[0][0]])

        if amp:
            if as_pandas:
                out = pd.DataFrame({"time":spec_quantile, "amp":energy[spec_quantile].values}, index=q)
            else:
                out = np.transpose(np.array([q, spec_quantile, energy[spec_quantile]]))
        else:
            if as_pandas:
                out = pd.Series(spec_quantile, index=q)
            else:
                out = np.array(spec_quantile)

        return out

    elif mode=="spectrum":
        if roi is None:
            Sxx,tn,_,_ = sound.spectrogram(s, fs, nperseg=nperseg, **kwargs)
        else:
            Sxx,tn,_,_ = sound.spectrogram(s, fs, nperseg=nperseg,
                                        tlims=[roi.min_t, roi.max_t],
                                        flims=[roi.min_f, roi.max_f],
                                        **kwargs)
        Sxx = pd.Series(np.average(Sxx,axis=0), index=tn)

        # Compute spectral q
        norm_cumsum = Sxx.cumsum()/Sxx.sum()
        spec_quantile = []
        for quantile in q:
            spec_quantile.append(Sxx.index[np.where(norm_cumsum>=quantile)[0][0]])

        if amp:
            if as_pandas:
                out = pd.DataFrame({"time":spec_quantile, "amp":Sxx[spec_quantile].values}, index=q)
            else:
                out = np.transpose(np.array([q, spec_quantile, Sxx[spec_quantile]]))
        else:
            if as_pandas:
                out = pd.Series(spec_quantile, index=q)
            else:
                out = np.array(spec_quantile)

        return out
    else:
        raise Exception("Invalid mode. Mode should be 'spectrum' or 'envelope'")

    
#%%

[docs]
def temporal_duration(s, fs, nperseg=1024, roi=None, mode="spectrum",
                      env_mode="fast", as_pandas=False, **kwargs):
    """
    Compute the temporal duration of the waveform. If a region of interest with time
    and spectral limits is provided, the temporal duration is computed on the selection.

    Parameters
    ----------
    s : 1D array
        Input audio signal
    fs : float
        Sampling frequency of audio signal
    nperseg : int, optional
        Length of segment to compute the FFT. The default is 1024.
    roi : pandas.Series, optional
        Region of interest where temporal duration will be computed.
        Series must have a valid input format with index: min_t, min_f, max_t, max_f.
        The default is None.
    mode : str, optional, default is 'spectrum'
        - 'spectrum' : The quantile is calculated using the spectrum.
        - 'amplitude' : The quantile is calculated using the enveloppe sound wave.
    env_mode : str, optional, default is `fast`
        - `fast` : The sound is first divided into frames (2d) using the
            function wave2timeframes(s), then the max of each frame gives a
            good approximation of the envelope.
        - `Hilbert` : estimation of the envelope from the Hilbert transform.
            The method is slow
    as_pandas_series: bool
        Return data as a pandas.Series. This is usefull when computing multiple features
        over a signal. Default is False.

    Returns
    -------
    duration: pandas Series/DataFrame or Numpy array
        Temporal duration of signal using energy quantiles.

    Examples
    --------
    >>> from maad import features, sound
    >>> s, fs = sound.load('../data/spinetail.wav')

    Compute the temporal duration of the time energy

    >>> duration, duration_90 = features.temporal_duration(s, fs)
    >>> print("Duration 50%: {:.4f} / Duration 90%: {:.4f}".format(duration, duration_90))
    Duration 50%: 10.8437 / Duration 90%: 16.5326

    """
    # Compute temporal quantile
    q = temporal_quantile(s, fs, [0.05, 0.25, 0.75, 0.95], nperseg, roi, mode, 
                          env_mode, as_pandas, **kwargs)

    # Compute temporal duration
    if as_pandas:
        out = pd.Series([np.abs(q.iloc[2]-q.iloc[1]), np.abs(q.iloc[3]-q.iloc[0])],
                        index=["duration_50", "duration_90"])
    else:
        out = np.array([np.abs(q[2]-q[1]), np.abs(q[3]-q[0])])

    return out


#%%

[docs]
def all_temporal_features(s, fs, nperseg=1024, roi=None, display=False, **kwargs):
    """
    Compute all the temporal features for a signal.

    Parameters
    ----------
    s : 1D array
        Input audio signal
    fs : float
        Sampling frequency of audio signal
    nperseg : int, optional
        Length of segment to compute the FFT. The default is 1024.
    roi : pandas.Series, optional
        Region of interest where temporal features will be computed.
        Series must have a valid input format with index: min_t, min_f, max_t, max_f.
        The default is None.
    kwargs : additional keyword arguments
        If `window='hann'`, additional keyword arguments to pass to
        `sound.spectrum`.

    Returns
    -------
    temporal_features : pandas DataFrame
        DataFrame with all temporal features computed in the spectrum
    
    Examples
    --------
    >>> from maad import features, sound
    >>> s, fs = sound.load('../data/spinetail.wav')

    Compute all the temporal features

    >>> temporal_features = features.all_temporal_features(s,fs)
    >>> print(temporal_features.iloc[0])
    sm            -2.043264e-19
    sv             1.167074e-03
    ss            -6.547980e-03
    sk             2.471161e+01
    Time 5%        1.219048e+00
    Time 25%       5.712109e+00
    Time 50%       1.181896e+01
    Time 75%       1.655583e+01
    Time 95%       1.775166e+01
    zcr            1.050040e+04
    duration_50    1.001495e+01
    duration_90    1.654441e+01
    Name: 0, dtype: float64
    """

    tm  = temporal_moments(s, fs, roi)
    zcr = zero_crossing_rate(s, fs, roi)
    qt = temporal_quantile(s, fs, [0.05, 0.25, 0.5, 0.75, 0.95], nperseg, roi, mode="spectrum", **kwargs)
    duration_50, duration_90 = temporal_duration(s, fs, nperseg, roi, mode="envelope")

    temporal_features = pd.DataFrame({"sm":tm[0], "sv":tm[1], "ss":tm[2], "sk":tm[3],
                                    "Time 5%":qt[0], "Time 25%":qt[1], "Time 50%":qt[2], 
                                    "Time 75%":qt[3], "Time 95%":qt[4], "zcr":zcr,
                                    "duration_50":duration_50, "duration_90":duration_90}, index=[0])

    if display: print(temporal_features)

    return temporal_features


if __name__ == "__main__":
    import doctest
    doctest.testmod()