Source code for maad.util.math_func

#!/usr/bin/env python
""" 
Mathematical tools for audio signal processing.
"""
# =============================================================================
# Load the modules
# =============================================================================
# Import external modules
import matplotlib.pyplot as plt
import numpy as np 
from numpy import mean, median, var
from scipy.ndimage import uniform_filter1d # for fast running mean
from scipy.signal import periodogram, welch
import pandas as pd
# min value
import sys
_MIN_ = sys.float_info.min

# Import internal modules
from maad.util import linear_scale


#%%
# =============================================================================
# public functions
# =============================================================================


[docs]
def running_mean(x, N, mode="nearest"):
    """
    Compute fast running mean for a window size N.
    
    Parameters
    ----------
    x :  1d ndarray of scalars
        Vector 

    mode : {'reflect', 'constant', 'nearest', 'mirror', 'wrap'}, optional, 
        The `mode` parameter determines how the input array is extended
        when the filter overlaps a border. Default is 'nearest'. Behavior
        for each valid value is as follows:
    
        'reflect' (`d c b a | a b c d | d c b a`)
            The input is extended by reflecting about the edge of the last
            pixel.
    
        'constant' (`k k k k | a b c d | k k k k`)
            The input is extended by filling all values beyond the edge with
            the same constant value, defined by the `cval` parameter.
    
        'nearest' (`a a a a | a b c d | d d d d`)
            The input is extended by replicating the last pixel.
    
        'mirror' (`d c b | a b c d | c b a`)
            The input is extended by reflecting about the center of the last
            pixel.
    
        'wrap' (`a b c d | a b c d | a b c d`)
            The input is extended by wrapping around to the opposite edge.
            
    N : int
        length of window to compute the mean
        
    Returns
    -------
    x_mean : 1d ndarray of scalars
        Vector with the same dimensions than the original variable x
        
    Examples
    --------
    >>> maad.util.running_mean([2, 8, 0, 4, 1, 9, 9, 0], N=3)
        array([4, 3, 4, 1, 4, 6, 6, 3])
        
    """         
    x_mean = uniform_filter1d(x, size=N, mode="nearest")
    return x_mean


#%%

[docs]
def get_unimode (X, mode ='median', axis=1, N=7, N_bins=100, verbose=False):
    """
    Get the statistical mode or modal value which is the most common number in the 
    dataset.
    
    Parameters
    ----------
    X :  1d or 2d ndarray of scalars
        Vector or matrix 
                
    mode : str, optional, default is 'median'
        Select the mode to remove the noise
        Possible values for mode are :
        - 'ale' : Adaptative Level Equalization algorithm [Lamel & al. 1981]
        - 'median' : subtract the median value
        - 'mean' : subtract the mean value (DC)
    
    axis : integer, default is 1
        if matrix, estimate the mode for each row (axis=0) or each column (axis=1)
        
    N : int (only for mode = "ale")
        length of window to compute the running mean of the histogram
        
    N_bins : int (only for mode = "ale")
        number of bins to compute the histogram
            
    verbose : boolean, optional, default is False
        print messages into the consol or terminal if verbose is True
                      
    Returns
    -------
    unimode_value : float
        The most common number in the dataset
        
    Notes
    -----
    ale : Adaptative Level Equalization algorithm from Lamel et al., 1981 :
    L.F. Lamel, L.R. Rabiner, A.E. Rosenberg, J.G. Wilpon
    An improved endpoint detector for isolated word recognition
    IEEE Trans. ASSP, ASSP-29 (1981), pp. 777-785
    `DOI: 10.1109/TASSP.1981.1163642 <https://doi.org/10.1109/TASSP.1981.1163642>`_
    
    Examples
    --------
    
    This function is interesting to obtain the background noise (BGN) profile (e.g. frequency bin
    by frequency bin) of a spectrogram
    
    >>> w, fs = maad.sound.load('../data/cold_forest_daylight.wav') 
    >>> Sxx_power,tn,fn,_ = maad.sound.spectrogram(w,fs,window='hann',noverlap=512, nFFT=1024)
    >>> Sxx_dB = maad.util.power2dB(Sxx_power)
    >>> BGN_med = maad.util.get_unimode (Sxx_dB, mode='median', axis=1)
    >>> import matplotlib.pyplot as plt 
    >>> plt.plot(fn,maad.util.mean_dB(Sxx_dB,axis=1))
    >>> plt.plot(fn,BGN_med)
    
    Extract the background noise from mean
    
    >>> BGN_mean = maad.util.get_unimode (Sxx_dB, mode='mean', axis=1)
    >>> plt.plot(fn,BGN_mean)
    
    Extract the background noise from ale (i.e. unimode)
    
    >>> BGN_ale = maad.util.get_unimode (Sxx_dB, mode='ale', N=7, N_bins=100, axis=1)
    >>> plt.plot(fn,BGN_ale)
    
    """         
    if X.ndim ==2: 
        if axis == 0:
            X = X.transpose()
            axis = 1
    elif X.ndim ==1: 
        axis = 0
        
    if mode=='ale':
                
        if X.ndim ==2:
            unimode_value = []
            for i, x in enumerate(X):  
                # Min and Max of the envelope (without taking into account nan)
                x_min = np.nanmin(x)
                x_max = np.nanmax(x)
                # Compute a 50-bin histogram ranging between Min and Max values
                hist, bin_edges = np.histogram(x, bins=N_bins, range=(x_min, x_max))
                
                # smooth the histogram by running mean
                hist_smooth = running_mean(hist, N, mode="nearest")
                    
                # find the maximum of the peak with quadratic interpolation
                # don't take into account the first 4 bins.
                imax = np.argmax(hist_smooth) 
               
                unimode_value.append(bin_edges[imax])
                
            # transpose the vector
            unimode_value = np.asarray(unimode_value)
            unimode_value = unimode_value.transpose()
        else:
            x = X
            # Min and Max of the envelope (without taking into account nan)
            x_min = np.nanmin(x)
            x_max = np.nanmax(x)
            
            # Compute a 50-bin histogram ranging between Min and Max values
            hist, bin_edges = np.histogram(x, bins=N_bins, range=(x_min, x_max))
            
            # smooth the histogram by running mean
            hist_smooth = running_mean(hist, N, mode="nearest")
                                                 
            # find the maximum of the peak with quadratic interpolation
            imax = np.argmax(hist_smooth)
                        
            # assuming an additive noise model : noise_bckg is the max of the histogram
            # as it is an histogram, the value is unimode_value = bin_edges_interp[np.argmax(hist_interp)]
            unimode_value = bin_edges[imax]

    elif mode=='median':
        unimode_value = median(X, axis=axis)
        
    elif mode=='mean':
        unimode_value = mean(X, axis=axis)
        
    return unimode_value


#%%

[docs]
def rms(s):
    """
    Compute the root-mean-square (RMS) level of an input signal. 
    
    RMS is defined as the square root of the arithmetic mean of the square of a set of numbers [1]. The RMS is used to estimate de mean amplitude level of an audio signal or any alternative time series.
    
    
    Parameters
    ----------
    s : 1D array
        Input signal to process

    Returns
    -------
    rms: float
        Root mean square of input signal
    
    References
    ----------
    .. [1] 'Root mean square' (2010). Wikipedia. Available at https://en.wikipedia.org/wiki/Root_mean_square
    
    Examples
    --------
    >>> from maad import sound, util
    >>> s, fs = sound.load('../data/spinetail.wav')
    >>> rms_value = util.rms(s)
    
    """
    return np.sqrt(np.mean(s**2))


#%%

[docs]
def skewness (x, axis=None):
    """
    Compute the skewness (asymetry) of an audio signal.
    
    Parameters
    ----------
    x : ndarray of floats 
        1d signal or 2d matrix
    axis : integer, optional, default is None
        select the axis to compute the kurtosis
        The default is to compute the mean of the flattened array.
                            
    Returns
    -------    
    sk : float or ndarray of floats
        skewness of x 
        if x is a 1d vector => single value
        if x is a 2d matrix => array of values corresponding to the number of
        points in the other axis
    
    Examples
    --------
    >>> from maad import sound, util
    >>> s, fs = sound.load('../data/spinetail.wav')
    >>> util.skewness(s)
    -0.006547980427883208
    
    """
    if isinstance(x, (np.ndarray)) == True:
        if axis is None:
            # flatten the array
            Nf = len(np.ndarray.flatten((x)))
        else:
            Nf = x.shape[axis]
        mean_x =  np.mean(x, axis=axis)
        std_x = np.std(x, axis=axis)
        if axis == 0 :
            z = x - mean_x[np.newaxis, ...]
        else :
            z = x - mean_x[..., np.newaxis]
        sk = (np.sum(z**3, axis=axis)/(Nf-1))/std_x**3
    else:
        print ("WARNING: type of x must be ndarray") 
        sk = None

    # test if ku is an array with a single value
    if (isinstance(sk, (np.ndarray)) == True) and (len(sk) == 1):
        sk = float(sk)

    return sk


#%%

[docs]
def kurtosis (x, axis=None):
    """
    Compute the kurtosis (tailedness or curved or arching) of an audio signal.
    
    Parameters
    ----------
    x : ndarray of floats 
        1d signal or 2d matrix       
    axis : integer, optional, default is None
        select the axis to compute the kurtosis
        The default is to compute the mean of the flattened array.
                            
    Returns
    -------    
    ku : float or ndarray of floats
        kurtosis of x 
        if x is a 1d vector => single value
        if x is a 2d matrix => array of values corresponding to the number of
        points in the other axis
        
    Examples
    --------
    >>> from maad import sound, util
    >>> s, fs = sound.load('../data/spinetail.wav')
    >>> util.kurtosis(s)
    24.711610834321217        
        
    """
    if isinstance(x, (np.ndarray)) == True:
        if axis is None:
            # flatten the array
            Nf = len(np.ndarray.flatten((x)))
        else:
            Nf = x.shape[axis]
        mean_x =  np.mean(x, axis=axis)
        std_x = np.std(x, axis=axis)
        if axis==0 :
            z = x - mean_x[np.newaxis, ...]
        else:
            z = x - mean_x[..., np.newaxis]
        ku = (np.sum(z**4, axis=axis)/(Nf-1))/std_x**4
    else:
        print ("WARNING: type of x must be ndarray") 
        ku = None
        
    # test if ku is an array with a single value
    if (isinstance(ku, (np.ndarray)) == True) and (len(ku) == 1):
        ku = float(ku)
       
    return ku


#%%

[docs]
def moments (X, axis=None):
    """
    Computes the first 4th moments of a vector (1d, ie. spectrum or waveform) 
    or spectrogram (2d) 
    
    - mean
    - variance
    - skewness
    - kurtosis
    
    Parameters
    ----------
    X : ndarray of floats
        vector (1d : spectrum, waveform) or matrix (2d : spectrogram). 
    axis : interger, optional, default is None
        if spectrogram (2d), select the axis to estimate the moments.
        
    Returns
    -------
    mean : float 
        mean of X
    var : float 
        variance  of X
    skew : float
        skewness of X
    kurt : float
        kurtosis of X

    Examples
    --------
    >>> from maad import sound, util
    >>> s, fs = sound.load('../data/spinetail.wav')
    >>> mean, var, skew, kurt = util.moments(s)
    >>> print ('mean:%2.4f / var:%2.4f / skew:%2.4f / kurt:%2.4f' %(mean, var, skew, kurt)) 
    mean:-0.0000 / var:0.0012 / skew:-0.0065 / kurt:24.7116    
    """
    # force X to be ndarray
    X = np.asarray(X)
    
    return mean(X, axis), var(X, axis), skewness(X, axis), kurtosis(X, axis)


#%%

[docs]
def entropy (x, axis=0):
    """
    Compute the entropy of a vector (waveform) or matrix (spectrogram).
    
    Parameters
    ----------
    x : ndarray of floats
        x is a vector (1d) or a matrix (2d)

    axis : int, optional, default is 0
        select the axis where the entropy is computed
        if x is a vector, axis=0
        if x is a 2d ndarray, axis=0 => rows, axis=1 => columns
                
    Returns
    -------
    H : float or ndarray of floats
        entropy of x
        
    Examples
    --------
    >>> from maad import sound, util
    >>> s, fs = sound.load('../data/spinetail.wav')
    >>> H = util.entropy(s)
    >>> print ('Entropy is %2.4f' %H) 
    Entropy is 0.9998 
        
    """
    # force x to be ndarray
    x = np.asarray(x)
    
    if isinstance(x, (np.ndarray)) == True:
        if x.ndim > axis:
            if x.shape[axis] == 0: 
                print ("WARNING: x is empty") 
                H = None 
            elif x.shape[axis] == 1:
                H = 0 # null entropy
            elif x.any() == 0: # test if there are only zeros
                if x.ndim == 1 : # case vector
                    H = 1 # entropy = 1
                else : # case matrix
                    if axis == 0 : H = np.ones(x.shape[1]) # entropy = 1
                    if axis == 1 : H = np.ones(x.shape[0]) # entropy = 1
            else:
                # if datain contains negative values -> rescale the signal between 
                # between posSitive values (for example (0,1))
                if np.min(x)<0:
                    x = linear_scale(x,minval=0,maxval=1)
                # length of datain along axis
                n = x.shape[axis]
                # Tranform the signal into a Probability mass function (pmf)
                # Sum(pmf) = 1
                if axis == 0 :
                    pmf = x/np.sum(x,axis)
                elif axis == 1 :                     
                    pmf = (x.transpose()/np.sum(x,axis)).transpose()
                pmf[pmf==0] = _MIN_
                #normalized by the length : H=>[0,1]
                H = -np.sum(pmf*np.log(pmf),axis)/np.log(n)
        else:
            print ("WARNING :axis is greater than the dimension of the array")    
            H = None 
    else:
        print ("WARNING: x must be ndarray")   
        H = None 

    return H