Source code for maad.sound.spectral_subtraction

#!/usr/bin/env python
"""
Collection of functions to remove background noise from spectrogram using
spectral subtraction methods
"""
#
# Authors:  Juan Sebastian ULLOA <lisofomia@gmail.com>
#           Sylvain HAUPERT <sylvain.haupert@mnhn.fr>
#
# License: New BSD License

# =============================================================================
# Load the modules
# =============================================================================
# Import external modules
from maad.util import (plot1d, plot2d, running_mean,
                       get_unimode, mean_dB, power2dB)
import matplotlib.pyplot as plt
import numpy as np
from scipy.ndimage import morphology
from skimage.morphology import reconstruction
from scipy import signal
# min value
import sys
_MIN_ = sys.float_info.min

# Import internal modules

# %%
# =============================================================================
# public functions
# =============================================================================

[docs] def remove_background(Sxx, gauss_win=50, gauss_std=25, beta1=1, beta2=1, llambda=1, verbose=False, display = False, savefig=None, **kwargs): """ Remove background noise using spectral subtraction. Based on the spectrum of the A posteriori noise profile. It computes an atenuation map in the time-frequency domain. See [1]_ or [2]_ for more detail about the algorithm. Parameters ---------- Sxx : 2d ndarray of scalars Spectrogram gauss_win=50 : int, optional, default: 50 Number of points in the gaussian window gauss_std = 25 The standard deviation, sigma used to create the gaussian window beta1 : scaler, optional, default: 1 beta1 has to be >0 Should be close to 1 beta2: scaler, optional, default: 1 beta2 has to be >0 better to not change llambda : int, optional, default: 1 over-subtraction factor to compensate variation of noise amplitude. Should be close to 1 verbose : boolean, optional, default is False Print messages and speed display : boolean, optional, default is False Display the signal if True savefig : string, optional, default is None Root filename (with full path) is required to save the figures. Postfix is added to the root filename. kwargs, optional. This parameter is used by plt.plot and savefig functions - savefilename : str, optional, default :'_spectro_after_noise_subtraction.png' Postfix of the figure filename - figsize : tuple of integers, optional, default: (4,10) width, height in inches. - title : string, optional, default : 'Spectrogram' title of the figure - xlabel : string, optional, default : 'Time [s]' label of the horizontal axis - ylabel : string, optional, default : 'Amplitude [AU]' label of the vertical axis - cmap : string or Colormap object, optional, default is 'gray' See https://matplotlib.org/examples/color/colormaps_reference.html in order to get all the existing colormaps examples: 'hsv', 'hot', 'bone', 'tab20c', 'jet', 'seismic', 'viridis'... - vmin, vmax : scalar, optional, default: None `vmin` and `vmax` are used in conjunction with norm to normalize luminance data. Note if you pass a `norm` instance, your settings for `vmin` and `vmax` will be ignored. - extent : scalars (left, right, bottom, top), optional, default: None The location, in data-coordinates, of the lower-left and upper-right corners. If `None`, the image is positioned such that the pixel centers fall on zero-based (row, column) indices. - dpi : integer, optional, default is 96 Dot per inch. For printed version, choose high dpi (i.e. dpi=300) => slow For screen version, choose low dpi (i.e. dpi=96) => fast - format : string, optional, default is 'png' Format to save the figure ... and more, see matplotlib Returns ------- Sxx_out : 2d ndarray of scalar Spectrogram after denoising noise_profile : 1d darray of scalar noise_profile BGNxx : 2d ndarray of scalar Noise map References ---------- .. [1] Steven F. Boll, "Suppression of Acoustic Noise in Speech Using Spectral Subtraction", IEEE Transactions on Signal Processing, 27(2),pp 113-120,1979 `DOI:10.1109/TASSP.1979.1163209 <https://doi.org/10.1109/TASSP.1979.1163209>`_ .. [2] Y. Ephraim and D. Malah, Speech enhancement using a minimum mean square error short-time spectral amplitude estimator, IEEE. Transactions in Acoust., Speech, Signal Process., vol. 32, no. 6, pp. 11091121, Dec. 1984. `DOI:10.1109/TASSP.1984.1164453 <https://doi.org/10.1109/TASSP.1984.1164453>`_ Examples -------- Load audio recording and convert it into spectrogram >>> s, fs = maad.sound.load('../data/rock_savanna.wav') >>> Sxx,tn,fn,ext = maad.sound.spectrogram (s, fs) Convert linear spectrogram into dB and add 96dB (which is the maximum dB for 16 bits wav) in order to have positive values >>> Sxx_dB = maad.util.power2dB(Sxx) + 96 Remove stationnary noise from the spectrogram in dB >>> Sxx_dB_noNoise, noise_profile, _ = maad.sound.remove_background(Sxx_dB) Plot both spectrograms >>> import matplotlib.pyplot as plt >>> import numpy as np >>> fig, (ax1, ax2) = plt.subplots(2, 1) >>> maad.util.plot2d(Sxx_dB, ax=ax1, extent=ext, title='original', vmin=np.median(Sxx_dB), vmax=np.median(Sxx_dB)+40) >>> maad.util.plot2d(Sxx_dB_noNoise, ax=ax2, extent=ext, title='Without stationary noise', vmin=np.median(Sxx_dB_noNoise), vmax=np.median(Sxx_dB_noNoise)+40) >>> fig.set_size_inches(15,8) >>> fig.tight_layout() """ if verbose: print(72 * '_') print('Determine the profile of the stochastic background noise...') Nf, Nw = Sxx.shape # average spectrum (assumed to be ergodic) mean_profile = np.mean(Sxx, 1) # White Top Hat (to remove non uniform background) = i - opening(i) selem = signal.gaussian(gauss_win, gauss_std) noise_profile = morphology.grey_opening(mean_profile, structure=selem) # Remove the artefact at the end of the spectrum (2 highest frequencies) noise_profile[-2:] = mean_profile[-2:] noise_profile[:2] = mean_profile[:2] # Create a matrix with the noise profile noise_spectro = np.kron(np.ones((Nw, 1)), noise_profile) noise_spectro = noise_spectro.transpose() # snr estimate a posteriori SNR_est = Sxx - noise_spectro # to avoid dividing by 0 SNR_est[SNR_est <= 0] = 0 noise_spectro[noise_spectro == 0] = _MIN_ # ratio SNR_est = (Sxx/noise_spectro) # keep only positive values SNR_est = SNR_est*(SNR_est > 0) # compute attenuation map # if llambda, beta1 and beta 2 are equal to 1, it is (1 - noise_spectro) an_lk = (1-llambda*((1./(SNR_est+1))**beta1))**beta2 an_lk = an_lk*(an_lk > 0) # keep only positive values if verbose: print('Remove the stochastic background noise...') # Apply the attenuation map to the STFT coefficients Sxx_out = an_lk*Sxx # noise map BGNxx BGNxx = Sxx - Sxx_out # if nan in the image, convert nan into 0 np.nan_to_num(Sxx_out, 0) # Set negative value to 0 Sxx_out[Sxx_out < 0] = 0 # Display if display: ylabel = kwargs.pop('ylabel', 'Frequency [Hz]') xlabel = kwargs.pop('xlabel', 'Time [sec]') title = kwargs.pop('title', 'Spectrogram without stationnary noise') cmap = kwargs.pop('cmap', 'gray') vmin = kwargs.pop('vmin', np.min(Sxx_out)) vmax = kwargs.pop('vmax', np.max(Sxx_out)) extent = kwargs.pop('extent', None) if extent is not None: fn = np.arange(0, Nf)*(extent[3]-extent[2])/(Nf-1) + extent[2] xlabel = 'frequency [Hz]' figsize = kwargs.pop('figsize', (4, 0.33*(extent[1]-extent[0]))) else: fn = np.arange(Nf) xlabel = 'pseudofrequency [points]' figsize = kwargs.pop('figsize', (4, 13)) _, fig = plot2d(Sxx_out, extent=extent, figsize=figsize, title=title, ylabel=ylabel, xlabel = xlabel, vmin=vmin, vmax=vmax, cmap=cmap, **kwargs) fig2, (ax1, ax2) = plt.subplots(2, sharex=True) fig2.set_size_inches((5, 4)) ax1, _ = plot1d(fn, mean_profile, ax=ax1, legend='Original profile', color='b', xlabel='', ylabel= 'Amplitude [dB]', figtitle='') ax1, _ = plot1d(fn, np.mean(BGNxx, axis=1), ax=ax1, legend='Noise profile', color='r', xlabel='', ylabel='Amplitude [dB]', figtitle='') ax2, _ = plot1d(fn, np.mean(Sxx_out, axis=1), ax=ax2, color='k', legend='Denoized profile', xlabel=xlabel, ylabel= 'Amplitude [dB]', figtitle='') fig2.tight_layout() # SAVE FIGURE if savefig is not None: dpi = kwargs.pop('dpi', 96) dpi = kwargs.pop('dpi', 96) bbox_inches = kwargs.pop('bbox_inches', 'tight') format = kwargs.pop('format', 'png') savefilename = kwargs.pop( 'savefilename', '_spectro_after_noise_subtraction') filename = savefig+savefilename+'.'+format print('\n''save figure : %s' % filename) fig.savefig(fname=filename, dpi=dpi, bbox_inches=bbox_inches, format=format, **kwargs) return Sxx_out, noise_profile, BGNxx
# %%
[docs] def remove_background_morpho(Sxx, q=0.1, display=False, savefig=None, **kwargs): """ Remove background noise in a spectrogram using mathematical morphology tool. Parameters ---------- Sxx : 2D numpy array Original spectrogram (or image) q : float Quantile which must be between 0 and 1 inclusive. The closest to one, the finest details are kept display : boolean, optional, default is False Display the signal if True savefig : string, optional, default is None Root filename (with full path) is required to save the figures. Postfix is added to the root filename. kwargs, optional. This parameter is used by plt.plot and savefig functions - savefilename : str, optional, default :'_spectro_after_noise_subtraction.png' Postfix of the figure filename - figsize : tuple of integers, optional, default: (4,10) width, height in inches. - title : string, optional, default : 'Spectrogram' title of the figure - xlabel : string, optional, default : 'Time [s]' label of the horizontal axis - ylabel : string, optional, default : 'Amplitude [AU]' label of the vertical axis - cmap : string or Colormap object, optional, default is 'gray' See https://matplotlib.org/examples/color/colormaps_reference.html in order to get all the existing colormaps examples: 'hsv', 'hot', 'bone', 'tab20c', 'jet', 'seismic', 'viridis'... - vmin, vmax : scalar, optional, default: None `vmin` and `vmax` are used in conjunction with norm to normalize luminance data. Note if you pass a `norm` instance, your settings for `vmin` and `vmax` will be ignored. - extent : scalars (left, right, bottom, top), optional, default: None The location, in data-coordinates, of the lower-left and upper-right corners. If `None`, the image is positioned such that the pixel centers fall on zero-based (row, column) indices. - dpi : integer, optional, default is 96 Dot per inch. For printed version, choose high dpi (i.e. dpi=300) => slow For screen version, choose low dpi (i.e. dpi=96) => fast - format : string, optional, default is 'png' Format to save the figure ... and more, see matplotlib Returns ------- Sxx_out : 2d ndarray of scalar Spectrogram after denoising noise_profile : 1d ndarray of scalar Noise profile BGNxx : 2d ndarray of scalar Noise map Examples -------- Load audio recording and convert it into spectrogram >>> s, fs = maad.sound.load('../data/rock_savanna.wav') >>> Sxx,tn,fn,ext = maad.sound.spectrogram (s, fs) Convert linear spectrogram into dB >>> Sxx_dB = maad.util.power2dB(Sxx) +96 Remove stationnary noise from the spectrogram >>> Sxx_dB_noNoise,_,_ = maad.sound.remove_background_morpho(Sxx_dB, q=0.5) Plot both spectrograms >>> import matplotlib.pyplot as plt >>> import numpy as np >>> fig, (ax1, ax2) = plt.subplots(2, 1) >>> maad.util.plot2d(Sxx_dB, ax=ax1, extent=ext, title='original', vmin=np.median(Sxx_dB), vmax=np.median(Sxx_dB)+40) >>> maad.util.plot2d(Sxx_dB_noNoise, ax=ax2, extent=ext, title='Without stationary noise',vmin=np.median(Sxx_dB_noNoise), vmax=np.median(Sxx_dB_noNoise)+40) >>> fig.set_size_inches(15,8) >>> fig.tight_layout() Load audio recording and convert it into spectrogram >>> s, fs = maad.sound.load('../data/rock_savanna.wav') >>> Sxx,tn,fn,ext = maad.sound.spectrogram (s, fs, tcrop=(0,20)) >>> Sxx_dB = maad.util.power2dB(Sxx) +96 Remove stationnary noise from the spectrogram with different q >>> Sxx_dB_noNoise_q25,_,_ = maad.sound.remove_background_morpho(Sxx_dB, q=0.25) >>> Sxx_dB_noNoise_q50,_,_ = maad.sound.remove_background_morpho(Sxx_dB, q=0.5) >>> Sxx_dB_noNoise_q75,_,_ = maad.sound.remove_background_morpho(Sxx_dB, q=0.75) Plot 3 spectrograms >>> import matplotlib.pyplot as plt >>> fig, (ax1, ax2, ax3) = plt.subplots(3, 1) >>> maad.util.plot2d(Sxx_dB_noNoise_q25, ax=ax1, extent=ext, title='Without stationary noise (q=0.25)',vmin=np.median(Sxx_dB_noNoise_q25), vmax=np.median(Sxx_dB_noNoise_q25)+40) >>> maad.util.plot2d(Sxx_dB_noNoise_q50, ax=ax2, extent=ext, title='Without stationary noise (q=0.50)',vmin=np.median(Sxx_dB_noNoise_q50), vmax=np.median(Sxx_dB_noNoise_q50)+40) >>> maad.util.plot2d(Sxx_dB_noNoise_q75, ax=ax3, extent=ext, title='Without stationary noise (q=0.75)',vmin=np.median(Sxx_dB_noNoise_q75), vmax=np.median(Sxx_dB_noNoise_q75)+40) >>> fig.set_size_inches(15,9) >>> fig.tight_layout() """ # Use morpho math tools to estimate the background noise BGNxx = reconstruction(seed=Sxx-(np.quantile(Sxx, q)), mask=Sxx, method='dilation') Sxx_out = Sxx - BGNxx # noise profile along time axis noise_profile = np.mean(BGNxx, 1) # Set negative value to 0 Sxx_out[Sxx_out < 0] = 0 # Display if display: ylabel = kwargs.pop('ylabel', 'Frequency [Hz]') xlabel = kwargs.pop('xlabel', 'Time [sec]') title = kwargs.pop('title', 'Spectrogram without stationnary noise') cmap = kwargs.pop('cmap', 'gray') vmin = kwargs.pop('vmin', np.min(Sxx_out)) vmax = kwargs.pop('vmax', np.max(Sxx_out)) extent = kwargs.pop('extent', None) Nf, Nw = Sxx.shape if extent is not None: fn = np.arange(0, Nf)*(extent[3]-extent[2])/(Nf-1) + extent[2] xlabel = 'frequency [Hz]' figsize = kwargs.pop('figsize', (4, 0.33*(extent[1]-extent[0]))) else: fn = np.arange(Nf) xlabel = 'pseudofrequency [points]' figsize = kwargs.pop('figsize', (4, 13)) _, fig = plot2d(BGNxx, extent=extent, figsize=figsize, title='Noise map', ylabel=ylabel, xlabel = xlabel, vmin=vmin, vmax=vmax, cmap=cmap, **kwargs) _, fig = plot2d(Sxx_out, extent=extent, figsize=figsize, title=title, ylabel=ylabel, xlabel = xlabel, vmin=vmin, vmax=vmax, cmap=cmap, **kwargs) fig2, (ax1, ax2) = plt.subplots(2, sharex=True) fig2.set_size_inches((5, 4)) ax1, _ = plot1d(fn, np.mean(Sxx, axis=1), ax=ax1, legend='Original profile', color='b', xlabel='', ylabel= 'Amplitude [dB]', figtitle='') ax1, _ = plot1d(fn, np.mean(BGNxx, 1), ax=ax1, legend='Noise profile', color='r', xlabel='', ylabel='Amplitude [dB]', figtitle='') ax2, _ = plot1d(fn, np.mean(Sxx_out, axis=1), ax=ax2, color='k', legend='Denoized profile', xlabel=xlabel, ylabel= 'Amplitude [dB]', figtitle='') fig2.tight_layout() # SAVE FIGURE if savefig is not None: dpi = kwargs.pop('dpi', 96) dpi = kwargs.pop('dpi', 96) bbox_inches = kwargs.pop('bbox_inches', 'tight') format = kwargs.pop('format', 'png') savefilename = kwargs.pop( 'savefilename', '_spectro_after_noise_subtraction') filename = savefig+savefilename+'.'+format print('\n''save figure : %s' % filename) fig.savefig(fname=filename, dpi=dpi, bbox_inches=bbox_inches, format=format, **kwargs) return Sxx_out, noise_profile, BGNxx
# %%
[docs] def remove_background_along_axis(Sxx, mode='median', axis=1, N=25, N_bins=50, display=False, savefig=None, **kwargs): """ Get the noisy profile along the defined axis and remove this profile from the spectrogram. Parameters ---------- Sxx : 2D numpy array Original spectrogram (or image) mode : str, optional, default is 'median' Select the mode to remove the noise Possible values for mode are : - 'ale' : Adaptative Level Equalization algorithm [Lamel & al. 1981] - 'median' : subtract the median value - 'mean' : subtract the mean value (DC) axis : integer, default is 1 if matrix, estimate the mode for each row (axis=0) or each column (axis=1) N : int, default is 25 length of window to compute the running mean of the noise profile N_bins : int (only for mode = "ale"), default is 50 number of bins to compute the histogram display : boolean, optional, default is False Display the signal if True savefig : string, optional, default is None Root filename (with full path) is required to save the figures. Postfix is added to the root filename. kwargs, optional. This parameter is used by plt.plot and savefig functions - savefilename : str, optional, default :'_spectro_after_noise_subtraction.png' Postfix of the figure filename - figsize : tuple of integers, optional, default: (4,10) width, height in inches. - title : string, optional, default : 'Spectrogram' title of the figure - xlabel : string, optional, default : 'Time [s]' label of the horizontal axis - ylabel : string, optional, default : 'Amplitude [AU]' label of the vertical axis - cmap : string or Colormap object, optional, default is 'gray' See https://matplotlib.org/examples/color/colormaps_reference.html in order to get all the existing colormaps examples: 'hsv', 'hot', 'bone', 'tab20c', 'jet', 'seismic', 'viridis'... - vmin, vmax : scalar, optional, default: None `vmin` and `vmax` are used in conjunction with norm to normalize luminance data. Note if you pass a `norm` instance, your settings for `vmin` and `vmax` will be ignored. - extent : scalars (left, right, bottom, top), optional, default: None The location, in data-coordinates, of the lower-left and upper-right corners. If `None`, the image is positioned such that the pixel centers fall on zero-based (row, column) indices. - dpi : integer, optional, default is 96 Dot per inch. For printed version, choose high dpi (i.e. dpi=300) => slow For screen version, choose low dpi (i.e. dpi=96) => fast - format : string, optional, default is 'png' Format to save the figure ... and more, see matplotlib Returns ------- Sxx_out : 2d ndarray of scalar Spectrogram after denoising noise_profile : 1d ndarray of scalar Noise profile References ---------- .. [1] Towsey, M., 2013. Noise Removal from Wave-forms and Spectrograms Derived from Natural Recordings of the Environment. Queensland University of Technology, Brisbane Examples -------- Load audio recording and convert it into spectrogram >>> s, fs = maad.sound.load('../data/rock_savanna.wav') >>> Sxx,tn,fn,ext = maad.sound.spectrogram (s, fs) Convert linear spectrogram into dB >>> Sxx_dB = maad.util.power2dB(Sxx) + 96 Remove stationnary noise from the spectrogram with modes 'ale', 'median', and 'mean'. >>> Sxx_dB_noNoise_ale,_ = maad.sound.remove_background_along_axis(Sxx_dB, mode='ale') >>> Sxx_dB_noNoise_med,_ = maad.sound.remove_background_along_axis(Sxx_dB, mode='median') >>> Sxx_dB_noNoise_mean,_ = maad.sound.remove_background_along_axis(Sxx_dB, mode='mean') Plot spectrograms >>> import matplotlib.pyplot as plt >>> import numpy as np >>> fig, (ax1, ax2, ax3, ax4) = plt.subplots(4, 1) >>> maad.util.plot2d(Sxx_dB, ax=ax1, extent=ext, title='original', vmin=np.median(Sxx_dB), vmax=np.median(Sxx_dB)+40) >>> maad.util.plot2d(Sxx_dB_noNoise_ale, ax=ax2, extent=ext, title='Without stationary noise (mode = ''ale'')',vmin=np.median(Sxx_dB_noNoise_ale), vmax=np.median(Sxx_dB_noNoise_ale)+40) >>> maad.util.plot2d(Sxx_dB_noNoise_med, ax=ax3, extent=ext, title='Without stationary noise (mode = ''med'')',vmin=np.median(Sxx_dB_noNoise_med), vmax=np.median(Sxx_dB_noNoise_med)+40) >>> maad.util.plot2d(Sxx_dB_noNoise_mean, ax=ax4, extent=ext, title='Without stationary noise (mode = ''mean'')',vmin=np.median(Sxx_dB_noNoise_mean), vmax=np.median(Sxx_dB_noNoise_mean)+40) >>> fig.set_size_inches(8,10) >>> fig.tight_layout() """ # get the noise profile, N define the running mean size of the histogram # in case of mode='ale' noise_profile = get_unimode(Sxx, mode, axis, N=7, N_bins=N_bins) # smooth the profile by removing spurious thin peaks noise_profile = running_mean(noise_profile, N) # Remove horizontal noisy peaks profile (BGN_VerticalNoise is an estimation) # and negative value to zero if axis == 1: Sxx_out = Sxx - noise_profile[..., np.newaxis] elif axis == 0: Sxx_out = Sxx - noise_profile[np.newaxis, ...] # Set negative value to 0 Sxx_out[Sxx_out < 0] = 0 # Display if display: ylabel = kwargs.pop('ylabel', 'Frequency [Hz]') xlabel = kwargs.pop('xlabel', 'Time [sec]') title = kwargs.pop('title', 'Spectrogram without stationnary noise') cmap = kwargs.pop('cmap', 'gray') vmin = kwargs.pop('vmin', np.min(Sxx_out)) vmax = kwargs.pop('vmax', np.max(Sxx_out)) extent = kwargs.pop('extent', None) Nf, Nw = Sxx.shape if extent is not None: fn = np.arange(0, Nf)*(extent[3]-extent[2])/(Nf-1) + extent[2] xlabel = 'frequency [Hz]' figsize = kwargs.pop('figsize', (4, 0.33*(extent[1]-extent[0]))) else: fn = np.arange(Nf) xlabel = 'pseudofrequency [points]' figsize = kwargs.pop('figsize', (4, 13)) _, fig1 = plot2d(Sxx_out, extent=extent, figsize=figsize, title=title, ylabel= ylabel, xlabel = xlabel, vmin=vmin, vmax=vmax, cmap=cmap, **kwargs) fig2, (ax1, ax2) = plt.subplots(2, sharex=True) fig2.set_size_inches((5, 4)) ax1, _ = plot1d(fn, mean_dB(Sxx, axis=axis), ax=ax1, legend='Original profile', color='b', xlabel='', ylabel= 'Amplitude [dB]', figtitle='') ax1, _ = plot1d(fn, noise_profile, ax=ax1, legend='Noise profile', color='r', xlabel='', ylabel='Amplitude [dB]', figtitle='') ax2, _ = plot1d(fn, mean_dB(Sxx_out, axis=axis), ax=ax2, color='k', legend='Denoized profile', xlabel=xlabel, ylabel= 'Amplitude [dB]', figtitle='') fig2.tight_layout() # SAVE FIGURE if savefig is not None: dpi = kwargs.pop('dpi', 96) dpi = kwargs.pop('dpi', 96) bbox_inches = kwargs.pop('bbox_inches', 'tight') format = kwargs.pop('format', 'png') savefilename = kwargs.pop( 'savefilename', '_spectro_after_noise_subtraction') filename = savefig+savefilename+'.'+format print('\n''save figure : %s' % filename) fig1.savefig(fname=filename, dpi=dpi, bbox_inches=bbox_inches, format=format, **kwargs) return Sxx_out, noise_profile
# %%
[docs] def median_equalizer(Sxx, display=False, savefig=None, **kwargs): """ Remove background noise in spectrogram using median equalizer. Parameters ---------- Sxx : 2D numpy array Original spectrogram (or image), !!! not in dB display : boolean, optional, default is False Display the signal if True savefig : string, optional, default is None Root filename (with full path) is required to save the figures. Postfix is added to the root filename. kwargs, optional. This parameter is used by plt.plot and savefig functions - savefilename : str, optional, default :'_spectro_after_noise_subtraction.png' Postfix of the figure filename - figsize : tuple of integers, optional, default: (4,10) width, height in inches. - title : string, optional, default : 'Spectrogram' title of the figure - xlabel : string, optional, default : 'Time [s]' label of the horizontal axis - ylabel : string, optional, default : 'Amplitude [AU]' label of the vertical axis - cmap : string or Colormap object, optional, default is 'gray' See https://matplotlib.org/examples/color/colormaps_reference.html in order to get all the existing colormaps examples: 'hsv', 'hot', 'bone', 'tab20c', 'jet', 'seismic', 'viridis'... - vmin, vmax : scalar, optional, default: None `vmin` and `vmax` are used in conjunction with norm to normalize luminance data. Note if you pass a `norm` instance, your settings for `vmin` and `vmax` will be ignored. - extent : scalars (left, right, bottom, top), optional, default: None The location, in data-coordinates, of the lower-left and upper-right corners. If `None`, the image is positioned such that the pixel centers fall on zero-based (row, column) indices. - dpi : integer, optional, default is 96 Dot per inch. For printed version, choose high dpi (i.e. dpi=300) => slow For screen version, choose low dpi (i.e. dpi=96) => fast - format : string, optional, default is 'png' Format to save the figure ... and more, see matplotlib Returns ------- Sxx_out : 2d ndarray of scalar Spectrogram after denoising References ---------- .. [1] This function has been proposed first by Carol BEDOYA <carol.bedoya@pg.canterbury.ac.nz> Adapted by S. Haupert Oct 9, 2018 for Python Examples -------- Load audio recording and convert it into spectrogram >>> s, fs = maad.sound.load('../data/rock_savanna.wav') >>> Sxx,tn,fn,ext = maad.sound.spectrogram (s, fs) Convert linear spectrogram into dB >>> Sxx_dB = maad.util.power2dB(Sxx) +96 Remove stationnary noise from the spectrogram >>> Sxx_noNoise = maad.sound.median_equalizer(Sxx) >>> Sxx_dB_noNoise = maad.util.power2dB(Sxx_noNoise) Plot both spectrograms >>> import matplotlib.pyplot as plt >>> import numpy as np >>> fig, (ax1, ax2) = plt.subplots(2, 1) >>> maad.util.plot2d(Sxx_dB, ax=ax1, extent=ext, title='original', vmin=np.median(Sxx_dB), vmax=np.median(Sxx_dB)+40) >>> maad.util.plot2d(Sxx_dB_noNoise, ax=ax2, extent=ext, title='Without stationary noise',vmin=np.median(Sxx_dB_noNoise), vmax=np.median(Sxx_dB_noNoise)+40) >>> fig.set_size_inches(15,8) >>> fig.tight_layout() """ Sxx_out = (Sxx-np.median(Sxx, axis=1)[..., np.newaxis]) # Numerator for normalization. Test if values of norm are <=0 and set them # to the highest value in Sxx. This will ensure that the result of the # normalization will be lower than 1 norm = (np.median(Sxx, axis=1)-np.min(Sxx, axis=1)) norm[norm <= 0] = Sxx.max() # normalization. Test if the numerator is 0 Sxx_out = Sxx_out/norm[..., np.newaxis] # if the ratio is < 1, set the value to 1. # Values < 1 are noise and should not be less than 1. # When Sxx_out is converted into dB => log10(1) => 0 Sxx_out[Sxx_out < 1] = 1 # Display if display: ylabel = kwargs.pop('ylabel', 'Frequency [Hz]') xlabel = kwargs.pop('xlabel', 'Time [sec]') title = kwargs.pop('title', 'Spectrogram without stationnary noise') cmap = kwargs.pop('cmap', 'gray') extent = kwargs.pop('extent', None) if extent is not None: figsize = kwargs.pop('figsize', (4, 0.33*(extent[1]-extent[0]))) else: figsize = kwargs.pop('figsize', (4, 13)) # convert into dB Sxx_out_dB = power2dB(Sxx_out) vmin = kwargs.pop('vmin', 0) vmax = kwargs.pop('vmax', np.max(Sxx_out_dB)) _, fig = plot2d(Sxx_out_dB, extent=extent, figsize=figsize, title=title, ylabel=ylabel, xlabel = xlabel, vmin=vmin, vmax=vmax, cmap=cmap, **kwargs) # SAVE FIGURE if savefig is not None: dpi = kwargs.pop('dpi', 96) dpi = kwargs.pop('dpi', 96) bbox_inches = kwargs.pop('bbox_inches', 'tight') format = kwargs.pop('format', 'png') savefilename = kwargs.pop( 'savefilename', '_spectro_after_noise_subtraction') filename = savefig+savefilename+'.'+format print('\n''save figure : %s' % filename) fig.savefig(fname=filename, dpi=dpi, bbox_inches=bbox_inches, format=format, **kwargs) return Sxx_out
#%%
[docs] def pcen(Sxx, gain=0.98, bias=2, power=0.5, b=0.025, eps=1e-6, display=False, savefig=False, **kwargs): """ Per-Channel Energy Normalization (PCEN) This function normalizes a time-frequency representation Sxx by performing automatic gain control, followed by nonlinear compression [1]_ This function was adapted from librosa PCEN function [2]_ to match linear spectrogram representation Parameters ---------- Sxx : 2D numpy array Original spectrogram (or image) gain : scalar (>=0), optional, default is 0.98 Gain factor. Typical values should be less than 1. bias : scalar (>=0), optional, default is 2 Bias point of the nonlinear compression power : scalar (>=0), optional, default is 0.5 Compression exponent. Typical values should be between 0 and 0.5. Smaller values of power result in stronger compression. At the limit power=0, polynomial compression becomes logarithmic. b : scalar between [0, 1], optional, default is 0.025 The filter coefficient for the low-pass filter. eps : scalar (>0), optional, default is 1e-6 A small constant used to ensure numerical stability of the filter. display : boolean, optional, default is False Display the signal if True savefig : string, optional, default is None Root filename (with full path) is required to save the figures. Postfix is added to the root filename. kwargs, optional. This parameter is used by plt.plot and savefig functions - savefilename : str, optional, default :'_spectro_after_noise_subtraction.png' Postfix of the figure filename - figsize : tuple of integers, optional, default: (4,10) width, height in inches. - title : string, optional, default : 'Spectrogram' title of the figure - xlabel : string, optional, default : 'Time [s]' label of the horizontal axis - ylabel : string, optional, default : 'Amplitude [AU]' label of the vertical axis - cmap : string or Colormap object, optional, default is 'gray' See https://matplotlib.org/examples/color/colormaps_reference.html in order to get all the existing colormaps examples: 'hsv', 'hot', 'bone', 'tab20c', 'jet', 'seismic', 'viridis'... - vmin, vmax : scalar, optional, default: None `vmin` and `vmax` are used in conjunction with norm to normalize luminance data. Note if you pass a `norm` instance, your settings for `vmin` and `vmax` will be ignored. - extent : scalars (left, right, bottom, top), optional, default: None The location, in data-coordinates, of the lower-left and upper-right corners. If `None`, the image is positioned such that the pixel centers fall on zero-based (row, column) indices. - dpi : integer, optional, default is 96 Dot per inch. For printed version, choose high dpi (i.e. dpi=300) => slow For screen version, choose low dpi (i.e. dpi=96) => fast - format : string, optional, default is 'png' Format to save the figure ... and more, see matplotlib Returns ------- Sxx_out : 2d ndarray of scalar Spectrogram after PCEN enhance_profile : 1d ndarray of scalar Enhance profile PCENxx : 2d ndarray of scalar enhanced map References ---------- .. [1] Lostanlen, V., Salamon, J., McFee, B., Cartwright, M., Farnsworth, A., Kelling, S., and Bello, J. P. Per-Channel Energy Normalization: Why and How. IEEE Signal Processing Letters, 26(1), 39-43. `DOI: 10.1109/LSP.2018.2878620 <https://doi.org/10.1109/LSP.2018.2878620>`_ .. [2] McFee, Brian, Colin Raffel, Dawen Liang, Daniel PW Ellis,Matt McVicar, Eric Battenberg, and Oriol Nieto. “librosa: Audio and music signal analysis in python.” In Proceedings of the 14th python in science conference, pp. 18-25. 2015. https://librosa.org/doc/main/generated/librosa.pcen.html """ # PCEN # 1. smooth the spectrogram M = signal.lfilter([b], [1, b - 1], Sxx) # 2. take the inverse of smoothed spectrogram smooth = (eps + M)**(-gain) # 3. Nonlinear compression Sxx_out = (Sxx * smooth + bias)**power - bias**power # Enhance map (2d) PCENxx = Sxx - Sxx_out # Enhance profile along time axis (1d) enhance_profile = np.mean(PCENxx, 1) # Display if display: ylabel = kwargs.pop('ylabel', 'Frequency [Hz]') xlabel = kwargs.pop('xlabel', 'Time [sec]') title = kwargs.pop( 'title', 'Spectrogram without stationnary noise after PCEN') cmap = kwargs.pop('cmap', 'gray') vmin = kwargs.pop('vmin', np.min(Sxx_out)) vmax = kwargs.pop('vmax', np.max(Sxx_out)) extent = kwargs.pop('extent', None) Nf, Nw = Sxx.shape if extent is not None: fn = np.arange(0, Nf)*(extent[3]-extent[2])/(Nf-1) + extent[2] xlabel = 'frequency [Hz]' figsize = kwargs.pop( 'figsize', (4, 0.33*(extent[1]-extent[0]))) else: fn = np.arange(Nf) xlabel = 'pseudofrequency [points]' figsize = kwargs.pop('figsize', (4, 13)) _, fig = plot2d(PCENxx, extent=extent, figsize=figsize, title='Enhance map', ylabel=ylabel, xlabel=xlabel, vmin=vmin, vmax=vmax, cmap=cmap, **kwargs) _, fig = plot2d(Sxx_out, extent=extent, figsize=figsize, title=title, ylabel=ylabel, xlabel=xlabel, vmin=vmin, vmax=vmax, cmap=cmap, **kwargs) fig2, (ax1, ax2) = plt.subplots(2, sharex=True) fig2.set_size_inches((5, 4)) ax1, _ = plot1d(fn, np.mean(Sxx, axis=1), ax=ax1, legend='Original profile', color='b', xlabel='', ylabel='Amplitude [dB]', figtitle='') ax1, _ = plot1d(fn, np.mean(PCENxx, 1), ax=ax1, legend='Enhance profile', color='r', xlabel='', ylabel='Amplitude [dB]', figtitle='') ax2, _ = plot1d(fn, np.mean(Sxx_out, axis=1), ax=ax2, color='k', legend='Profile after PCEN', xlabel=xlabel, ylabel='Amplitude [dB]', figtitle='') fig2.tight_layout() # SAVE FIGURE if savefig is not None: dpi = kwargs.pop('dpi', 96) dpi = kwargs.pop('dpi', 96) bbox_inches = kwargs.pop('bbox_inches', 'tight') format = kwargs.pop('format', 'png') savefilename = kwargs.pop( 'savefilename', '_spectro_after_PCEN') filename = savefig+savefilename+'.'+format print('\n''save figure : %s' % filename) fig.savefig(fname=filename, dpi=dpi, bbox_inches=bbox_inches, format=format, **kwargs) return Sxx_out, enhance_profile, PCENxx