#!/usr/bin/env python
"""
Collection of functions to remove background noise from spectrogram using
spectral subtraction methods
"""
#
# Authors: Juan Sebastian ULLOA <lisofomia@gmail.com>
# Sylvain HAUPERT <sylvain.haupert@mnhn.fr>
#
# License: New BSD License
# =============================================================================
# Load the modules
# =============================================================================
# Import external modules
from maad.util import (plot1d, plot2d, running_mean,
get_unimode, mean_dB, power2dB)
import matplotlib.pyplot as plt
import numpy as np
from scipy.ndimage import morphology
from skimage.morphology import reconstruction
from scipy import signal
# min value
import sys
_MIN_ = sys.float_info.min
# Import internal modules
# %%
# =============================================================================
# public functions
# =============================================================================
[docs]
def remove_background(Sxx, gauss_win=50, gauss_std=25, beta1=1, beta2=1,
llambda=1, verbose=False, display = False,
savefig=None, **kwargs):
"""
Remove background noise using spectral subtraction.
Based on the spectrum of the A posteriori noise profile.
It computes an atenuation map in the time-frequency domain.
See [1]_ or [2]_ for more detail about the algorithm.
Parameters
----------
Sxx : 2d ndarray of scalars
Spectrogram
gauss_win=50 : int, optional, default: 50
Number of points in the gaussian window
gauss_std = 25
The standard deviation, sigma used to create the gaussian window
beta1 : scaler, optional, default: 1
beta1 has to be >0
Should be close to 1
beta2: scaler, optional, default: 1
beta2 has to be >0
better to not change
llambda : int, optional, default: 1
over-subtraction factor to compensate variation of noise amplitude.
Should be close to 1
verbose : boolean, optional, default is False
Print messages and speed
display : boolean, optional, default is False
Display the signal if True
savefig : string, optional, default is None
Root filename (with full path) is required to save the figures. Postfix
is added to the root filename.
kwargs, optional. This parameter is used by plt.plot and savefig functions
- savefilename : str, optional, default :'_spectro_after_noise_subtraction.png'
Postfix of the figure filename
- figsize : tuple of integers, optional, default: (4,10)
width, height in inches.
- title : string, optional, default : 'Spectrogram'
title of the figure
- xlabel : string, optional, default : 'Time [s]'
label of the horizontal axis
- ylabel : string, optional, default : 'Amplitude [AU]'
label of the vertical axis
- cmap : string or Colormap object, optional, default is 'gray'
See https://matplotlib.org/examples/color/colormaps_reference.html
in order to get all the existing colormaps
examples: 'hsv', 'hot', 'bone', 'tab20c', 'jet', 'seismic',
'viridis'...
- vmin, vmax : scalar, optional, default: None
`vmin` and `vmax` are used in conjunction with norm to normalize
luminance data. Note if you pass a `norm` instance, your
settings for `vmin` and `vmax` will be ignored.
- extent : scalars (left, right, bottom, top), optional, default: None
The location, in data-coordinates, of the lower-left and
upper-right corners. If `None`, the image is positioned such that
the pixel centers fall on zero-based (row, column) indices.
- dpi : integer, optional, default is 96
Dot per inch.
For printed version, choose high dpi (i.e. dpi=300) => slow
For screen version, choose low dpi (i.e. dpi=96) => fast
- format : string, optional, default is 'png'
Format to save the figure
... and more, see matplotlib
Returns
-------
Sxx_out : 2d ndarray of scalar
Spectrogram after denoising
noise_profile : 1d darray of scalar
noise_profile
BGNxx : 2d ndarray of scalar
Noise map
References
----------
.. [1] Steven F. Boll, "Suppression of Acoustic Noise in Speech Using Spectral
Subtraction", IEEE Transactions on Signal Processing, 27(2),pp 113-120,1979
`DOI:10.1109/TASSP.1979.1163209 <https://doi.org/10.1109/TASSP.1979.1163209>`_
.. [2] Y. Ephraim and D. Malah, Speech enhancement using a minimum mean square
error short-time spectral amplitude estimator, IEEE. Transactions in
Acoust., Speech, Signal Process., vol. 32, no. 6, pp. 11091121, Dec. 1984.
`DOI:10.1109/TASSP.1984.1164453 <https://doi.org/10.1109/TASSP.1984.1164453>`_
Examples
--------
Load audio recording and convert it into spectrogram
>>> s, fs = maad.sound.load('../data/rock_savanna.wav')
>>> Sxx,tn,fn,ext = maad.sound.spectrogram (s, fs)
Convert linear spectrogram into dB and add 96dB (which is the maximum dB
for 16 bits wav) in order to have positive values
>>> Sxx_dB = maad.util.power2dB(Sxx) + 96
Remove stationnary noise from the spectrogram in dB
>>> Sxx_dB_noNoise, noise_profile, _ = maad.sound.remove_background(Sxx_dB)
Plot both spectrograms
>>> import matplotlib.pyplot as plt
>>> import numpy as np
>>> fig, (ax1, ax2) = plt.subplots(2, 1)
>>> maad.util.plot2d(Sxx_dB, ax=ax1, extent=ext, title='original', vmin=np.median(Sxx_dB), vmax=np.median(Sxx_dB)+40)
>>> maad.util.plot2d(Sxx_dB_noNoise, ax=ax2, extent=ext, title='Without stationary noise', vmin=np.median(Sxx_dB_noNoise), vmax=np.median(Sxx_dB_noNoise)+40)
>>> fig.set_size_inches(15,8)
>>> fig.tight_layout()
"""
if verbose:
print(72 * '_')
print('Determine the profile of the stochastic background noise...')
Nf, Nw = Sxx.shape
# average spectrum (assumed to be ergodic)
mean_profile = np.mean(Sxx, 1)
# White Top Hat (to remove non uniform background) = i - opening(i)
selem = signal.gaussian(gauss_win, gauss_std)
noise_profile = morphology.grey_opening(mean_profile, structure=selem)
# Remove the artefact at the end of the spectrum (2 highest frequencies)
noise_profile[-2:] = mean_profile[-2:]
noise_profile[:2] = mean_profile[:2]
# Create a matrix with the noise profile
noise_spectro = np.kron(np.ones((Nw, 1)), noise_profile)
noise_spectro = noise_spectro.transpose()
# snr estimate a posteriori
SNR_est = Sxx - noise_spectro
# to avoid dividing by 0
SNR_est[SNR_est <= 0] = 0
noise_spectro[noise_spectro == 0] = _MIN_
# ratio
SNR_est = (Sxx/noise_spectro)
# keep only positive values
SNR_est = SNR_est*(SNR_est > 0)
# compute attenuation map
# if llambda, beta1 and beta 2 are equal to 1, it is (1 - noise_spectro)
an_lk = (1-llambda*((1./(SNR_est+1))**beta1))**beta2
an_lk = an_lk*(an_lk > 0) # keep only positive values
if verbose:
print('Remove the stochastic background noise...')
# Apply the attenuation map to the STFT coefficients
Sxx_out = an_lk*Sxx
# noise map BGNxx
BGNxx = Sxx - Sxx_out
# if nan in the image, convert nan into 0
np.nan_to_num(Sxx_out, 0)
# Set negative value to 0
Sxx_out[Sxx_out < 0] = 0
# Display
if display:
ylabel = kwargs.pop('ylabel', 'Frequency [Hz]')
xlabel = kwargs.pop('xlabel', 'Time [sec]')
title = kwargs.pop('title', 'Spectrogram without stationnary noise')
cmap = kwargs.pop('cmap', 'gray')
vmin = kwargs.pop('vmin', np.min(Sxx_out))
vmax = kwargs.pop('vmax', np.max(Sxx_out))
extent = kwargs.pop('extent', None)
if extent is not None:
fn = np.arange(0, Nf)*(extent[3]-extent[2])/(Nf-1) + extent[2]
xlabel = 'frequency [Hz]'
figsize = kwargs.pop('figsize', (4, 0.33*(extent[1]-extent[0])))
else:
fn = np.arange(Nf)
xlabel = 'pseudofrequency [points]'
figsize = kwargs.pop('figsize', (4, 13))
_, fig = plot2d(Sxx_out, extent=extent, figsize=figsize, title=title,
ylabel=ylabel, xlabel = xlabel, vmin=vmin, vmax=vmax,
cmap=cmap, **kwargs)
fig2, (ax1, ax2) = plt.subplots(2, sharex=True)
fig2.set_size_inches((5, 4))
ax1, _ = plot1d(fn, mean_profile, ax=ax1, legend='Original profile',
color='b',
xlabel='', ylabel= 'Amplitude [dB]', figtitle='')
ax1, _ = plot1d(fn, np.mean(BGNxx, axis=1), ax=ax1, legend='Noise profile',
color='r',
xlabel='', ylabel='Amplitude [dB]', figtitle='')
ax2, _ = plot1d(fn, np.mean(Sxx_out, axis=1), ax=ax2, color='k',
legend='Denoized profile',
xlabel=xlabel, ylabel= 'Amplitude [dB]', figtitle='')
fig2.tight_layout()
# SAVE FIGURE
if savefig is not None:
dpi = kwargs.pop('dpi', 96)
dpi = kwargs.pop('dpi', 96)
bbox_inches = kwargs.pop('bbox_inches', 'tight')
format = kwargs.pop('format', 'png')
savefilename = kwargs.pop(
'savefilename', '_spectro_after_noise_subtraction')
filename = savefig+savefilename+'.'+format
print('\n''save figure : %s' % filename)
fig.savefig(fname=filename, dpi=dpi, bbox_inches=bbox_inches,
format=format, **kwargs)
return Sxx_out, noise_profile, BGNxx
# %%
[docs]
def remove_background_morpho(Sxx, q=0.1, display=False, savefig=None, **kwargs):
"""
Remove background noise in a spectrogram using mathematical morphology tool.
Parameters
----------
Sxx : 2D numpy array
Original spectrogram (or image)
q : float
Quantile which must be between 0 and 1 inclusive. The closest to one,
the finest details are kept
display : boolean, optional, default is False
Display the signal if True
savefig : string, optional, default is None
Root filename (with full path) is required to save the figures. Postfix
is added to the root filename.
kwargs, optional. This parameter is used by plt.plot and savefig functions
- savefilename : str, optional, default :'_spectro_after_noise_subtraction.png'
Postfix of the figure filename
- figsize : tuple of integers, optional, default: (4,10)
width, height in inches.
- title : string, optional, default : 'Spectrogram'
title of the figure
- xlabel : string, optional, default : 'Time [s]'
label of the horizontal axis
- ylabel : string, optional, default : 'Amplitude [AU]'
label of the vertical axis
- cmap : string or Colormap object, optional, default is 'gray'
See https://matplotlib.org/examples/color/colormaps_reference.html
in order to get all the existing colormaps
examples: 'hsv', 'hot', 'bone', 'tab20c', 'jet', 'seismic',
'viridis'...
- vmin, vmax : scalar, optional, default: None
`vmin` and `vmax` are used in conjunction with norm to normalize
luminance data. Note if you pass a `norm` instance, your
settings for `vmin` and `vmax` will be ignored.
- extent : scalars (left, right, bottom, top), optional, default: None
The location, in data-coordinates, of the lower-left and
upper-right corners. If `None`, the image is positioned such that
the pixel centers fall on zero-based (row, column) indices.
- dpi : integer, optional, default is 96
Dot per inch.
For printed version, choose high dpi (i.e. dpi=300) => slow
For screen version, choose low dpi (i.e. dpi=96) => fast
- format : string, optional, default is 'png'
Format to save the figure
... and more, see matplotlib
Returns
-------
Sxx_out : 2d ndarray of scalar
Spectrogram after denoising
noise_profile : 1d ndarray of scalar
Noise profile
BGNxx : 2d ndarray of scalar
Noise map
Examples
--------
Load audio recording and convert it into spectrogram
>>> s, fs = maad.sound.load('../data/rock_savanna.wav')
>>> Sxx,tn,fn,ext = maad.sound.spectrogram (s, fs)
Convert linear spectrogram into dB
>>> Sxx_dB = maad.util.power2dB(Sxx) +96
Remove stationnary noise from the spectrogram
>>> Sxx_dB_noNoise,_,_ = maad.sound.remove_background_morpho(Sxx_dB, q=0.5)
Plot both spectrograms
>>> import matplotlib.pyplot as plt
>>> import numpy as np
>>> fig, (ax1, ax2) = plt.subplots(2, 1)
>>> maad.util.plot2d(Sxx_dB, ax=ax1, extent=ext, title='original', vmin=np.median(Sxx_dB), vmax=np.median(Sxx_dB)+40)
>>> maad.util.plot2d(Sxx_dB_noNoise, ax=ax2, extent=ext, title='Without stationary noise',vmin=np.median(Sxx_dB_noNoise), vmax=np.median(Sxx_dB_noNoise)+40)
>>> fig.set_size_inches(15,8)
>>> fig.tight_layout()
Load audio recording and convert it into spectrogram
>>> s, fs = maad.sound.load('../data/rock_savanna.wav')
>>> Sxx,tn,fn,ext = maad.sound.spectrogram (s, fs, tcrop=(0,20))
>>> Sxx_dB = maad.util.power2dB(Sxx) +96
Remove stationnary noise from the spectrogram with different q
>>> Sxx_dB_noNoise_q25,_,_ = maad.sound.remove_background_morpho(Sxx_dB, q=0.25)
>>> Sxx_dB_noNoise_q50,_,_ = maad.sound.remove_background_morpho(Sxx_dB, q=0.5)
>>> Sxx_dB_noNoise_q75,_,_ = maad.sound.remove_background_morpho(Sxx_dB, q=0.75)
Plot 3 spectrograms
>>> import matplotlib.pyplot as plt
>>> fig, (ax1, ax2, ax3) = plt.subplots(3, 1)
>>> maad.util.plot2d(Sxx_dB_noNoise_q25, ax=ax1, extent=ext, title='Without stationary noise (q=0.25)',vmin=np.median(Sxx_dB_noNoise_q25), vmax=np.median(Sxx_dB_noNoise_q25)+40)
>>> maad.util.plot2d(Sxx_dB_noNoise_q50, ax=ax2, extent=ext, title='Without stationary noise (q=0.50)',vmin=np.median(Sxx_dB_noNoise_q50), vmax=np.median(Sxx_dB_noNoise_q50)+40)
>>> maad.util.plot2d(Sxx_dB_noNoise_q75, ax=ax3, extent=ext, title='Without stationary noise (q=0.75)',vmin=np.median(Sxx_dB_noNoise_q75), vmax=np.median(Sxx_dB_noNoise_q75)+40)
>>> fig.set_size_inches(15,9)
>>> fig.tight_layout()
"""
# Use morpho math tools to estimate the background noise
BGNxx = reconstruction(seed=Sxx-(np.quantile(Sxx, q)),
mask=Sxx, method='dilation')
Sxx_out = Sxx - BGNxx
# noise profile along time axis
noise_profile = np.mean(BGNxx, 1)
# Set negative value to 0
Sxx_out[Sxx_out < 0] = 0
# Display
if display:
ylabel = kwargs.pop('ylabel', 'Frequency [Hz]')
xlabel = kwargs.pop('xlabel', 'Time [sec]')
title = kwargs.pop('title', 'Spectrogram without stationnary noise')
cmap = kwargs.pop('cmap', 'gray')
vmin = kwargs.pop('vmin', np.min(Sxx_out))
vmax = kwargs.pop('vmax', np.max(Sxx_out))
extent = kwargs.pop('extent', None)
Nf, Nw = Sxx.shape
if extent is not None:
fn = np.arange(0, Nf)*(extent[3]-extent[2])/(Nf-1) + extent[2]
xlabel = 'frequency [Hz]'
figsize = kwargs.pop('figsize', (4, 0.33*(extent[1]-extent[0])))
else:
fn = np.arange(Nf)
xlabel = 'pseudofrequency [points]'
figsize = kwargs.pop('figsize', (4, 13))
_, fig = plot2d(BGNxx, extent=extent, figsize=figsize, title='Noise map',
ylabel=ylabel, xlabel = xlabel, vmin=vmin, vmax=vmax,
cmap=cmap, **kwargs)
_, fig = plot2d(Sxx_out, extent=extent, figsize=figsize, title=title,
ylabel=ylabel, xlabel = xlabel, vmin=vmin, vmax=vmax,
cmap=cmap, **kwargs)
fig2, (ax1, ax2) = plt.subplots(2, sharex=True)
fig2.set_size_inches((5, 4))
ax1, _ = plot1d(fn, np.mean(Sxx, axis=1), ax=ax1, legend='Original profile',
color='b',
xlabel='', ylabel= 'Amplitude [dB]', figtitle='')
ax1, _ = plot1d(fn, np.mean(BGNxx, 1), ax=ax1, legend='Noise profile',
color='r',
xlabel='', ylabel='Amplitude [dB]', figtitle='')
ax2, _ = plot1d(fn, np.mean(Sxx_out, axis=1), ax=ax2, color='k',
legend='Denoized profile',
xlabel=xlabel, ylabel= 'Amplitude [dB]', figtitle='')
fig2.tight_layout()
# SAVE FIGURE
if savefig is not None:
dpi = kwargs.pop('dpi', 96)
dpi = kwargs.pop('dpi', 96)
bbox_inches = kwargs.pop('bbox_inches', 'tight')
format = kwargs.pop('format', 'png')
savefilename = kwargs.pop(
'savefilename', '_spectro_after_noise_subtraction')
filename = savefig+savefilename+'.'+format
print('\n''save figure : %s' % filename)
fig.savefig(fname=filename, dpi=dpi, bbox_inches=bbox_inches,
format=format, **kwargs)
return Sxx_out, noise_profile, BGNxx
# %%
[docs]
def remove_background_along_axis(Sxx, mode='median', axis=1, N=25, N_bins=50,
display=False, savefig=None, **kwargs):
"""
Get the noisy profile along the defined axis and remove this profile from
the spectrogram.
Parameters
----------
Sxx : 2D numpy array
Original spectrogram (or image)
mode : str, optional, default is 'median'
Select the mode to remove the noise
Possible values for mode are :
- 'ale' : Adaptative Level Equalization algorithm [Lamel & al. 1981]
- 'median' : subtract the median value
- 'mean' : subtract the mean value (DC)
axis : integer, default is 1
if matrix, estimate the mode for each row (axis=0) or each column (axis=1)
N : int, default is 25
length of window to compute the running mean of the noise profile
N_bins : int (only for mode = "ale"), default is 50
number of bins to compute the histogram
display : boolean, optional, default is False
Display the signal if True
savefig : string, optional, default is None
Root filename (with full path) is required to save the figures. Postfix
is added to the root filename.
kwargs, optional. This parameter is used by plt.plot and savefig functions
- savefilename : str, optional, default :'_spectro_after_noise_subtraction.png'
Postfix of the figure filename
- figsize : tuple of integers, optional, default: (4,10)
width, height in inches.
- title : string, optional, default : 'Spectrogram'
title of the figure
- xlabel : string, optional, default : 'Time [s]'
label of the horizontal axis
- ylabel : string, optional, default : 'Amplitude [AU]'
label of the vertical axis
- cmap : string or Colormap object, optional, default is 'gray'
See https://matplotlib.org/examples/color/colormaps_reference.html
in order to get all the existing colormaps
examples: 'hsv', 'hot', 'bone', 'tab20c', 'jet', 'seismic',
'viridis'...
- vmin, vmax : scalar, optional, default: None
`vmin` and `vmax` are used in conjunction with norm to normalize
luminance data. Note if you pass a `norm` instance, your
settings for `vmin` and `vmax` will be ignored.
- extent : scalars (left, right, bottom, top), optional, default: None
The location, in data-coordinates, of the lower-left and
upper-right corners. If `None`, the image is positioned such that
the pixel centers fall on zero-based (row, column) indices.
- dpi : integer, optional, default is 96
Dot per inch.
For printed version, choose high dpi (i.e. dpi=300) => slow
For screen version, choose low dpi (i.e. dpi=96) => fast
- format : string, optional, default is 'png'
Format to save the figure
... and more, see matplotlib
Returns
-------
Sxx_out : 2d ndarray of scalar
Spectrogram after denoising
noise_profile : 1d ndarray of scalar
Noise profile
References
----------
.. [1] Towsey, M., 2013. Noise Removal from Wave-forms and Spectrograms Derived from Natural Recordings of the Environment. Queensland University of Technology, Brisbane
Examples
--------
Load audio recording and convert it into spectrogram
>>> s, fs = maad.sound.load('../data/rock_savanna.wav')
>>> Sxx,tn,fn,ext = maad.sound.spectrogram (s, fs)
Convert linear spectrogram into dB
>>> Sxx_dB = maad.util.power2dB(Sxx) + 96
Remove stationnary noise from the spectrogram with modes 'ale', 'median', and 'mean'.
>>> Sxx_dB_noNoise_ale,_ = maad.sound.remove_background_along_axis(Sxx_dB, mode='ale')
>>> Sxx_dB_noNoise_med,_ = maad.sound.remove_background_along_axis(Sxx_dB, mode='median')
>>> Sxx_dB_noNoise_mean,_ = maad.sound.remove_background_along_axis(Sxx_dB, mode='mean')
Plot spectrograms
>>> import matplotlib.pyplot as plt
>>> import numpy as np
>>> fig, (ax1, ax2, ax3, ax4) = plt.subplots(4, 1)
>>> maad.util.plot2d(Sxx_dB, ax=ax1, extent=ext, title='original', vmin=np.median(Sxx_dB), vmax=np.median(Sxx_dB)+40)
>>> maad.util.plot2d(Sxx_dB_noNoise_ale, ax=ax2, extent=ext, title='Without stationary noise (mode = ''ale'')',vmin=np.median(Sxx_dB_noNoise_ale), vmax=np.median(Sxx_dB_noNoise_ale)+40)
>>> maad.util.plot2d(Sxx_dB_noNoise_med, ax=ax3, extent=ext, title='Without stationary noise (mode = ''med'')',vmin=np.median(Sxx_dB_noNoise_med), vmax=np.median(Sxx_dB_noNoise_med)+40)
>>> maad.util.plot2d(Sxx_dB_noNoise_mean, ax=ax4, extent=ext, title='Without stationary noise (mode = ''mean'')',vmin=np.median(Sxx_dB_noNoise_mean), vmax=np.median(Sxx_dB_noNoise_mean)+40)
>>> fig.set_size_inches(8,10)
>>> fig.tight_layout()
"""
# get the noise profile, N define the running mean size of the histogram
# in case of mode='ale'
noise_profile = get_unimode(Sxx, mode, axis, N=7, N_bins=N_bins)
# smooth the profile by removing spurious thin peaks
noise_profile = running_mean(noise_profile, N)
# Remove horizontal noisy peaks profile (BGN_VerticalNoise is an estimation)
# and negative value to zero
if axis == 1:
Sxx_out = Sxx - noise_profile[..., np.newaxis]
elif axis == 0:
Sxx_out = Sxx - noise_profile[np.newaxis, ...]
# Set negative value to 0
Sxx_out[Sxx_out < 0] = 0
# Display
if display:
ylabel = kwargs.pop('ylabel', 'Frequency [Hz]')
xlabel = kwargs.pop('xlabel', 'Time [sec]')
title = kwargs.pop('title', 'Spectrogram without stationnary noise')
cmap = kwargs.pop('cmap', 'gray')
vmin = kwargs.pop('vmin', np.min(Sxx_out))
vmax = kwargs.pop('vmax', np.max(Sxx_out))
extent = kwargs.pop('extent', None)
Nf, Nw = Sxx.shape
if extent is not None:
fn = np.arange(0, Nf)*(extent[3]-extent[2])/(Nf-1) + extent[2]
xlabel = 'frequency [Hz]'
figsize = kwargs.pop('figsize', (4, 0.33*(extent[1]-extent[0])))
else:
fn = np.arange(Nf)
xlabel = 'pseudofrequency [points]'
figsize = kwargs.pop('figsize', (4, 13))
_, fig1 = plot2d(Sxx_out, extent=extent, figsize=figsize, title=title,
ylabel= ylabel, xlabel = xlabel, vmin=vmin, vmax=vmax,
cmap=cmap, **kwargs)
fig2, (ax1, ax2) = plt.subplots(2, sharex=True)
fig2.set_size_inches((5, 4))
ax1, _ = plot1d(fn, mean_dB(Sxx, axis=axis), ax=ax1, legend='Original profile',
color='b',
xlabel='', ylabel= 'Amplitude [dB]', figtitle='')
ax1, _ = plot1d(fn, noise_profile, ax=ax1, legend='Noise profile',
color='r',
xlabel='', ylabel='Amplitude [dB]', figtitle='')
ax2, _ = plot1d(fn, mean_dB(Sxx_out, axis=axis), ax=ax2, color='k',
legend='Denoized profile',
xlabel=xlabel, ylabel= 'Amplitude [dB]', figtitle='')
fig2.tight_layout()
# SAVE FIGURE
if savefig is not None:
dpi = kwargs.pop('dpi', 96)
dpi = kwargs.pop('dpi', 96)
bbox_inches = kwargs.pop('bbox_inches', 'tight')
format = kwargs.pop('format', 'png')
savefilename = kwargs.pop(
'savefilename', '_spectro_after_noise_subtraction')
filename = savefig+savefilename+'.'+format
print('\n''save figure : %s' % filename)
fig1.savefig(fname=filename, dpi=dpi, bbox_inches=bbox_inches,
format=format, **kwargs)
return Sxx_out, noise_profile
# %%
#%%
[docs]
def pcen(Sxx, gain=0.98, bias=2, power=0.5, b=0.025, eps=1e-6,
display=False, savefig=False, **kwargs):
"""
Per-Channel Energy Normalization (PCEN)
This function normalizes a time-frequency representation Sxx by
performing automatic gain control, followed by nonlinear compression [1]_
This function was adapted from librosa PCEN function [2]_ to match linear
spectrogram representation
Parameters
----------
Sxx : 2D numpy array
Original spectrogram (or image)
gain : scalar (>=0), optional, default is 0.98
Gain factor. Typical values should be less than 1.
bias : scalar (>=0), optional, default is 2
Bias point of the nonlinear compression
power : scalar (>=0), optional, default is 0.5
Compression exponent. Typical values should be between 0 and 0.5.
Smaller values of power result in stronger compression.
At the limit power=0, polynomial compression becomes logarithmic.
b : scalar between [0, 1], optional, default is 0.025
The filter coefficient for the low-pass filter.
eps : scalar (>0), optional, default is 1e-6
A small constant used to ensure numerical stability of the filter.
display : boolean, optional, default is False
Display the signal if True
savefig : string, optional, default is None
Root filename (with full path) is required to save the figures. Postfix
is added to the root filename.
kwargs, optional. This parameter is used by plt.plot and savefig functions
- savefilename : str, optional, default :'_spectro_after_noise_subtraction.png'
Postfix of the figure filename
- figsize : tuple of integers, optional, default: (4,10)
width, height in inches.
- title : string, optional, default : 'Spectrogram'
title of the figure
- xlabel : string, optional, default : 'Time [s]'
label of the horizontal axis
- ylabel : string, optional, default : 'Amplitude [AU]'
label of the vertical axis
- cmap : string or Colormap object, optional, default is 'gray'
See https://matplotlib.org/examples/color/colormaps_reference.html
in order to get all the existing colormaps
examples: 'hsv', 'hot', 'bone', 'tab20c', 'jet', 'seismic',
'viridis'...
- vmin, vmax : scalar, optional, default: None
`vmin` and `vmax` are used in conjunction with norm to normalize
luminance data. Note if you pass a `norm` instance, your
settings for `vmin` and `vmax` will be ignored.
- extent : scalars (left, right, bottom, top), optional, default: None
The location, in data-coordinates, of the lower-left and
upper-right corners. If `None`, the image is positioned such that
the pixel centers fall on zero-based (row, column) indices.
- dpi : integer, optional, default is 96
Dot per inch.
For printed version, choose high dpi (i.e. dpi=300) => slow
For screen version, choose low dpi (i.e. dpi=96) => fast
- format : string, optional, default is 'png'
Format to save the figure
... and more, see matplotlib
Returns
-------
Sxx_out : 2d ndarray of scalar
Spectrogram after PCEN
enhance_profile : 1d ndarray of scalar
Enhance profile
PCENxx : 2d ndarray of scalar
enhanced map
References
----------
.. [1] Lostanlen, V., Salamon, J., McFee, B., Cartwright, M., Farnsworth, A., Kelling, S., and Bello, J. P. Per-Channel Energy Normalization: Why and How. IEEE Signal Processing Letters, 26(1), 39-43. `DOI: 10.1109/LSP.2018.2878620 <https://doi.org/10.1109/LSP.2018.2878620>`_
.. [2] McFee, Brian, Colin Raffel, Dawen Liang, Daniel PW Ellis,Matt McVicar, Eric Battenberg, and Oriol Nieto. “librosa: Audio and music signal analysis in python.” In Proceedings of the 14th python in science conference, pp. 18-25. 2015. https://librosa.org/doc/main/generated/librosa.pcen.html
"""
# PCEN
# 1. smooth the spectrogram
M = signal.lfilter([b], [1, b - 1], Sxx)
# 2. take the inverse of smoothed spectrogram
smooth = (eps + M)**(-gain)
# 3. Nonlinear compression
Sxx_out = (Sxx * smooth + bias)**power - bias**power
# Enhance map (2d)
PCENxx = Sxx - Sxx_out
# Enhance profile along time axis (1d)
enhance_profile = np.mean(PCENxx, 1)
# Display
if display:
ylabel = kwargs.pop('ylabel', 'Frequency [Hz]')
xlabel = kwargs.pop('xlabel', 'Time [sec]')
title = kwargs.pop(
'title', 'Spectrogram without stationnary noise after PCEN')
cmap = kwargs.pop('cmap', 'gray')
vmin = kwargs.pop('vmin', np.min(Sxx_out))
vmax = kwargs.pop('vmax', np.max(Sxx_out))
extent = kwargs.pop('extent', None)
Nf, Nw = Sxx.shape
if extent is not None:
fn = np.arange(0, Nf)*(extent[3]-extent[2])/(Nf-1) + extent[2]
xlabel = 'frequency [Hz]'
figsize = kwargs.pop(
'figsize', (4, 0.33*(extent[1]-extent[0])))
else:
fn = np.arange(Nf)
xlabel = 'pseudofrequency [points]'
figsize = kwargs.pop('figsize', (4, 13))
_, fig = plot2d(PCENxx, extent=extent, figsize=figsize, title='Enhance map',
ylabel=ylabel, xlabel=xlabel, vmin=vmin, vmax=vmax,
cmap=cmap, **kwargs)
_, fig = plot2d(Sxx_out, extent=extent, figsize=figsize, title=title,
ylabel=ylabel, xlabel=xlabel, vmin=vmin, vmax=vmax,
cmap=cmap, **kwargs)
fig2, (ax1, ax2) = plt.subplots(2, sharex=True)
fig2.set_size_inches((5, 4))
ax1, _ = plot1d(fn, np.mean(Sxx, axis=1), ax=ax1, legend='Original profile',
color='b',
xlabel='', ylabel='Amplitude [dB]', figtitle='')
ax1, _ = plot1d(fn, np.mean(PCENxx, 1), ax=ax1, legend='Enhance profile',
color='r',
xlabel='', ylabel='Amplitude [dB]', figtitle='')
ax2, _ = plot1d(fn, np.mean(Sxx_out, axis=1), ax=ax2, color='k',
legend='Profile after PCEN',
xlabel=xlabel, ylabel='Amplitude [dB]', figtitle='')
fig2.tight_layout()
# SAVE FIGURE
if savefig is not None:
dpi = kwargs.pop('dpi', 96)
dpi = kwargs.pop('dpi', 96)
bbox_inches = kwargs.pop('bbox_inches', 'tight')
format = kwargs.pop('format', 'png')
savefilename = kwargs.pop(
'savefilename', '_spectro_after_PCEN')
filename = savefig+savefilename+'.'+format
print('\n''save figure : %s' % filename)
fig.savefig(fname=filename, dpi=dpi, bbox_inches=bbox_inches,
format=format, **kwargs)
return Sxx_out, enhance_profile, PCENxx