# Source code for maad.sound.input_output

#!/usr/bin/env python
""" 
Collection of functions to load, read, write, play audio signal or 
its time-frequency representation
"""
#
# Authors:  Juan Sebastian ULLOA <lisofomia@gmail.com>
#           Sylvain HAUPERT <sylvain.haupert@mnhn.fr>
#
# License: New BSD License

# =============================================================================
# Load the modules
# =============================================================================
# Import external modules
import numpy as np
import warnings
import io
from urllib.request import urlopen
from scipy.io import wavfile 
from skimage.io import imread 
from scipy.io.wavfile import write as write_wav

# Import internal modules
from maad.util import plot1d, plot2d, linear_scale

#%%
# =============================================================================
# public functions
# =============================================================================
def load(filename, channel='left', detrend=True, verbose=False,
         display=False, savefig=None, **kwargs):
    """
    Load an audio file (stereo or mono).

    Currently, this function can only load WAVE files.

    Parameters
    ----------
    filename : string or open file handle
        Name or path of the audio file, or a file-like object holding the
        WAVE data (it is handed directly to ``scipy.io.wavfile.read``).
    channel : {'left', 'right'}, optional, default: left
        In case of stereo sound select the channel that is kept. Any value
        other than 'left' selects the second (right) channel.
    detrend : boolean, optional, default is True
        Subtract the DC value (the mean of the selected channel).
    verbose : boolean, optional, default is False
        Print messages into the console or terminal if verbose is True
    display : boolean, optional, default is False
        Display the signal if True
    savefig : string, optional, default is None
        Root filename (with full path) is required to save the figures.
        Postfix is added to the root filename. The figure is only saved
        when ``display`` is True, because the figure is created by the
        display step.
    **kwargs, optional
        Forwarded to ``maad.util.plot1d`` and, for whatever remains after
        the keys below are consumed, to ``Figure.savefig``.

        - figtitle : string, optional, default : 'Orignal sound'
            title of the figure
        - savefilename : str, optional, default : '_audiogram'
            Postfix of the figure filename (the extension is appended
            from ``format``)
        - dpi : integer, optional, default is 96
            Dot per inch.
            For printed version, choose high dpi (i.e. dpi=300) => slow
            For screen version, choose low dpi (i.e. dpi=96) => fast
        - bbox_inches : string, optional, default is 'tight'
            Bounding box passed to ``Figure.savefig``
        - format : string, optional, default is 'png'
            Format to save the figure
        - figsize : tuple of integers, optional, default: (4,10)
            width, height in inches.
        - xlabel : string, optional, default : 'Time [s]'
            label of the horizontal axis
        - ylabel : string, optional, default : 'Amplitude [AU]'
            label of the vertical axis
        - cmap : string or Colormap object, optional, default is 'gray'
            See https://matplotlib.org/examples/color/colormaps_reference.html
            in order to get all the existing colormaps
            examples: 'hsv', 'hot', 'bone', 'tab20c', 'jet', 'seismic',
            'viridis'...
        - vmin, vmax : scalar, optional, default: None
            `vmin` and `vmax` are used in conjunction with norm to normalize
            luminance data. Note if you pass a `norm` instance, your
            settings for `vmin` and `vmax` will be ignored.
        - extent : list of scalars [left, right, bottom, top], optional
            The location, in data-coordinates, of the lower-left and
            upper-right corners.

        ... and more, see matplotlib

    Returns
    -------
    s_out : 1d ndarray of floats
        Vector containing the audiogram. Integer PCM data (8, 16 or 32
        bits) is normalized to the range [-1, 1); other sample types are
        returned as read.
    fs : int
        The sampling frequency in Hz of the audiogram

    Notes
    -----
    8-bit WAVE files store unsigned samples centred on 128. They are
    re-centred before scaling so that silence maps to 0 and the amplitude
    is consistent with the 16 and 32-bit formats.

    Examples
    --------
    >>> import maad
    >>> s, fs = maad.sound.load("../data/tropical_forest_morning.wav", channel='left')
    >>> print("The sampling frequency of the audio file is {} Hz".format(fs))
    The sampling frequency of the audio file is 48000 Hz
    >>> import numpy as np
    >>> tn = np.arange(0,len(s))/fs
    >>> import matplotlib.pyplot as plt
    >>> fig, (ax0, ax1) = plt.subplots(2,1, sharex=True, squeeze=True)
    >>> ax0, _ = maad.util.plot1d(tn,s,ax=ax0, figtitle='ground level')
    >>> ax0.set_ylim((-0.075,0.075))
    (-0.075, 0.075)
    >>> s, fs = maad.sound.load("../data/tropical_forest_morning.wav", channel='right')
    >>> ax1, _ = maad.util.plot1d(tn,s,ax=ax1, figtitle='canopy level')
    >>> ax1.set_ylim((-0.075,0.075))
    (-0.075, 0.075)
    >>> fig.tight_layout()
    """
    if verbose:
        print(72 * '_')
        print("loading %s..." % filename)

    # wavfile.read commonly emits "WavFileWarning: Chunk (non-data) not
    # understood, skipping it"; silence warnings for this single call only.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        fs, s = wavfile.read(filename)
    if verbose:
        print("Sampling frequency: %dHz" % fs)

    # Normalize integer PCM to [-1, 1) according to the sample width.
    # Float divisors keep the arithmetic in float64 for every integer dtype.
    if s.dtype == np.int32:
        s = s / 2.0 ** 31
    elif s.dtype == np.int16:
        s = s / 2.0 ** 15
    elif s.dtype == np.uint8:
        # Unsigned format: convert to float first (uint8 arithmetic would
        # wrap around), shift the 128 mid-point to zero, then scale.
        s = (s.astype(np.float64) - 128.0) / 128.0

    # Stereo data is 2-D (samples x channels): keep only the requested one.
    if s.ndim == 2:
        if channel == 'left':
            if verbose:
                print("Select left channel")
            s_out = s[:, 0]
        else:
            if verbose:
                print("Select right channel")
            s_out = s[:, 1]
    else:
        s_out = s

    # Detrend the signal by removing the DC offset
    if detrend:
        s_out = s_out - np.mean(s_out)

    if display:
        # The time vector is only needed for plotting.
        tn = np.arange(s_out.size) / fs
        figtitle = kwargs.pop('figtitle', 'Orignal sound')
        _, fig = plot1d(tn, s_out, figtitle=figtitle, **kwargs)

        # Saving requires the figure created just above, hence the nesting.
        if savefig is not None:
            dpi = kwargs.pop('dpi', 96)
            bbox_inches = kwargs.pop('bbox_inches', 'tight')
            fig_format = kwargs.pop('format', 'png')
            savefilename = kwargs.pop('savefilename', '_audiogram')
            fig_path = savefig + savefilename + '.' + fig_format
            if verbose:
                print('\n''save figure : %s' % fig_path)
            fig.savefig(fname=fig_path, dpi=dpi, bbox_inches=bbox_inches,
                        format=fig_format, **kwargs)

    return s_out, fs
#%%
def load_spectrogram(filename, fs, duration, flims=None, flipud=True,
                     verbose=False, display=False, **kwargs):
    """
    Load a spectrogram image from a file or an URL.

    The image is read in gray levels, rescaled between 0 and 1, and time
    and frequency axes are built with exactly one value per image column
    and per image row respectively.

    Parameters
    ----------
    filename : string
        Image file name, e.g. ``test.jpg`` or URL.
    fs : scalar
        Sampling frequency of the audiogram (in Hz)
    duration : scalar
        Duration of the audiogram (in s)
    flims : list of 2 scalars [min, max], optional, default is None
        flims corresponds to the min and max boundary frequency values.
        If None, the image is assumed to span from 0 Hz to fs/2.
    flipud : boolean, optional, default is True
        Vertical flip of the matrix (image)
    verbose : boolean, optional, default is False
        if True, print message in terminal
    display : boolean, optional, default is False
        if True, display the image
    **kwargs, optional
        This parameter is forwarded to ``maad.util.plot2d``.

        - figsize : tuple of integers, optional,
            default: (4, 0.33 * displayed time span)
            width, height in inches.
        - title : string, optional, default : 'loaded spectrogram'
            title of the figure
        - xlabel : string, optional, default : 'Time [sec]'
            label of the horizontal axis
        - ylabel : string, optional, default : 'Frequency [Hz]'
            label of the vertical axis
        - cmap : string or Colormap object, optional, default is 'gray'
            See https://matplotlib.org/examples/color/colormaps_reference.html
            in order to get all the existing colormaps
            examples: 'hsv', 'hot', 'bone', 'tab20c', 'jet', 'seismic',
            'viridis'...
        - vmin, vmax : scalar, optional, default: min and max of the image
            `vmin` and `vmax` are used in conjunction with norm to normalize
            luminance data. Note if you pass a `norm` instance, your
            settings for `vmin` and `vmax` will be ignored.
        - dpi : integer, optional, default is 96
            Dot per inch.
            For printed version, choose high dpi (i.e. dpi=300) => slow
            For screen version, choose low dpi (i.e. dpi=96) => fast
        - format : string, optional, default is 'png'
            Format to save the figure

        ... and more, see matplotlib

    Returns
    -------
    Sxx : 2d ndarray
        Gray-level image rescaled between 0 and 1 (flipped vertically when
        ``flipud`` is True).
    tn : 1d ndarray of floats
        time vector (horizontal x-axis), one value per image column, from
        0 to ``duration``
    fn : 1d ndarray of floats
        Frequency vector (vertical y-axis), one value per image row, from
        the lower to the upper frequency limit
    extent : list of scalars [left, right, bottom, top]
        The location, in data-coordinates, of the lower-left and upper-right
        corners.

    Examples
    --------
    >>> import maad
    >>> xenocanto_link = 'https://www.xeno-canto.org/sounds/uploaded/DTKJSKMKZD/ffts/XC445081-med.png'
    >>> Sxx, tn, fn, ext = maad.sound.load_spectrogram(
    ...     filename=xenocanto_link,
    ...     fs=44100,
    ...     flims=[0,15000],
    ...     duration = 10,
    ... )
    >>> print("The time resolution of the spectrogram is {} s and the frequency resolution is {} Hz".format(tn[1]-tn[0], fn[1]-fn[0]))
    The time resolution of the spectrogram is 0.020876826722338204 s and the frequency resolution is 94.33962264150944 Hz
    >>> import matplotlib.pyplot as plt
    >>> ax, fig = maad.util.plot2d(Sxx,extent=ext)
    """
    if verbose:
        print(72 * '_')
        print("loading %s..." % filename)

    # Load image in gray levels
    Sxx = imread(filename, as_gray=True)

    # if 3D, convert into 2D by keeping the first band only
    if Sxx.ndim == 3:
        Sxx = Sxx[:, :, 0]

    # Rescale the image between 0 to 1
    Sxx = linear_scale(Sxx, minval=0.0, maxval=1.0, axis=None)

    n_freqs, n_times = Sxx.shape[:2]

    # Frequency boundaries: the whole band up to Nyquist unless specified
    if flims is None:
        fmin, fmax = 0.0, fs / 2
    else:
        fmin, fmax = flims[0], flims[1]

    # linspace includes both end points, so the axes always match the image
    # size (np.arange with a float step could drop the last bin) and a
    # single-row or single-column image does not divide by zero.
    fn = np.linspace(fmin, fmax, n_freqs)
    tn = np.linspace(0, duration, n_times)

    # Extent
    extent = [tn[0], tn[-1], fn[0], fn[-1]]

    # flip the image vertically
    if flipud:
        Sxx = np.flip(Sxx, 0)

    # Display
    if display:
        ylabel = kwargs.pop('ylabel', 'Frequency [Hz]')
        xlabel = kwargs.pop('xlabel', 'Time [sec]')
        title = kwargs.pop('title', 'loaded spectrogram')
        cmap = kwargs.pop('cmap', 'gray')
        figsize = kwargs.pop('figsize', (4, 0.33 * (extent[1] - extent[0])))
        vmin = kwargs.pop('vmin', Sxx.min())
        vmax = kwargs.pop('vmax', Sxx.max())

        _, fig = plot2d(Sxx, extent=extent, figsize=figsize, title=title,
                        ylabel=ylabel, xlabel=xlabel, vmin=vmin, vmax=vmax,
                        cmap=cmap, **kwargs)

    return Sxx, tn, fn, extent
#%%
def write(filename, fs, data, bit_depth=None):
    """
    Write a NumPy array as a WAV file with the Scipy method. [1]_

    Parameters
    ----------
    filename : string or open file handle
        Name of output wav file.
    fs : int
        Sample rate (samples/sec).
    data : ndarray
        Mono or stereo signal as NumPy array. A 2-D array is transposed
        before being handed to scipy, which expects the shape
        (Nsamples, Nchannels). The input array is never modified.
    bit_depth : int, optional, default is None
        Specifies the bit depth format of the audio recording. Should be one
        of 8, 16 or 32. Only used for floating point data (float32 or
        float64). If None, bit depth will be determined from the Numpy data
        type. See section Common data types.

    Warns
    -----
    UserWarning
        If floating point data lies outside [-1, 1] (the written signal is
        clipped), or if ``bit_depth`` is given but is not 8, 16 or 32 (the
        argument is ignored).

    See Also
    --------
    scipy.io.wavfile.write

    Notes
    -----
    The data-type determines the bits-per-sample and PCM/float.

    Common data types: [2]_

    =====================  ===========  ===========  =============
         WAV format            Min          Max       NumPy dtype
    =====================  ===========  ===========  =============
    32-bit floating-point  -1.0         +1.0         float32
    32-bit PCM             -2147483648  +2147483647  int32
    16-bit PCM             -32768       +32767       int16
    8-bit PCM              0            255          uint8
    =====================  ===========  ===========  =============

    References
    ----------
    .. [1] The SciPy community, "scipy.io.wavfile.write", v1.6.0.
       https://docs.scipy.org/doc/scipy/reference/generated/scipy.io.wavfile.write.html
    .. [2] IBM Corporation and Microsoft Corporation, "Multimedia Programming
       Interface and Data Specifications 1.0", section "Data Format of the
       Samples", August 1991
       http://www.tactilemedia.com/info/MCI_Control_Info.html

    Examples
    --------
    >>> from maad import sound

    Synthesize a 440Hz sine wave at 44100 Hz and write it to disk.

    >>> import numpy as np
    >>> fs = 44100; T = 2.0
    >>> t = np.linspace(0, T, int(T*fs))
    >>> data = np.sin(2. * np.pi * 440. *t)
    >>> sound.write('example.wav', fs, data, bit_depth=16)

    Open an audio file, filter a frequency band and write to disk specifying
    the bit depth.

    >>> s, fs = sound.load('../data/spinetail.wav')
    >>> s_filt = sound.sinc(s, (3000, 10000), fs)
    >>> sound.write('spinetail_filtered.wav', fs, s_filt, bit_depth=16)
    """
    # Accept array-likes (e.g. lists) as well as ndarrays.
    data = np.asarray(data)

    if data.dtype in (np.float32, np.float64):
        # Check that the array has values between -1 and 1. np.clip returns
        # a new array, so the caller's data is left untouched.
        if data.size and (data.min() < -1 or data.max() > 1):
            warnings.warn('Values should be between [-1, 1]. Clipping signal.')
            data = np.clip(data, -1, 1)

        # Convert to desired bit depth
        if bit_depth == 8:
            # shift to the positive range [0, 2] before scaling (unsigned)
            data = ((data + 1) * 127).astype(np.uint8)
        elif bit_depth == 16:
            data = (data * 32767).astype(np.int16)
        elif bit_depth == 32:
            # Scale in float64: 2147483647 is not representable in float32,
            # so a float32 full-scale sample would overflow int32.
            data = (data.astype(np.float64) * 2147483647).astype(np.int32)
        elif bit_depth is not None:
            # None is the documented "keep the dtype" default: stay silent.
            warnings.warn('Values for bit depth should be 8, 16 or 32. Argument ignored.')

    if data.ndim > 1:
        data = data.T

    write_wav(filename, fs, np.asfortranarray(data))
#%%
def load_url(url):
    """
    Download audio file from the web and load it as a variable. The audio
    file must be a Waveform Audio Format (WAV) file.

    The whole file is downloaded in memory and decoded with ``load`` using
    its default options.

    Parameters
    ----------
    url : str
        Audio code name or URL address where the file is located.
        Known code names are 'spinetail', 'cold_forest_daylight',
        'cold_forest_night', 'rock_savanna' and 'tropical_forest_morning';
        any other value is used as the web address itself.

    Returns
    -------
    s : 1d ndarray
        Array with the signal amplitude
    fs : int
        Sampling frequency of the signal, in Hertz (Hz)

    Examples
    --------
    Load an audio example using its code name.

    >>> from maad import sound
    >>> s, fs = sound.load_url('spinetail')
    >>> print('The sampling rate of the audio file is {} Hz'.format(fs))
    The sampling rate of the audio file is 44100 Hz

    Load an audio example using the full web address.

    >>> s, fs = sound.load_url('https://github.com/scikit-maad/scikit-maad/raw/production/data/spinetail.wav')
    >>> print('The sampling rate of the audio file is {} Hz'.format(fs))
    The sampling rate of the audio file is 44100 Hz
    """
    # set dictionary for examples from the audio dataset
    # NOTE(review): 'rock_savanna' points to guyana_tropical_forest.wav;
    # confirm this mapping is intended.
    code_name_url = {
        'spinetail': 'https://github.com/scikit-maad/scikit-maad/raw/production/data/spinetail.wav',
        'cold_forest_daylight': 'https://github.com/scikit-maad/scikit-maad/raw/production/data/cold_forest_daylight.wav',
        'cold_forest_night': 'https://github.com/scikit-maad/scikit-maad/raw/production/data/cold_forest_night.wav',
        'rock_savanna': 'https://github.com/scikit-maad/scikit-maad/raw/production/data/guyana_tropical_forest.wav',
        'tropical_forest_morning': 'https://github.com/scikit-maad/scikit-maad/raw/production/data/tropical_forest_morning.wav',
    }

    # A known code name resolves to its address; anything else is treated
    # as the web address itself.
    url_path = code_name_url.get(url, url)

    # Close the HTTP response as soon as its content has been read.
    with urlopen(url_path) as response:
        payload = response.read()

    s, fs = load(io.BytesIO(payload))
    return s, fs
# Run the docstring examples of this module when it is executed as a script.
if __name__ == "__main__":
    from doctest import testmod

    testmod()