Source code for maad.rois.template_matching_func

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Template matching functions

New BSD License

TODO:
    - min_t and max_t should be 0 to len(Sxx_audio)
"""

import pandas as pd
import numpy as np
from scipy.signal import find_peaks
from skimage.feature import match_template
import matplotlib.pyplot as plt
from matplotlib import patches
from maad import sound, util

#%%

[docs]
def template_matching(
    Sxx, Sxx_template, tn, ext, peak_th, peak_distance=None, display=False, **kwargs
):
    """
    Use normalized spectrogram cross-correlation to detect the occurrence of a template
    sound in a target audio.

    The output is an array with values between -1.0 and 1.0. The value at a given
    position corresponds to the correlation coefficient between the spectrograms of
    the target audio and the template. The function also delivers the detections found
    as peaks and as regions of interest (rois).

    Parameters
    ----------
    Sxx : 2D array
        Spectrogram of the target audio signal.

    Sxx_template : 2D array
        Spectrogram of the template sound. It must not be larger than `Sxx`
        along any axis.

    tn : 1d array
        Time vector of target audio, which results from the maad.sound.spectrogram function.
        Its first step, ``np.diff(tn)[0]``, is used as the temporal resolution.

    ext : list of scalars [left, right, bottom, top]
        Extent keyword arguments controls the bounding box in data coordinates for the
        spectrogram of the target audio, which results from the maad.sound.spectrogram function.
        ``ext[2]`` and ``ext[3]`` are reported as ``min_f`` and ``max_f`` of every detection.

    peak_th : float
        Threshold applied to find peaks in the cross-correlation array.
        Should be a value between -1 and 1.

    peak_distance : float, optional
        Required minimal temporal distance (>= 0) in seconds between neighbouring
        peaks. If set to `None`, the minimum temporal resolution will be used.
        The minimal temporal resolution is given by the array tn and depends on the parameters
        used to compute the spectrogram.

    display : Boolean, optional
        Display the results of the template matching. The default is False.

    **kwargs: keywords pair, optional
        Set additional specificities to find peaks in the cross-correlation array.
        The keywords ``prominence``, ``width``, ``wlen``, ``rel_height``,
        ``plateau_size`` and ``threshold`` are passed to the function
        scipy.signal.find_peaks; any other keyword is ignored.

    Returns
    -------
    xcorrcoef : 1D array
        Correlation coefficients resulting from the cross-correlation between
        audio and target template, averaged over the frequency axis.

    rois : pandas DataFrame
        Detections found based on cross-correlation coefficients. The result is
        presented as a DataFrame where each row represent a detection, with the
        peak time (peak_time), peak amplitude (xcorrcoef), minimum and maximum time
        (min_t, max_t), and minimum and maximum frequency (min_f, max_f).

    Raises
    ------
    ValueError
        If the template has more dimensions than the target spectrogram, if the
        template is larger than the target spectrogram, or if `peak_distance`
        is smaller than the temporal resolution of the spectrogram.

    """

    # check inputs
    if Sxx.ndim < Sxx_template.ndim:
        raise ValueError(
            "Dimensionality of template must be less than or "
            "equal to the dimensionality of image."
        )
    if np.any(np.less(Sxx.shape, Sxx_template.shape)):
        raise ValueError("Target spectrogram must be larger than template.")

    # temporal resolution of the spectrogram, taken from the first step of tn
    time_step = np.diff(tn)[0]

    if peak_distance is None:  # if not provided, set to minimum distance
        peak_distance = time_step

    # set temporal distance to spectrogram pixels
    peak_distance_pixel = peak_distance / time_step

    if peak_distance_pixel < 1:
        raise ValueError(
            f"`peak_distance` must be greater or equal to spectrogram resolution: {time_step}"
        )

    # Pad Sxx along time so that len(xcorrcoef) == Sxx.shape[1].
    # An even template width needs one column less on the right side.
    n_template_frames = Sxx_template.shape[1]
    half_width = n_template_frames // 2
    right_pad = half_width if n_template_frames % 2 == 1 else half_width - 1
    Sxx_pad = np.pad(Sxx, ((0, 0), (half_width, right_pad)), mode="edge")

    # Compute normalized cross-correlation
    xcorrcoef = match_template(Sxx_pad, Sxx_template)

    # When the frequency range of Sxx is larger than that of Sxx_template,
    # several rows are returned: take the mean value over frequency
    xcorrcoef = np.mean(xcorrcoef, axis=0)

    ## Find peaks
    # Keywords are passed by name so the call cannot drift out of sync with
    # the positional order of scipy.signal.find_peaks.
    peaks, _ = find_peaks(
        xcorrcoef,
        height=peak_th,
        threshold=kwargs.pop("threshold", None),
        distance=peak_distance_pixel,
        prominence=kwargs.pop("prominence", None),
        width=kwargs.pop("width", None),
        wlen=kwargs.pop("wlen", None),
        rel_height=kwargs.pop("rel_height", 0.5),
        plateau_size=kwargs.pop("plateau_size", None),
    )
    peaks_time = tn[peaks]

    # Duration of the template. tn[n_template_frames] does not exist when the
    # template spans as many frames as the target, so fall back to the
    # equivalent number of time steps in that case.
    if n_template_frames < len(tn):
        template_len = tn[n_template_frames] - tn[0]
    else:
        template_len = n_template_frames * time_step

    # Build rois as pandas Dataframe
    # Create Dataframe and adjust extreme values for min_t and max_t
    rois = pd.DataFrame(
        {
            "peak_time": peaks_time,
            "xcorrcoef": xcorrcoef[peaks],
            "min_t": peaks_time - template_len / 2,
            "max_t": peaks_time + template_len / 2,
        }
    )
    rois.loc[rois.min_t < 0, "min_t"] = tn[0]
    rois.loc[rois.max_t > tn[-1], "max_t"] = tn[-1]

    # Frequency limits are part of the documented output, so they are set
    # whether or not the result is displayed.
    if ext is not None:
        rois["min_f"] = ext[2]
        rois["max_f"] = ext[3]

    if display:
        min_f, max_f = ext[2], ext[3]

        # plot spectrogram with one shaded rectangle per detection
        _, ax = plt.subplots(2, 1, figsize=(8, 5), sharex=True)
        util.plot_spectrogram(Sxx, ext, log_scale=False, ax=ax[0], colorbar=False)
        for roi in rois.itertuples(index=False):
            rect = patches.Rectangle(
                (roi.min_t, min_f),
                roi.max_t - roi.min_t,
                max_f - min_f,
                lw=1,
                edgecolor="yellow",
                facecolor="yellow",
                alpha=0.25,
            )
            ax[0].add_patch(rect)

        # plot corr coef
        ax[1].plot(tn[0 : xcorrcoef.shape[0]], xcorrcoef)
        ax[1].plot(peaks_time, xcorrcoef[peaks], "x")
        ax[1].hlines(peak_th, 0, tn[-1], linestyle="dotted", color="0.75")
        ax[1].set_xlabel("Time [s]")
        ax[1].set_ylabel("Correlation coeficient")

    return xcorrcoef, rois