com.musicg.api.DetectionApi.java Source code

Java tutorial

Introduction

Here is the source code for com.musicg.api.DetectionApi.java

Source

/*
 * Copyright (C) 2011 Jacquet Wong
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.musicg.api;

import com.musicg.math.rank.ArrayRankDouble;
import org.apache.commons.math3.stat.descriptive.moment.StandardDeviation;
import com.musicg.math.statistics.ZeroCrossingRate;
import com.musicg.wave.Wave;
import com.musicg.wave.WaveHeader;
import com.musicg.wave.extension.Spectrogram;

/**
 * Base API for detecting a specific sound in one frame of mono PCM audio.
 *
 * <p>
 * A frame is accepted only if it passes four checks in turn: average spectral
 * intensity, standard deviation of the strongest spectral values, zero
 * crossing count and dominant frequency. The thresholds used by these checks
 * are the protected fields of this class; they are expected to be assigned by
 * a subclass in {@link #init()}.
 * </p>
 * 
 * @author Jacquet Wong
 * 
 */
public class DetectionApi {

    /** Header of the wave being analysed; stays {@code null} if the header was not mono. */
    protected WaveHeader waveHeader;
    /** FFT size, set to the number of samples of the last accepted frame. */
    protected int fftSampleSize;
    /** Number of frequency units (bins) of the spectrum, {@code fftSampleSize / 2}. */
    protected int numFrequencyUnit;
    /** Frequency width covered by one frequency unit. */
    protected double unitFrequency;
    /** Accepted range (inclusive) of the dominant frequency. */
    protected double minFrequency, maxFrequency;
    /** Accepted range of the average intensity (lower bound exclusive, upper inclusive). */
    protected double minIntensity, maxIntensity;
    /** Accepted range (inclusive) of the standard deviation of the robust values. */
    protected double minStandardDeviation, maxStandardDeviation;
    /** Pass band limits, converted into spectrum indices by dividing by {@link #unitFrequency}. */
    protected int highPass, lowPass;
    /** Accepted range (inclusive) of the number of zero crossings. */
    protected int minNumZeroCross, maxNumZeroCross;
    /** First and last spectrum index of the pass band (both inclusive). */
    protected int lowerBoundary, upperBoundary;
    /** How many of the strongest spectral values enter the standard deviation check. */
    protected int numRobust;

    /**
     * Constructor, support mono Wav only, 4096 sample byte size for 44100Hz
     * 16bit mono wav.
     *
     * <p>
     * If the header is not mono, an error is printed, {@link #init()} is not
     * called and {@link #waveHeader} stays {@code null}; every later call of
     * {@link #isSpecificSound(byte[])} then returns {@code false}.
     * </p>
     *
     * <p>
     * Note that {@link #init()} is invoked from this constructor, i.e. before
     * any field initialiser or constructor body of a subclass has run.
     * </p>
     * 
     * @param waveHeader
     *            the WAV file header
     */
    public DetectionApi(WaveHeader waveHeader) {
        if (waveHeader.getChannels() == 1) {
            this.waveHeader = waveHeader;
            init();
        } else {
            System.err.println("DetectionAPI supports mono Wav only");
        }
    }

    /**
     * Initiate the settings for specific sound detection. The default
     * implementation does nothing; it is meant to be overridden.
     */
    protected void init() {
        // do nothing, needs to be overridden
    }

    /**
     * Determine whether the audio bytes contain the specific sound or not.
     *
     * <p>
     * The number of samples in {@code audioBytes} must be a power of 2
     * because the whole frame is transformed with a single FFT. As a side
     * effect {@link #fftSampleSize}, {@link #numFrequencyUnit},
     * {@link #unitFrequency}, {@link #lowerBoundary} and
     * {@link #upperBoundary} are updated for an accepted frame size.
     * </p>
     * 
     * @param audioBytes
     *            input audio bytes of one frame
     * @return true if the bytes contain the specified sound; false if any
     *         check fails or the input cannot be analysed
     */
    public boolean isSpecificSound(byte[] audioBytes) {

        // the constructor leaves the header unset (and has already reported
        // the problem) when the wave is not mono
        if (waveHeader == null) {
            return false;
        }

        int bytesPerSample = waveHeader.getBitsPerSample() / 8;
        if (bytesPerSample <= 0) {
            // fewer than 8 bits per sample would make the division below fail
            return false;
        }
        int numSamples = audioBytes.length / bytesPerSample;

        // numSamples required to be a power of 2
        if (numSamples <= 0 || Integer.bitCount(numSamples) != 1) {
            System.err.println("The sample size must be a power of 2");
            return false;
        }

        fftSampleSize = numSamples;
        numFrequencyUnit = fftSampleSize / 2;

        // frequency could be caught within the half of nSamples according
        // to Nyquist theory
        unitFrequency = (double) waveHeader.getSampleRate() / 2 / numFrequencyUnit;

        // translate the pass band into spectrum indices
        lowerBoundary = (int) (highPass / unitFrequency);
        upperBoundary = (int) (lowPass / unitFrequency);

        // audio bytes of this frame
        Wave wave = new Wave(waveHeader, audioBytes);
        short[] amplitudes = wave.getSampleAmplitudes();

        // spectrum for the clip
        Spectrogram spectrogram = wave.getSpectrogram(fftSampleSize, 0);
        double[][] spectrogramData = spectrogram.getAbsoluteSpectrogramData();

        // since fftSampleSize==numSamples, there is only one spectrum, which
        // is spectrogramData[0]
        double[] spectrum = spectrogramData[0];

        // The pass band has to lie inside the spectrum. This must be verified
        // BEFORE the band is copied, otherwise the allocation or the copy
        // throws instead of reporting the problem.
        int frequencyUnitRange = upperBoundary - lowerBoundary + 1;
        if (lowerBoundary < 0 || frequencyUnitRange < 0 || upperBoundary >= spectrum.length) {
            System.err.println("is error: the wave needed to be higher sample rate");
            return false;
        }

        // Copy of the pass band, taken now because the standard deviation
        // check below normalises spectrogramData (and thus spectrum) in place.
        double[] rangedSpectrum = new double[frequencyUnitRange];
        System.arraycopy(spectrum, lowerBoundary, rangedSpectrum, 0, rangedSpectrum.length);

        // The rules are evaluated in this order and stop at the first failure:
        // 1. intensity of the frame (on the raw spectrum)
        // 2. standard deviation of the strongest values (normalises the data)
        // 3. zero crossing count of the amplitudes
        // 4. dominant frequency inside the pass band
        return isPassedIntensity(spectrum)
                && isPassedStandardDeviation(spectrogramData)
                && isPassedZeroCrossingRate(amplitudes)
                && isPassedFrequency(rangedSpectrum);
    }

    /**
     * Normalise the spectrogram in place on a logarithmic scale: every value
     * {@code v} becomes {@code log10(v / min) / log10(max / min)}, where
     * {@code min} and {@code max} are the smallest and largest value of the
     * whole spectrogram. Values below a tiny threshold are set to 0.
     *
     * <p>
     * If all values are equal, {@code log10(max / min)} is 0 and the affected
     * cells become NaN; range checks on values derived from them are then
     * false.
     * </p>
     *
     * @param spectrogramData
     *            spectrogram to normalise, modified in place
     */
    protected void normalizeSpectrogramData(double[][] spectrogramData) {

        // Find the extremes. The two comparisons are independent on purpose:
        // a value that raises the maximum (e.g. the very first one) may also
        // be the minimum.
        double maxAmp = Double.NEGATIVE_INFINITY;
        double minAmp = Double.POSITIVE_INFINITY;
        for (int i = 0; i < spectrogramData.length; i++) {
            for (int j = 0; j < spectrogramData[i].length; j++) {
                double value = spectrogramData[i][j];
                if (value > maxAmp) {
                    maxAmp = value;
                }
                if (value < minAmp) {
                    minAmp = value;
                }
            }
        }

        // avoid dividing by zero; the float literal is kept so that the
        // threshold has exactly the same value as before
        double minValidAmp = 0.00000000001F;
        if (minAmp == 0) {
            minAmp = minValidAmp;
        }

        double diff = Math.log10(maxAmp / minAmp); // perceptual difference
        for (int i = 0; i < spectrogramData.length; i++) {
            for (int j = 0; j < spectrogramData[i].length; j++) {
                if (spectrogramData[i][j] < minValidAmp) {
                    spectrogramData[i][j] = 0;
                } else {
                    spectrogramData[i][j] = (Math.log10(spectrogramData[i][j] / minAmp)) / diff;
                }
            }
        }
    }

    /**
     * Check the standard deviation of the strongest values of the last
     * spectrum against {@link #minStandardDeviation} and
     * {@link #maxStandardDeviation} (both inclusive).
     *
     * <p>
     * The spectrogram is normalised in place first, see
     * {@link #normalizeSpectrogramData(double[][])}.
     * </p>
     *
     * @param spectrogramData
     *            spectrogram of the frame, modified in place
     * @return true if the standard deviation lies inside the accepted range
     */
    protected boolean isPassedStandardDeviation(double[][] spectrogramData) {

        // normalize the spectrogramData (with all frames in the spectrogram)
        normalizeSpectrogramData(spectrogramData);

        // analyse the last frame; when called from isSpecificSound there is
        // only one
        double[] spectrum = spectrogramData[spectrogramData.length - 1];

        // threshold for the strongest values
        // NOTE(review): presumably the numRobust-th largest value of the
        // spectrum - confirm against ArrayRankDouble
        double[] robustFrequencies = new double[numRobust];
        ArrayRankDouble arrayRankDouble = new ArrayRankDouble();
        double nthValue = arrayRankDouble.getNthOrderedValue(spectrum, numRobust, false);

        // collect, in spectrum order, at most numRobust values reaching the
        // threshold; unfilled slots stay 0. Bounding the loop by the count
        // keeps numRobust == 0 from writing into an empty array.
        int count = 0;
        for (int i = 0; i < spectrum.length && count < numRobust; i++) {
            if (spectrum[i] >= nthValue) {
                robustFrequencies[count++] = spectrum[i];
            }
        }

        StandardDeviation standardDeviation = new StandardDeviation();
        double sd = standardDeviation.evaluate(robustFrequencies);

        // range of standard deviation
        return sd >= minStandardDeviation && sd <= maxStandardDeviation;
    }

    /**
     * Check that the strongest frequency lies between {@link #minFrequency}
     * and {@link #maxFrequency} (both inclusive). The frequency is the index
     * of the maximum value multiplied by {@link #unitFrequency}.
     *
     * <p>
     * NOTE(review): {@link #isSpecificSound(byte[])} passes the pass band
     * copy, whose index 0 corresponds to {@link #lowerBoundary}; the computed
     * frequency is therefore relative to the start of the pass band. Confirm
     * that the thresholds are meant to be relative as well.
     * </p>
     *
     * @param spectrum
     *            spectrum (or part of it) to search for the maximum
     * @return true if the strongest frequency lies inside the accepted range
     */
    protected boolean isPassedFrequency(double[] spectrum) {
        // find the robust frequency
        ArrayRankDouble arrayRankDouble = new ArrayRankDouble();
        double robustFrequency = arrayRankDouble.getMaxValueIndex(spectrum) * unitFrequency;

        // frequency of the sound should not be too low or too high
        return robustFrequency >= minFrequency && robustFrequency <= maxFrequency;
    }

    /**
     * Check that the average value of the spectrum is greater than
     * {@link #minIntensity} and not greater than {@link #maxIntensity}.
     *
     * @param spectrum
     *            spectrum of the frame
     * @return true if the average intensity lies inside the accepted range
     */
    protected boolean isPassedIntensity(double[] spectrum) {
        // get the average intensity of the signal
        double intensity = 0;
        for (int i = 0; i < spectrum.length; i++) {
            intensity += spectrum[i];
        }
        intensity /= spectrum.length;

        // the sound should be neither too soft nor too loud
        return intensity > minIntensity && intensity <= maxIntensity;
    }

    /**
     * Check that the number of zero crossings of the frame lies between
     * {@link #minNumZeroCross} and {@link #maxNumZeroCross} (both inclusive).
     *
     * @param amplitudes
     *            sample amplitudes of the frame
     * @return true if the number of zero crossings lies inside the accepted
     *         range
     */
    protected boolean isPassedZeroCrossingRate(short[] amplitudes) {
        // with lengthInSecond=1 the zero crossing rate is the number of
        // zero crosses
        ZeroCrossingRate zcr = new ZeroCrossingRate(amplitudes, 1);
        int numZeroCrosses = (int) zcr.evaluate();

        // different sounds have different ranges of zero crossing value
        return numZeroCrosses >= minNumZeroCross && numZeroCrosses <= maxNumZeroCross;
    }

}