Android Open Source - Kõnele Raw Audio Recorder






From Project

Back to project page Kõnele.

License

The source code is released under:

Apache License

If you think the Android project Kõnele listed in this page is inappropriate, such as containing malicious code/tools or violating the copyright, please email info at java2s dot com, thanks.

Java Source Code

/*
 * Copyright 2011-2012, Institute of Cybernetics at Tallinn University of Technology
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package ee.ioc.phon.android.speak;

import android.media.AudioFormat;
import android.media.AudioRecord;
import android.media.MediaRecorder;

/**
 * <p>Records raw audio using AudioRecord and stores it into a byte array as</p>
 * <ul>
 * <li>signed</li>
 * <li>16-bit</li>
 * <li>little endian</li>
 * <li>mono</li>
 * <li>16kHz (recommended, but a different sample rate can be specified in the constructor)</li>
 * </ul>
 *
 * <p>For example, the corresponding <code>arecord</code> settings are</p>
 *
 * <pre>
 * arecord --file-type raw --format=S16_LE --channels 1 --rate 16000
 * </pre>
 *
 * TODO: maybe use: ByteArrayOutputStream
 *
 * @author Kaarel Kaljurand
 */
public class RawAudioRecorder {

  private static final String LOG_TAG = RawAudioRecorder.class.getName();

  private static final int DEFAULT_AUDIO_SOURCE = MediaRecorder.AudioSource.VOICE_RECOGNITION;
  private static final int DEFAULT_SAMPLE_RATE = 16000;

  // 16-bit signed PCM samples, i.e. 2 bytes per sample.
  private static final int RESOLUTION = AudioFormat.ENCODING_PCM_16BIT;
  private static final short RESOLUTION_IN_BYTES = 2;

  // Number of channels (MONO = 1, STEREO = 2)
  private static final short CHANNELS = 1;

  /**
   * Life-cycle states of the recorder.
   */
  public enum State {
    // recorder is ready, but not yet recording
    READY,

    // recorder recording
    RECORDING,

    // error occurred, reconstruction needed
    ERROR,

    // recorder stopped
    STOPPED
  }

  private AudioRecord mRecorder = null;

  // Exponentially decaying average of the energy of past one-second windows,
  // maintained by getPauseScore() to detect an end-of-speech pause.
  private double mAvgEnergy = 0;

  private final int mSampleRate;

  // Number of bytes that one second of recorded audio occupies.
  private final int mOneSec;

  // Recorder state
  private State mState;

  // Buffer size (in bytes) handed to the underlying AudioRecord
  private int mBufferSize;

  // Number of frames written to byte array on each output
  private int mFramePeriod;

  // The complete space into which the recording is written.
  // Its maximum length is about:
  // 2 (bytes) * 1 (channels) * 30 (max rec time in seconds) * 44100 (times per second) = 2 646 000 bytes
  // but typically is:
  // 2 (bytes) * 1 (channels) * 20 (max rec time in seconds) * 16000 (times per second) = 640 000 bytes
  private final byte[] mRecording;

  // TODO: use: mRecording.length instead
  private int mRecordedLength = 0;

  // The number of bytes the client has already consumed
  private int mConsumedLength = 0;

  // Buffer for output
  private byte[] mBuffer;


  /**
   * <p>Instantiates a new recorder and sets the state to READY.
   * In case of errors, no exception is thrown, but the state is set to ERROR.</p>
   *
   * <p>Android docs say: 44100Hz is currently the only rate that is guaranteed to work on all devices,
   * but other rates such as 22050, 16000, and 11025 may work on some devices.</p>
   *
   * @param audioSource Identifier of the audio source (e.g. microphone)
   * @param sampleRate Sample rate (e.g. 16000)
   */
  public RawAudioRecorder(int audioSource, int sampleRate) {
    mSampleRate = sampleRate;
    // E.g. 1 second of 16kHz 16-bit mono audio takes 32000 bytes.
    mOneSec = RESOLUTION_IN_BYTES * CHANNELS * mSampleRate;
    // TODO: replace 35 with the max length of the recording (as specified in the settings)
    mRecording = new byte[mOneSec * 35];
    try {
      setBufferSizeAndFramePeriod();
      mRecorder = new AudioRecord(audioSource, mSampleRate, AudioFormat.CHANNEL_CONFIGURATION_MONO, RESOLUTION, mBufferSize);
      if (getAudioRecordState() != AudioRecord.STATE_INITIALIZED) {
        throw new Exception("AudioRecord initialization failed");
      }
      mBuffer = new byte[mFramePeriod * RESOLUTION_IN_BYTES * CHANNELS];
      setState(State.READY);
    } catch (Exception e) {
      handleError();
      if (e.getMessage() == null) {
        Log.e(LOG_TAG, "Unknown error occured while initializing recording");
      } else {
        Log.e(LOG_TAG, e.getMessage());
      }
    }
  }


  public RawAudioRecorder(int sampleRate) {
    this(DEFAULT_AUDIO_SOURCE, sampleRate);
  }


  public RawAudioRecorder() {
    this(DEFAULT_AUDIO_SOURCE, DEFAULT_SAMPLE_RATE);
  }


  /**
   * <p>Reads one buffer-full of audio from the given recorder and appends it
   * to the total recording.</p>
   *
   * @param recorder audio recorder to read from
   * @return 0 on success, a negative error code otherwise
   */
  private int read(AudioRecord recorder) {
    // public int read (byte[] audioData, int offsetInBytes, int sizeInBytes)
    int numberOfBytes = recorder.read(mBuffer, 0, mBuffer.length); // Fill buffer

    // Some error checking
    if (numberOfBytes == AudioRecord.ERROR_INVALID_OPERATION) {
      Log.e(LOG_TAG, "The AudioRecord object was not properly initialized");
      return -1;
    } else if (numberOfBytes == AudioRecord.ERROR_BAD_VALUE) {
      Log.e(LOG_TAG, "The parameters do not resolve to valid data and indexes.");
      return -2;
    } else if (numberOfBytes > mBuffer.length) {
      Log.e(LOG_TAG, "Read more bytes than is buffer length:" + numberOfBytes + ": " + mBuffer.length);
      return -3;
    } else if (numberOfBytes == 0) {
      Log.e(LOG_TAG, "Read zero bytes");
      return -4;
    }
    // Everything seems to be OK, adding the buffer to the recording.
    add(mBuffer);
    return 0;
  }


  /**
   * <p>Queries the hardware for the minimum AudioRecord buffer size and sets
   * {@code mBufferSize} (twice the minimum, for safety) and {@code mFramePeriod}
   * (frames read per buffer-fill) accordingly.</p>
   *
   * @throws IllegalArgumentException if the sample rate / format combination is not supported
   */
  private void setBufferSizeAndFramePeriod() {
    int minBufferSizeInBytes = AudioRecord.getMinBufferSize(mSampleRate, AudioFormat.CHANNEL_CONFIGURATION_MONO, RESOLUTION);
    if (minBufferSizeInBytes == AudioRecord.ERROR_BAD_VALUE) {
      throw new IllegalArgumentException("AudioRecord.getMinBufferSize: parameters not supported by hardware");
    } else if (minBufferSizeInBytes == AudioRecord.ERROR) {
      Log.e(LOG_TAG, "AudioRecord.getMinBufferSize: unable to query hardware for output properties");
      // Fall back to a 120 ms buffer.
      // BUGFIX: the original expression was mSampleRate * (120 / 1000) * ...,
      // where 120 / 1000 is integer division (= 0), producing a zero-sized buffer.
      minBufferSizeInBytes = mSampleRate * 120 / 1000 * RESOLUTION_IN_BYTES * CHANNELS;
    }
    mBufferSize = 2 * minBufferSizeInBytes;
    mFramePeriod = mBufferSize / ( 2 * RESOLUTION_IN_BYTES * CHANNELS );
    Log.i(LOG_TAG, "AudioRecord buffer size: " + mBufferSize + ", min size = " + minBufferSizeInBytes);
  }


  /**
   * @return recorder state
   */
  public State getState() {
    return mState;
  }

  private void setState(State state) {
    mState = state;
  }


  /**
   * @return bytes that have been recorded since the beginning
   */
  public byte[] getCompleteRecording() {
    return getCurrentRecording(0);
  }


  /**
   * @return bytes that have been recorded since the beginning, prefixed with a 44-byte RIFF/WAVE header
   */
  public byte[] getCompleteRecordingAsWav() {
    byte[] pcm = getCompleteRecording();
    int headerLen = 44;
    int totalAudioLen = pcm.length;
    // The RIFF chunk size excludes the 8 bytes taken by the "RIFF" id and the
    // size field itself, i.e. it is 36 + data length.
    // BUGFIX: the original wrote 44 + data length, which is 8 bytes too big.
    int chunkSize = totalAudioLen + headerLen - 8;
    // Bytes per second of audio = sampleRate * bytesPerSample * channels.
    int byteRate = mSampleRate * RESOLUTION_IN_BYTES * CHANNELS;

    byte[] header = new byte[headerLen];

    header[0] = 'R';  // RIFF/WAVE header
    header[1] = 'I';
    header[2] = 'F';
    header[3] = 'F';
    header[4] = (byte) (chunkSize & 0xff);
    header[5] = (byte) ((chunkSize >> 8) & 0xff);
    header[6] = (byte) ((chunkSize >> 16) & 0xff);
    header[7] = (byte) ((chunkSize >> 24) & 0xff);
    header[8] = 'W';
    header[9] = 'A';
    header[10] = 'V';
    header[11] = 'E';
    header[12] = 'f';  // 'fmt ' chunk
    header[13] = 'm';
    header[14] = 't';
    header[15] = ' ';
    header[16] = 16;  // 4 bytes: size of 'fmt ' chunk
    header[17] = 0;
    header[18] = 0;
    header[19] = 0;
    header[20] = 1;  // format = 1 (PCM)
    header[21] = 0;
    header[22] = (byte) CHANNELS;
    header[23] = 0;
    header[24] = (byte) (mSampleRate & 0xff);
    header[25] = (byte) ((mSampleRate >> 8) & 0xff);
    header[26] = (byte) ((mSampleRate >> 16) & 0xff);
    header[27] = (byte) ((mSampleRate >> 24) & 0xff);
    header[28] = (byte) (byteRate & 0xff);
    header[29] = (byte) ((byteRate >> 8) & 0xff);
    header[30] = (byte) ((byteRate >> 16) & 0xff);
    header[31] = (byte) ((byteRate >> 24) & 0xff);
    // Block align = channels * bytes per sample.
    // BUGFIX: the original wrote (2 * 16 / 8) = 4, which is wrong for mono 16-bit (= 2).
    header[32] = (byte) (CHANNELS * RESOLUTION_IN_BYTES);
    header[33] = 0;
    header[34] = (byte) (8 * RESOLUTION_IN_BYTES);  // bits per sample
    header[35] = 0;
    header[36] = 'd';
    header[37] = 'a';
    header[38] = 't';
    header[39] = 'a';
    header[40] = (byte) (totalAudioLen & 0xff);
    header[41] = (byte) ((totalAudioLen >> 8) & 0xff);
    header[42] = (byte) ((totalAudioLen >> 16) & 0xff);
    header[43] = (byte) ((totalAudioLen >> 24) & 0xff);

    byte[] wav = new byte[header.length + pcm.length];
    System.arraycopy(header, 0, wav, 0, header.length);
    System.arraycopy(pcm, 0, wav, header.length, pcm.length);
    return wav;
  }


  /**
   * @return bytes that have been recorded since this method was last called
   */
  public synchronized byte[] consumeRecording() {
    byte[] bytes = getCurrentRecording(mConsumedLength);
    Log.i(LOG_TAG, "Copied from: " + mConsumedLength + ": " + bytes.length + " bytes");
    mConsumedLength = mRecordedLength;
    return bytes;
  }

  /**
   * Returns the recorded bytes since the last call, and resets the recording.
   * @return bytes that have been recorded since this method was last called
   */
  public synchronized byte[] consumeRecordingAndTruncate() {
    byte[] bytes = getCurrentRecording(mConsumedLength);
    Log.i(LOG_TAG, "Copied from position: " + mConsumedLength + ": " + bytes.length + " bytes");
    mRecordedLength = 0;
    mConsumedLength = mRecordedLength;
    return bytes;
  }

  /**
   * @param startPos position (in bytes) to start copying from
   * @return copy of the recording from the given position to the current end
   */
  private byte[] getCurrentRecording(int startPos) {
    int len = getLength() - startPos;
    byte[] bytes = new byte[len];
    System.arraycopy(mRecording, startPos, bytes, 0, len);
    return bytes;
  }


  /**
   * @return number of bytes recorded so far
   */
  public int getLength() {
    return mRecordedLength;
  }


  /**
   * @return <code>true</code> iff a speech-ending pause has occurred at the end of the recorded data
   */
  public boolean isPausing() {
    double pauseScore = getPauseScore();
    Log.i(LOG_TAG, "Pause score: " + pauseScore);
    return pauseScore > 7;
  }


  /**
   * @return volume indicator that shows the average volume of the last read buffer
   */
  public float getRmsdb() {
    long sumOfSquares = getRms(mRecordedLength, mBuffer.length);
    // BUGFIX: divide in floating point before taking the square root; the original
    // performed integer division (sumOfSquares / samples), losing precision.
    double rootMeanSquare = Math.sqrt((double) sumOfSquares / (mBuffer.length / 2));
    if (rootMeanSquare > 1) {
      // TODO: why 10?
      return (float) (10 * Math.log10(rootMeanSquare));
    }
    return 0;
  }


  /**
   * <p>In order to calculate if the user has stopped speaking we take the
   * data from the last second of the recording, map it to a number
   * and compare this number to the numbers obtained previously. We
   * return a confidence score (0-INF) of a longer pause having occurred in the
   * speech input.</p>
   *
   * <p>TODO: base the implementation on some well-known technique.</p>
   *
   * @return positive value which the caller can use to determine if there is a pause
   */
  private double getPauseScore() {
    long t2 = getRms(mRecordedLength, mOneSec);
    if (t2 == 0) {
      return 0;
    }
    double t = mAvgEnergy / t2;
    // Exponential smoothing: new average = (2 * old + current) / 3.
    mAvgEnergy = (2 * mAvgEnergy + t2) / 3;
    return t;
  }


  /**
   * <p>Stops the recording (if needed) and releases the resources.
   * The object can no longer be used and the reference should be
   * set to null after a call to release().</p>
   */
  public synchronized void release() {
    if (mRecorder != null) {
      if (mRecorder.getRecordingState() == AudioRecord.RECORDSTATE_RECORDING) {
        stop();
      }
      mRecorder.release();
      mRecorder = null;
    }
  }


  /**
   * <p>Starts the recording, and sets the state to RECORDING.
   * Spawns a background thread that keeps filling the recording array
   * for as long as the underlying AudioRecord keeps recording.</p>
   */
  public void start() {
    if (getAudioRecordState() == AudioRecord.STATE_INITIALIZED) {
      mRecorder.startRecording();
      if (mRecorder.getRecordingState() == AudioRecord.RECORDSTATE_RECORDING) {
        setState(State.RECORDING);
        new Thread() {
          public void run() {
            while (mRecorder != null && mRecorder.getRecordingState() == AudioRecord.RECORDSTATE_RECORDING) {
              int status = read(mRecorder);
              if (status < 0) {
                handleError();
                break;
              }
            }
          }
        }.start();
      } else {
        Log.e(LOG_TAG, "startRecording() failed");
        handleError();
      }
    } else {
      Log.e(LOG_TAG, "start() called on illegal state");
      handleError();
    }
  }


  /**
   * <p>Stops the recording, and sets the state to STOPPED.
   * If stopping fails then sets the state to ERROR.</p>
   */
  public void stop() {
    // We check the underlying AudioRecord state trying to avoid IllegalStateException.
    // If it still occurs then we catch it.
    if (getAudioRecordState() == AudioRecord.STATE_INITIALIZED &&
        mRecorder.getRecordingState() == AudioRecord.RECORDSTATE_RECORDING) {
      try {
        mRecorder.stop();
        setState(State.STOPPED);
      } catch (IllegalStateException e) {
        Log.e(LOG_TAG, "native stop() called in illegal state: " + e.getMessage());
        handleError();
      }
    } else {
      Log.e(LOG_TAG, "stop() called in illegal state");
      handleError();
    }
  }


  /**
   * <p>Copy the given byte array into the total recording array.</p>
   *
   * <p>The total recording array has been pre-allocated (e.g. for 35 seconds of audio).
   * If it gets full then the recording is stopped.</p>
   *
   * @param buffer audio buffer
   */
  private void add(byte[] buffer) {
    if (mRecording.length >= mRecordedLength + buffer.length) {
      // arraycopy(Object src, int srcPos, Object dest, int destPos, int length)
      System.arraycopy(buffer, 0, mRecording, mRecordedLength, buffer.length);
      mRecordedLength += buffer.length;
    } else {
      // This also happens on the emulator for some reason
      Log.e(LOG_TAG, "Recorder buffer overflow: " + mRecordedLength);
      release();
    }
  }


  /**
   * <p>Sum of squared 16-bit samples over the given span ending at {@code end}
   * (both in bytes, relative to the start of the recording).</p>
   *
   * @param end end position (exclusive, in bytes)
   * @param span number of bytes before {@code end} to include
   * @return sum of squares of the samples in the span
   */
  private long getRms(int end, int span) {
    int begin = end - span;
    if (begin < 0) {
      begin = 0;
    }
    // make sure begin is even, so that we start on a sample boundary
    if (0 != (begin % 2)) {
      begin++;
    }

    long sum = 0;
    for (int i = begin; i < end; i+=2) {
      short curSample = getShort(mRecording[i], mRecording[i+1]);
      sum += curSample * curSample;
    }
    return sum;
  }


  /**
   * <p>Converts two bytes to a short, assuming that the 2nd byte is
   * more significant (LITTLE_ENDIAN format).</p>
   *
   * @param argB1 less significant byte
   * @param argB2 more significant byte
   * @return the 16-bit sample the two bytes encode
   */
  private static short getShort(byte argB1, byte argB2) {
    // BUGFIX: the low byte must be masked with 0xff. Without the mask, a
    // negative low byte sign-extends to an int with all high bits set, and
    // the OR then clobbers the high byte (every sample with low byte >= 0x80
    // came out as 0xFFxx), corrupting the RMS/pause computations.
    return (short) ((argB1 & 0xff) | (argB2 << 8));
  }


  /**
   * <p>Moves the recorder into the ERROR state and releases the resources.</p>
   */
  private void handleError() {
    setState(State.ERROR);
    release();
  }

  /**
   * @return state of the underlying AudioRecord, or STATE_UNINITIALIZED if it has been released
   */
  private int getAudioRecordState() {
    if (mRecorder == null) {
      return AudioRecord.STATE_UNINITIALIZED;
    }
    return mRecorder.getState();
  }
}




Java Source Code List

ee.ioc.phon.android.speak.AboutActivity.java
ee.ioc.phon.android.speak.AppListActivity.java
ee.ioc.phon.android.speak.AppListCursorAdapter.java
ee.ioc.phon.android.speak.AudioCue.java
ee.ioc.phon.android.speak.AudioPauser.java
ee.ioc.phon.android.speak.Caller.java
ee.ioc.phon.android.speak.ChunkedWebRecSessionBuilder.java
ee.ioc.phon.android.speak.Constants.java
ee.ioc.phon.android.speak.DetailsActivity.java
ee.ioc.phon.android.speak.ExecutableString.java
ee.ioc.phon.android.speak.Executable.java
ee.ioc.phon.android.speak.Extras.java
ee.ioc.phon.android.speak.GetLanguageDetailsReceiver.java
ee.ioc.phon.android.speak.GrammarListActivity.java
ee.ioc.phon.android.speak.Log.java
ee.ioc.phon.android.speak.MicButton.java
ee.ioc.phon.android.speak.OnSwipeTouchListener.java
ee.ioc.phon.android.speak.PackageNameRegistry.java
ee.ioc.phon.android.speak.PreferencesRecognitionServiceHttp.java
ee.ioc.phon.android.speak.PreferencesRecognitionServiceWs.java
ee.ioc.phon.android.speak.Preferences.java
ee.ioc.phon.android.speak.RawAudioRecorder.java
ee.ioc.phon.android.speak.RecognizerIntentActivity.java
ee.ioc.phon.android.speak.RecognizerIntentListActivity.java
ee.ioc.phon.android.speak.RecognizerIntentService.java
ee.ioc.phon.android.speak.RecognizerIntent.java
ee.ioc.phon.android.speak.ServerListActivity.java
ee.ioc.phon.android.speak.SpeechRecognitionService.java
ee.ioc.phon.android.speak.Utils.java
ee.ioc.phon.android.speak.VoiceImeService.java
ee.ioc.phon.android.speak.VoiceImeView.java
ee.ioc.phon.android.speak.WebSocketRecognizer.java
ee.ioc.phon.android.speak.WebSocketResponse.java
ee.ioc.phon.android.speak.demo.AbstractRecognizerDemoActivity.java
ee.ioc.phon.android.speak.demo.ExtrasDemo.java
ee.ioc.phon.android.speak.demo.RepeaterDemo.java
ee.ioc.phon.android.speak.demo.SimpleDemo.java
ee.ioc.phon.android.speak.demo.VoiceSearchDemo.java
ee.ioc.phon.android.speak.provider.App.java
ee.ioc.phon.android.speak.provider.AppsContentProvider.java
ee.ioc.phon.android.speak.provider.BaseColumnsImpl.java
ee.ioc.phon.android.speak.provider.FileContentProvider.java
ee.ioc.phon.android.speak.provider.Grammar.java
ee.ioc.phon.android.speak.provider.Server.java