Program to record speech audio into RAM and then play it back, moving Billy Bass's mouth in sync with the speech.

Dependencies:   mbed

Remember Big Mouth Billy Bass?

I've made a simple demo program for him using the Freescale FRDM-KL25Z board. I've hooked up the digital I/O to his motor driver transistors and pushbutton switch.

This program records 1.8 seconds of speech audio from ADC input when the pushbutton is pressed, then plays the audio back with Billy Bass's mouth controlled so that it opens during vowel sounds.

The ADC input is driven from a microphone and preamplifier, via a capacitor and into a resistor divider connected to the +3.3V supply pin to provide mid-range biasing for the ADC signals.

The DAC output is connected to his audio amplifier input (to the trace that was connected to pin 10 of the controller IC). I had to provide a DC bias using the DAC to get the single transistor amplifier biased into proper operation.

For more on the method of vowel recognition, please see the paper: http://www.mirlab.org/conference_papers/International_Conference/ICASSP%201999/PDF/AUTHOR/IC991957.PDF

Y. Nishida, Y. Nakadai, Y. Suzuki, T. Sakurai, T. Kurokawa, and H. Sato. 1999.

Voice recognition focusing on vowel strings on a fixed-point 20-MIPS DSP board.

In Proceedings of the 1999 IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP '99), Vol. 1. IEEE Computer Society, Washington, DC, USA, 137-140. DOI=10.1109/ICASSP.1999.758081 http://dx.doi.org/10.1109/ICASSP.1999.758081

Files at this revision

API Documentation at this revision

Committer:
bikeNomad
Date:
Wed May 15 15:32:34 2013 +0000
Parent:
2:5bcd2f55a294
Child:
4:c989412b91ea
Commit message:
Got audio sampling and analysis working

Changed in this revision

AudioAnalyzer.cpp Show annotated file Show diff for this revision Revisions of this file
AudioAnalyzer.h Show annotated file Show diff for this revision Revisions of this file
FastAnalogIn.h Show annotated file Show diff for this revision Revisions of this file
core_cm0.h Show annotated file Show diff for this revision Revisions of this file
main.cpp Show annotated file Show diff for this revision Revisions of this file
mbed.bld Show diff for this revision Revisions of this file
mbed.lib Show annotated file Show diff for this revision Revisions of this file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/AudioAnalyzer.cpp	Wed May 15 15:32:34 2013 +0000
@@ -0,0 +1,35 @@
+#include "mbed.h"
+#include "AudioAnalyzer.h"
+
+namespace NK
+{
+
+// Lazily computes mean power, zero-crossing count and min/max over samples.
+void AudioAnalyzer::analyze()
+{
+    if (analyzed) return;   // results are cached after the first pass
+    power = 0;
+    zeroCrossings = 0;
+    minValue = maxValue = 0;
+    analyzed = true;
+    if (!samples || nsamples == 0) return;   // empty input: report all-zero results
+    minValue = 127;
+    maxValue = -128;
+    bool lastSign = samples[0] < 0;
+    int8_t const * const end = samples + nsamples;
+    // Advance exactly once per iteration so that every sample is visited.
+    for (int8_t const *p = samples; p != end; ++p) {
+        int8_t const val = *p;
+        power += static_cast<uint32_t>(val * val);   // val promotes to int; square <= 16384
+        bool const sign = (val < 0);
+        if (sign != lastSign)
+            zeroCrossings++;
+        lastSign = sign;
+        if (val > maxValue) maxValue = val;
+        if (val < minValue) minValue = val;
+    }
+    // Convert the sum of squares into mean power per sample.
+    power /= nsamples;
+}
+
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/AudioAnalyzer.h	Wed May 15 15:32:34 2013 +0000
@@ -0,0 +1,43 @@
+#ifndef __included_audio_analyzer_h
+#define __included_audio_analyzer_h
+
+namespace NK
+{
+
+// Lazily computed statistics (zero crossings, mean power, min/max) over a
+// caller-owned buffer of signed 8-bit samples; only the pointer is stored.
+class AudioAnalyzer
+{
+protected:
+    int8_t const *samples;      // buffer being analyzed (not owned)
+    uint16_t nsamples;          // number of samples in the buffer
+    uint16_t zeroCrossings;
+    uint32_t power;
+    int8_t minValue;
+    int8_t maxValue;
+    bool analyzed;              // set by analyze() once the results are valid
+    void analyze();
+public:
+    AudioAnalyzer(int8_t const *_samples, uint16_t _nsamples)
+        : samples(_samples), nsamples(_nsamples), zeroCrossings(0), power(0),
+          minValue(0), maxValue(0), analyzed(false) {}   // no member left indeterminate
+    // Returns the zero-crossing count, running the analysis on first use.
+    uint16_t getZeroCrossings() {
+        if (!analyzed) analyze();
+        return zeroCrossings;
+    }
+    // Returns the power value computed by analyze(), running it on first use.
+    uint32_t getPower() {
+        if (!analyzed) analyze();
+        return power;
+    }
+    // Stores the smallest and largest sample in *min and *max (both dereferenced).
+    void getMinMaxValues(int8_t *min, int8_t *max) {
+        if (!analyzed) analyze();
+        *min = minValue;
+        *max = maxValue;
+    }
+};
+
+} // namespace NK
+#endif
\ No newline at end of file
--- a/FastAnalogIn.h	Tue May 14 17:19:45 2013 +0000
+++ b/FastAnalogIn.h	Wed May 15 15:32:34 2013 +0000
@@ -30,13 +30,16 @@
     }
 
     uint16_t read_u16_nowait() {
-        // Return value
         return (uint16_t)ADC0->R[0];
     }
 
-    uint8_t read_u8_nowait() {
-        // Return value
-        return (uint8_t)ADC0->R[0];
+    // Offset-binary 16-bit ADC result -> signed 8-bit sample (top byte, re-centred):
+    //   0x0000 => -128,  0x7fff => -1,  0x8000 => 0,  0xffff => 127
+    // No rounding offset is added: adding 0x80 first would push readings of
+    // 0xff80 and above to +128, which wraps to -128 when narrowed to int8_t.
+    int8_t read_s8_nowait() {
+        int32_t const topByte = read_u16_nowait() >> 8;   // 0..255
+        return static_cast<int8_t>(topByte - 128);
     }
 
     FastAnalogIn(PinName pin)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/core_cm0.h	Wed May 15 15:32:34 2013 +0000
@@ -0,0 +1,1 @@
+#include "core_cm0plus.h"
\ No newline at end of file
--- a/main.cpp	Tue May 14 17:19:45 2013 +0000
+++ b/main.cpp	Wed May 15 15:32:34 2013 +0000
@@ -1,5 +1,10 @@
 #include "mbed.h"
 #include "FastAnalogIn.h"
+#include "AudioAnalyzer.h"
+extern "C" {
+#include <math.h>
+}
+
 using namespace NK;
 
 // Power:
@@ -29,19 +34,20 @@
 const unsigned SAMPLE_RATE_HZ  = 7889;
 const unsigned SAMPLE_PERIOD_US     = (1000000U / SAMPLE_RATE_HZ);
 const unsigned SAMPLE_BUFFER_SIZE = 9000;
+const float CHUNK_DURATION = 0.08;
 
 Ticker sampleTicker;
 Timer timer;
 
-uint8_t sampleBuffer[SAMPLE_BUFFER_SIZE];      // 1 second buffer
-uint8_t * volatile nextSample;
-unsigned volatile samplesRemaining;
+int8_t sampleBuffer[SAMPLE_BUFFER_SIZE];      // 1 second buffer
+int8_t * volatile nextSample;
+uint16_t volatile samplesRemaining;
 
 extern "C"
 void ADC0_IRQHandler(void)
 {
     if (samplesRemaining) {
-        *nextSample++ = microphone.read_u16_nowait() >> 8;
+        *nextSample++ = microphone.read_s8_nowait();
         microphone.start_read();
         samplesRemaining--;
     } else {
@@ -54,7 +60,8 @@
 void playAudioSample()
 {
     if (samplesRemaining) {
-        speaker.write_u16(*nextSample++ << 8);
+        int8_t val = *nextSample++;
+        speaker.write_u16((val + 128) << 8);
         samplesRemaining--;
     } else {
         sampleTicker.detach();
@@ -62,10 +69,10 @@
     }
 }
 
-void resetSampleBuffer()
+void resetSampleBuffer(int8_t *start=sampleBuffer, uint16_t nsamples=SAMPLE_BUFFER_SIZE)
 {
-    nextSample = sampleBuffer;
-    samplesRemaining = SAMPLE_BUFFER_SIZE;
+    nextSample = start;
+    samplesRemaining = nsamples;
 }
 
 void recordAudio()
@@ -74,6 +81,7 @@
     blueLED = 0.0;
 
     resetSampleBuffer();
+    timer.reset();
     timer.start();
     microphone.enable_interrupt();
     microphone.start_read();
@@ -83,36 +91,115 @@
         blueLED.write(1.0 - (1.0 * samplesRemaining / SAMPLE_BUFFER_SIZE));
     }
 
+    microphone.abort_read();
+
     float elapsed = timer.read();
     pc.printf("Done. %u samples in %f usec = %f samples/sec\r\n", SAMPLE_BUFFER_SIZE, elapsed * 1.0e6, SAMPLE_BUFFER_SIZE / elapsed);
 }
 
-void playAudio(float duration)
+void playAudio(float duration, int8_t *start=sampleBuffer, uint16_t nsamples=SAMPLE_BUFFER_SIZE)
 {
-    pc.printf("Playing %d samples... ", SAMPLE_BUFFER_SIZE);
     greenLED = 0.0;
-    resetSampleBuffer();
+    resetSampleBuffer(start, nsamples);
     timer.reset();
     timer.start();
-    sampleTicker.attach(&playAudioSample, duration/SAMPLE_BUFFER_SIZE);
+    sampleTicker.attach(&playAudioSample, duration/nsamples);
     while (samplesRemaining) {
         wait_us(50000);
-        greenLED.write(1.0 - (1.0 *  samplesRemaining / SAMPLE_BUFFER_SIZE));
+        greenLED.write(1.0 - (1.0 *  samplesRemaining / nsamples));
+    }
+}
+
+// Test helper: overwrites the whole sample buffer with a sine wave of amplitude
+// 125 whose phase advances by 2000 * 3.1416 / SAMPLE_BUFFER_SIZE per sample.
+void audioTest()
+{
+    const double step = 2000 * 3.1416 / SAMPLE_BUFFER_SIZE;
+    double angle = 0.0;
+    resetSampleBuffer();
+    for (unsigned i = 0; i < SAMPLE_BUFFER_SIZE; i++, angle += step)
+        sampleBuffer[i] = static_cast<int8_t>(sin(angle) * 125.0);
+}
+
+// Analyzes one chunk: returns false (printing nothing) when ln(power) is NaN or < 1.0, else prints the chunk's stats and returns true.
+bool analyzeChunk(int8_t  *chunkStart, uint16_t chunkSize, float powerRef)
+{
+    AudioAnalyzer analyzer(chunkStart, chunkSize);
+    uint32_t power = analyzer.getPower();
+    uint16_t zcs = analyzer.getZeroCrossings();
+    int8_t min, max;
+    analyzer.getMinMaxValues(&min, &max);
+    float logPower = ::log((double)power);    // natural log of the chunk's power
+    if (isnan(logPower) || logPower < 1.0) {    // too quiet: caller gets false and no columns
+        return false;
     }
-    float elapsed = timer.read();
-    pc.printf("Done. %u samples in %f usec = %f samples/sec", SAMPLE_BUFFER_SIZE, elapsed * 1.0e6, SAMPLE_BUFFER_SIZE / elapsed);
-    pc.printf(" (Rate %#+0.2f%%)\r\n", (duration-elapsed)*100/duration);
+    float zcRatio = (float)zcs / chunkSize;    // zero crossings per sample
+    pc.printf("%.2f\t%.2f\t%d\t%d\t", zcRatio*100, logPower-powerRef, min, max);    // zc ratio in %, log power relative to powerRef, min, max
+    return true;
+}
+
+// Prints a per-chunk table for the sample buffer, cut into CHUNK_DURATION-long chunks;
+// log power is relative to the whole buffer. With playToo, loud chunks loop until a key arrives.
+void analyze(bool playToo = false)
+{
+    const uint16_t samplesPerChunk = SAMPLE_RATE_HZ * CHUNK_DURATION;
+    const uint16_t chunkCount = SAMPLE_BUFFER_SIZE / samplesPerChunk;
+    AudioAnalyzer whole(sampleBuffer, SAMPLE_BUFFER_SIZE);
+    const float powerRef = ::log(static_cast<double>(whole.getPower()));
+    pc.printf("Reference power = %.2f\r\n", powerRef);
+    pc.printf("Analyzing %d chunks of %d samples (%.2f seconds):\r\n", chunkCount, samplesPerChunk, CHUNK_DURATION);
+    pc.printf("chunk\tstart\tzcratio\tlogp\tmin\tmax\tvowel\r\n");
+    int8_t *cursor = sampleBuffer;
+    for (uint16_t idx = 0; idx < chunkCount; idx++, cursor += samplesPerChunk) {
+        pc.printf("%u\t%.2f\t", idx, idx * CHUNK_DURATION);
+        if (analyzeChunk(cursor, samplesPerChunk, powerRef)) {
+            if (!playToo) {
+                pc.puts("-");
+            } else {
+                // Repeat the chunk until a character is available, then echo it.
+                while (! pc.readable())
+                    playAudio(CHUNK_DURATION, cursor, samplesPerChunk);
+                pc.putc(pc.getc());
+            }
+        }
+        pc.puts("\r\n");
+    }
+}
+
+// Prints nsamples samples beginning at start, one signed decimal value per line.
+void dumpAudio(int8_t *start=sampleBuffer, uint16_t nsamples=SAMPLE_BUFFER_SIZE)
+{
+    int8_t const * const end = start + nsamples;   // bound is relative to start, not sampleBuffer
+    for (int8_t const *p = start; p < end; p++) pc.printf("%d\r\n", *p);
 }
 
 int main()
 {
     pc.baud(115200);
     pc.printf("\r\n\r\nSample buffer = %u samples; rate = %u Hz; period = %u usec\r\n", SAMPLE_BUFFER_SIZE, SAMPLE_RATE_HZ, SAMPLE_PERIOD_US);
-    redLED = 1.0;
-    greenLED = 1.0;
-    blueLED = 1.0;
+
+    for (;;) {
+        redLED = 1.0;
+        greenLED = 1.0;
+        blueLED = 1.0;
+
+        pc.puts("ENTER when ready:");
+        pc.getc();
+        pc.puts("\r\n");
 
-    recordAudio();
+#if 0
+        audioTest();
+        playAudio(1.0);
+        analyze();
+#endif
 
-    playAudio(timer.read());
+        recordAudio();
+        float duration = timer.read();
+        playAudio(duration);
+        float elapsed = timer.read();
+        pc.printf("Done. %u samples in %f usec = %f samples/sec", SAMPLE_BUFFER_SIZE, elapsed * 1.0e6, SAMPLE_BUFFER_SIZE / elapsed);
+        pc.printf(" (Rate %#+0.2f%%)\r\n", (duration-elapsed)*100/duration);
+        analyze(true);
+        // dumpAudio();
+    }
 }
--- a/mbed.bld	Tue May 14 17:19:45 2013 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-http://mbed.org/users/mbed_official/code/mbed/builds/b3110cd2dd17
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mbed.lib	Wed May 15 15:32:34 2013 +0000
@@ -0,0 +1,1 @@
+http://mbed.org/users/mbed_official/code/mbed/#b3110cd2dd17