tom dunigan
/
tts
text-to-speech through DAC to audio amp/speaker
text-to-speech TTS
- This program is a modified version of the Arduino/Teensy TTS library https://github.com/manitou48/TTS
- The audio is generated on the ARM's DAC pin.
- TTS.h selects the DAC pin based on the Mbed board; only the K64F has been tested.
- Teensy discussions https://forum.pjrc.com/threads/44587-TTS-(Text-to-Speech)-Library-Port
TTS.cpp
- Committer:
- manitou
- Date:
- 2017-06-24
- Revision:
- 3:d12c34704b6d
- Parent:
- 2:eceeac07154b
File content as of revision 3:d12c34704b6d:
/**
 * Text To Speech synthesis library
 * Copyright (c) 2008 Clive Webster.  All rights reserved.
 *
 * Nov. 29th 2009 - Modified to work with Arduino by Gabriel Petrut:
 * The Text To Speech library uses Timer1 to generate the PWM
 * output on digital pin 10. The output signal needs to be fed
 * to an RC filter then through an amplifier to the speaker.
 * http://www.tehnorama.ro/minieric-modulul-de-control-si-sinteza-vocala/
 *
 * Modified to allow use of different PWM pins by Stephen Crane.
 * Modified for Timer5 on Arduino Mega2560 by Peter Dambrowsky.
 */

/*
 * Note on this version: samples are written to an AnalogOut object ('dac')
 * by sound(); the PWM description above is inherited from the older header.
 */

#include "TTS.h"

AnalogOut dac(DACpin);

// Random number seed
static byte seed0;
static byte seed1;
static byte seed2;

// Sound data produced by phonemesToData(): consumed two bytes at a time
// by TTS::sayPhonemes().
static char phonemes[128];

// Two bytes per sound data element, must be same size as 'phonemes'.
// Explicitly signed: the code stores a -1 sentinel here and tests it with
// '>= 0' / '== -1', which only works when the element type is signed
// (plain 'char' is unsigned on some targets, e.g. ARM).
static signed char modifier[128];

// Phoneme text produced by textToPhonemes() for TTS::sayText()
static char g_text[128];

static byte defaultPitch = 7;

// Lookup user specified pitch changes
static const byte PROGMEM PitchesP[] = { 1, 2, 4, 6, 8, 10, 13, 16 };

/**
 * Find the single character 'token' in 'vocab'
 * and append its phonemes to dest[x]
 *
 * Enter:
 *  token => the character to look up (matched against one-character entries)
 *  dest  => output buffer
 *  x     => current write offset into dest
 *  vocab => VOCAB array
 * Return: the new write offset (unchanged if no entry matched)
 */
static int copyToken(char token, char *dest, int x, const VOCAB * vocab)
{
    for (unsigned int ph = 0; ph < numVocab; ph++) {
        const char *txt = (const char *) pgm_read_word(&vocab[ph].txt);

        // Only entries whose text is exactly the one character 'token'
        if (pgm_read_byte(&txt[0]) == token && pgm_read_byte(&txt[1]) == 0) {
            const char *src = (const char *) pgm_read_word(&vocab[ph].phoneme);

            while (pgm_read_byte(src)) {
                dest[x++] = pgm_read_byte(src);
                src++;
            }
            break;
        }
    }
    return x;
}

/**
 * Return non-zero if 'c' is treated as white space by the vocabulary
 * matcher: NUL, space or one of , . ? ' ! : /
 */
static byte whitespace(char c)
{
    return (c == 0 || c == ' ' || c == ',' || c == '.' || c == '?'
            || c == '\'' || c == '!' || c == ':' || c == '/');
}

/**
 * Enter:
 *  src   => English text (lower case letters are folded to upper case)
 *  vocab => VOCAB array
 *  dest  => address to return result
 *  return 1 if ok, or 0 if error
 *
 * For each input position the longest complete vocabulary match is used.
 * In vocabulary text '#' matches any letter and '_' matches white space.
 */
static int textToPhonemes(const char *src, const VOCAB * vocab, char *dest)
{
    int outIndex = 0;   // Current offset into dest
    int inIndex = -1;   // Starts at -1 so that a leading space is assumed

    while (inIndex == -1 || src[inIndex]) {     // until end of text
        int maxMatch = 0;       // Max chars matched on input text
        int numOut = 0;         // Number of characters copied to output stream for the best match
        boolean endsInWhiteSpace = FALSE;
        int maxWildcardPos = 0;

        // Get next phoneme, P2
        for (unsigned int ph = 0; ph < numVocab; ph++) {
            int y, x;
            char wildcard = 0;          // modifier
            int wildcardInPos = 0;
            boolean hasWhiteSpace = FALSE;
            const char *text = (const char *) pgm_read_word(&vocab[ph].txt);
            const char *phon = (const char *) pgm_read_word(&vocab[ph].phoneme);

            for (y = 0;; y++) {
                char nextVocabChar = pgm_read_byte(&text[y]);
                char nextCharIn = (y + inIndex == -1) ? ' ' : src[y + inIndex];

                if (nextCharIn >= 'a' && nextCharIn <= 'z')
                    nextCharIn = nextCharIn - 'a' + 'A';

                if (nextVocabChar == '#' && nextCharIn >= 'A' && nextCharIn <= 'Z') {
                    wildcard = nextCharIn;      // The character equivalent to the '#'
                    wildcardInPos = y;
                    continue;
                }

                if (nextVocabChar == '_') {
                    // try to match against a white space
                    hasWhiteSpace = TRUE;
                    if (whitespace(nextCharIn))
                        continue;
                    y--;
                    break;
                }

                // check for end of either string
                if (nextVocabChar == 0 || nextCharIn == 0)
                    break;

                if (nextVocabChar != nextCharIn)
                    break;
            }

            // See if its the longest complete match so far
            if (y <= maxMatch || pgm_read_byte(&text[y]))
                continue;

            // This is the longest complete match
            maxMatch = y;
            maxWildcardPos = 0;
            x = outIndex;       // offset into phoneme return data

            // Copy the matching phrase changing any '#' to the phoneme for the wildcard
            for (y = 0;; y++) {
                char c = pgm_read_byte(&phon[y]);

                if (c == 0)
                    break;
                if (c == '#') {
                    if (pgm_read_byte(&phon[y + 1]) == 0) {
                        // replacement ends in wildcard
                        maxWildcardPos = wildcardInPos;
                    } else {
                        x = copyToken(wildcard, dest, x, vocab);        // Copy the phonemes for the wildcard character
                    }
                } else {
                    dest[x++] = c;
                }
            }
            dest[x] = 0;
            endsInWhiteSpace = hasWhiteSpace;

            // 14
            numOut = x - outIndex;      // The number of bytes added
        }
        // 15 - end of vocab table

        // 16
        if (endsInWhiteSpace)
            maxMatch--;

        // 17
        if (maxMatch == 0) {
            //loggerP(PSTR("Mistake in SAY, no token for "));
            //logger(&src[inIndex]);
            //loggerCRLF();
            return 0;
        }

        // 20
        outIndex += numOut;
        if (outIndex > 128 - 16) {
            //loggerP(PSTR("Mistake in SAY, text too long\n"));
            return 0;
        }

        // 21
        inIndex += (maxWildcardPos > 0) ? maxWildcardPos : maxMatch;
    }
    return 1;
}

/**
 * Convert phonemes to data string
 * Enter: textp = phonemes string
 *        phoneme = PHONEME table to match against
 * Return: 1 if ok, or 0 if a phoneme could not be matched or the
 *         output would not fit
 *         phonemes = string of sound data
 *         modifier = 2 bytes per sound data
 */
static int phonemesToData(const char *textp, const PHONEME * phoneme)
{
    unsigned int phonemeOut = 0;        // offset into the phonemes array
    unsigned int modifierOut = 0;       // offset into the modifiers array
    unsigned int L81 = 0;               // attenuate
    unsigned int L80 = 16;

    while (*textp) {
        // P20: Get next phoneme
        boolean anyMatch = FALSE;
        int longestMatch = 0;
        int numOut = 0;         // The number of bytes copied to the output for the longest match

        // Get next phoneme, P2
        for (unsigned int ph = 0; ph < numPhoneme; ph++) {
            int numChars;

            // Locate start of next phoneme
            const char *ph_text = (const char *) pgm_read_word(&phoneme[ph].txt);

            // Set 'numChars' to the number of characters
            // that we match against this phoneme
            for (numChars = 0; textp[numChars]; numChars++) {
                // get next input character and make lower case
                char nextChar = textp[numChars];

                if (nextChar >= 'A' && nextChar <= 'Z')
                    nextChar = nextChar - 'A' + 'a';

                if (nextChar != pgm_read_byte(&ph_text[numChars]))
                    break;
            }

            // if not the longest match so far then ignore
            if (numChars <= longestMatch)
                continue;

            // partial phoneme match
            if (pgm_read_byte(&ph_text[numChars]))
                continue;

            // P7: we have matched the whole phoneme
            longestMatch = numChars;

            // Copy phoneme data to 'phonemes'
            const char *ph_ph = (const char *) pgm_read_word(&phoneme[ph].phoneme);

            for (numOut = 0; pgm_read_byte(&ph_ph[numOut]); numOut++)
                phonemes[phonemeOut + numOut] = pgm_read_byte(&ph_ph[numOut]);

            L81 = pgm_read_byte(&phoneme[ph].attenuate) + '0';
            anyMatch = TRUE;    // phoneme match found

            modifier[modifierOut] = -1;
            modifier[modifierOut + 1] = 0;

            // Get char from text after the phoneme and test if it is a numeric
            if (textp[longestMatch] >= '0' && textp[longestMatch] <= '9') {
                // Pitch change requested.
                // The table has entries for digits '1'..'8' only; clamp so
                // that '0' and '9' cannot index outside PitchesP.
                int pitchIndex = textp[longestMatch] - '1';

                if (pitchIndex < 0)
                    pitchIndex = 0;
                else if (pitchIndex > (int) sizeof(PitchesP) - 1)
                    pitchIndex = (int) sizeof(PitchesP) - 1;

                modifier[modifierOut] = pgm_read_byte(&PitchesP[pitchIndex]);
                modifier[modifierOut + 1] = L81;
                longestMatch++;
            }

            // P10
            if (L81 != '0' && L81 != L80 && modifier[modifierOut] >= 0) {
                // Propagate the pitch to the previous element, if there is
                // one ('modifierOut' is unsigned, so 'modifierOut - 2' would
                // wrap around when it is 0).
                if (modifierOut >= 2) {
                    modifier[modifierOut - 2] = modifier[modifierOut];
                    modifier[modifierOut - 1] = '0';
                }
                continue;
            }

            // P11
            if ((textp[longestMatch - 1] | 0x20) == 0x20) {
                // end of input string or a space
                modifier[modifierOut] = (modifierOut == 0) ? 16 : modifier[modifierOut - 2];
            }
        }       // next phoneme

        // p13
        L80 = L81;
        if (longestMatch == 0 && !anyMatch) {
            //loggerP(PSTR("Mistake in speech at "));
            //logger(textp);
            //loggerCRLF();
            return 0;
        }

        // Move over the bytes we have copied to the output
        phonemeOut += numOut;
        if (phonemeOut > sizeof(phonemes) - 16) {
            //loggerP(PSTR("Line too long\n"));
            return 0;
        }

        // P16

        // Copy the modifier setting to each sound data element for this phoneme.
        // '<' rather than '!=' so that an odd 'numOut' cannot step past the
        // end condition.
        if (numOut > 2)
            for (int count = 0; count < numOut; count += 2) {
                modifier[modifierOut + count + 2] = modifier[modifierOut + count];
                modifier[modifierOut + count + 3] = 0;
            }
        modifierOut += numOut;

        //p21
        textp += longestMatch;
    }

    // Terminate with two 'z' elements, then zero-fill the rest
    phonemes[phonemeOut++] = 'z';
    phonemes[phonemeOut++] = 'z';
    phonemes[phonemeOut++] = 'z';
    phonemes[phonemeOut++] = 'z';
    while (phonemeOut < sizeof(phonemes))
        phonemes[phonemeOut++] = 0;

    // Fill the remaining modifier pairs with "no pitch" (-1, 0)
    while (modifierOut < sizeof(modifier)) {
        modifier[modifierOut++] = -1;
        modifier[modifierOut++] = 0;
    }

    return 1;
}

/*
 * Short delay between samples: waits delays*6 microseconds via wait_us()
 */
static void pause(byte delays)
{
    wait_us(delays*6);
}

/*
 * Longer delay: waits d*3127 microseconds via wait_us()
 */
static void delay2(byte d)
{
    wait_us(d*3127);
}

/*
 * Generate a random number
 *
 * Shifts the three seed bytes left by one as a chain (seed2 -> seed1 ->
 * seed0), feeding a bit derived from seed0 back into seed2, and returns
 * the new seed0.
 */
static byte random2(void)
{
    byte tmp = (seed0 & 0x48) + 0x38;

    seed0 <<= 1;
    if (seed1 & 0x80)
        seed0++;
    seed1 <<= 1;
    if (seed2 & 0x80)
        seed1++;
    seed2 <<= 1;
    if (tmp & 0x40)
        seed2++;
    return seed0;
}

// Not referenced by the code visible in this file
static int pin;

/*
 * Called when speech has finished; currently does nothing
 */
static void soundOff(void)
{
    //dac.write(0);
}

#define PWM_TOP (1200/2)
//https://sites.google.com/site/qeewiki/books/avr-guide/pwm-on-the-atmega328

/*
 * Called before speech starts: resets the random number seed so the
 * noise sequence is the same for every utterance
 */
static void soundOn(void)
{
    // dac.write(0);

    // initialise random number seed
    seed0 = 0xecu;
    seed1 = 7;
    seed2 = 0xcfu;
}

// Logarithmic scale
//static const int16_t PROGMEM Volume[8] =
//{ 0, PWM_TOP * 0.01, PWM_TOP * 0.02, PWM_TOP * 0.03, PWM_TOP * 0.06,
//PWM_TOP * 0.12, PWM_TOP * 0.25, PWM_TOP * 0.5 };

// Linear scale
// (not referenced by the code visible in this file)
static const int16_t PROGMEM Volume[8] = {
    0,
    (uint16_t)(PWM_TOP * 0.07),
    (uint16_t)(PWM_TOP * 0.14),
    (uint16_t)(PWM_TOP * 0.21),
    (uint16_t)(PWM_TOP * 0.29),
    (uint16_t)(PWM_TOP * 0.36),
    (uint16_t)(PWM_TOP * 0.43),
    (uint16_t)(PWM_TOP * 0.5)
};

/*
 * Output one sample: the low 4 bits of 'b' are scaled to 0.5*b/16
 * and written to the DAC
 */
static void sound(byte b)
{
    // Update PWM volume
    b = (b & 15);
    dac.write(0.5*b/16.);
}

/*
 * Play 'count' bytes of sound 'soundNum' starting at offset 'soundPos'.
 * Each byte holds two 4-bit samples: the low nibble is output and held
 * for pause(pitch1), then the high nibble for pause(pitch2). 'volume' is
 * a bit mask applied to each sample. The offset wraps within the 0x40-byte
 * sound; the wrapped end offset is returned.
 */
static byte playTone(byte soundNum, byte soundPos, char pitch1, char pitch2, byte count, byte volume)
{
    const byte *soundData = &SoundData[soundNum * 0x40];

    while (count-- > 0) {
        byte s = pgm_read_byte(&soundData[soundPos & 0x3fu]);

        sound((byte) (s & volume));
        pause(pitch1);
        sound((byte) ((s >> 4) & volume));
        pause(pitch2);

        soundPos++;
    }
    return soundPos & 0x3fu;
}

/*
 * Play 'duration' short bursts of sound 'soundNumber', each starting at
 * a random offset
 */
static void play(byte duration, byte soundNumber)
{
    while (duration--)
        playTone(soundNumber, random2(), 7, 7, 10, 15);
}

/******************************************************************************
 * User API
 ******************************************************************************/
TTS::TTS()
{
}

/*
 * Set the base pitch value added to every sound's pitch
 */
void TTS::setPitch(byte pitch)
{
    defaultPitch = pitch;
}

/*
 * Return the current base pitch value
 */
byte TTS::getPitch(void)
{
    return defaultPitch;
}

/*
 * Speak a string of phonemes
 *
 * The phoneme text is converted with phonemesToData(); if that fails
 * nothing is played. soundOff() is called in either case.
 */
void TTS::sayPhonemes(const char *textp)
{
    byte phonemeIn,             // offset into text
         byte2,
         modifierIn,            // offset into stuff in modifier
         punctuationPitchDelta; // change in pitch due to fullstop or question mark
    int8_t byte1;
    char phoneme;
    const SOUND_INDEX *soundIndex;
    byte sound1Num;             // Sound data for the current phoneme
    byte sound2Num;             // Sound data for the next phoneme
    byte sound2Stop;            // Where the second sound should stop
    // Pitches are explicitly signed so that the '-1' sentinel test and the
    // '< 1' clamp below do not depend on the signedness of plain 'char'.
    signed char pitch1;         // pitch for the first sound
    signed char pitch2;         // pitch for the second sound
    short i;
    byte sound1Duration;        // the duration for sound 1

    if (phonemesToData(textp, s_phonemes)) {
        // phonemes has list of sound bytes
        soundOn();

        // _630C
        byte1 = 0;
        punctuationPitchDelta = 0;

        // Q19
        for (phonemeIn = 0, modifierIn = 0; phonemes[phonemeIn]; phonemeIn += 2, modifierIn += 2) {
            byte duration;      // duration from text line
            byte SoundPos;      // offset into sound data
            byte fadeSpeed = 0;

            phoneme = phonemes[phonemeIn];
            if (phoneme == 'z') {
                delay2(15);
                continue;
            } else if (phoneme == '#') {
                continue;
            } else {
                // Collect info on sound 1
                soundIndex = &SoundIndex[phoneme - 'A'];
                sound1Num = pgm_read_byte(&soundIndex->SoundNumber);
                byte1 = pgm_read_byte(&soundIndex->byte1);
                byte2 = pgm_read_byte(&soundIndex->byte2);

                duration = phonemes[phonemeIn + 1] - '0';       // Get duration from the input line
                if (duration != 1)
                    duration <<= 1;

                duration += 6;  // scaled duration from the input line (at least 6)
                sound2Stop = 0x40 >> 1;

                pitch1 = modifier[modifierIn];
                if (modifier[modifierIn + 1] == 0 || pitch1 == -1) {
                    pitch1 = 10;
                    duration -= 6;
                } else if (modifier[modifierIn + 1] == '0' || duration == 6) {
                    duration -= 6;
                }

                // q8
                pitch2 = modifier[modifierIn + 2];
                if (modifier[modifierIn + 3] == 0 || pitch2 == -1)
                    pitch2 = 10;

                // q10
                if (byte1 < 0) {
                    sound1Num = 0;
                    random2();
                    sound2Stop = (0x40 >> 1) + 2;
                } else {
                    // is positive
                    if (byte1 == 2) {
                        // 64A4
                        // Make a white noise sound !
                        byte volume = (duration == 6) ? 15 : 1; // volume mask

                        for (duration <<= 2; duration > 0; duration--) {
                            playTone(sound1Num, random2(), 8, 12, 11, volume);
                            // Increase the volume
                            if (++volume == 16)
                                volume = 15;    // full volume from now on
                        }
                        continue;
                    } else {
                        // q11
                        if (byte1)
                            delay2(25);
                    }
                }
            }

            // 6186
            pitch1 += defaultPitch + punctuationPitchDelta;
            if (pitch1 < 1)
                pitch1 = 1;

            pitch2 += defaultPitch + punctuationPitchDelta;
            if (pitch2 < 1)
                pitch2 = 1;

            // get next phoneme
            phoneme = phonemes[phonemeIn + 2];

            if (phoneme == 0 || phoneme == 'z') {
                if (duration == 1)
                    delay2(60);
                phoneme = 'a';  // change to a pause
            } else {
                // s6
                if (byte2 != 1)
                    byte2 = (byte2 + pgm_read_byte(&SoundIndex[phoneme - 'A'].byte2)) >> 1;

                if (byte1 < 0 || pgm_read_byte(&SoundIndex[phoneme - 'A'].byte1))
                    phoneme = 'a';      // change to a pause
            }

            // S10
            sound2Num = pgm_read_byte(&SoundIndex[phoneme - 'A'].SoundNumber);

            sound1Duration = 0x80;      // play half of sound 1
            if (sound2Num == sound1Num)
                byte2 = duration;

            // S11
            if ((byte2 >> 1) == 0) {
                sound1Duration = 0xff;  // play all of sound 1
            } else {
                // The fade speed between the two sounds
                fadeSpeed = (sound1Duration + (byte2 >> 1)) / byte2;

                if (duration == 1) {
                    sound2Stop = 0x40;          // dont play sound2
                    sound1Duration = 0xff;      // play all of sound 1
                    pitch1 = 12;
                }
            }

            SoundPos = 0;
            do {
                byte sound1Stop = (sound1Duration >> 2) & 0x3fu;
                byte sound1End = sound1Stop;

                if (sound2Stop < sound1End)
                    sound1End = sound2Stop;     // min

                if (sound1Stop)
                    SoundPos = playTone(sound1Num, SoundPos, pitch1, pitch1, sound1End, 15);

                // s18
                if (sound2Stop != 0x40) {
                    SoundPos = playTone(sound2Num, SoundPos, pitch2, pitch2, (byte) (sound2Stop - sound1End), 15);
                }

                // s23
                if (sound1Duration != 0xff && duration < byte2) {
                    // Fade sound1 out
                    sound1Duration -= fadeSpeed;
                    if (sound1Duration >= (byte) 0xC8)
                        sound1Duration = 0;     // stop playing sound 1
                }

                // Call any additional sound
                if (byte1 == -1)
                    play(3, 30);        // make an 'f' sound
                else if (byte1 == -2)
                    play(3, 29);        // make an 's' sound
                else if (byte1 == -3)
                    play(3, 33);        // make a 'th' sound
                else if (byte1 == -4)
                    play(3, 27);        // make a 'sh' sound

            } while (--duration);

            // Scan ahead to find a '.' or a '?' as this will change the pitch
            punctuationPitchDelta = 0;
            for (i = 6; i > 0; i--) {
                char next = phonemes[phonemeIn + (i * 2)];

                if (next == 'i')
                    // found a full stop
                    punctuationPitchDelta = 6 - i;      // Lower the pitch
                else if (next == 'h')
                    // found a question mark
                    punctuationPitchDelta = i - 6;      // Raise the pitch
            }

            if (byte1 == 1)
                delay2(25);
        }       // next phoneme
    }
    soundOff();
}

/*
 * Speak an English command line of text
 *
 * The text is converted to phonemes in g_text with textToPhonemes();
 * if the conversion fails nothing is spoken.
 */
void TTS::sayText(const char *original)
{
    if (textToPhonemes(original, s_vocab, g_text)) {
        sayPhonemes(g_text);
    }
}