Updating TtsEngine.h and SynthProxy.cpp so that buffer memory
management is handled on the system side.
diff --git a/include/tts/TtsEngine.h b/include/tts/TtsEngine.h
index bf62995..e0220ea 100644
--- a/include/tts/TtsEngine.h
+++ b/include/tts/TtsEngine.h
@@ -25,24 +25,29 @@
namespace android {
+enum tts_synth_status {
+ TTS_SYNTH_DONE = 0,
+ TTS_SYNTH_PENDING = 1
+};
+
+enum tts_callback_status {
+ TTS_CALLBACK_HALT = 0,
+ TTS_CALLBACK_CONTINUE = 1
+};
+
// The callback is used by the implementation of this interface to notify its
// client, the Android TTS service, that the last requested synthesis has been
-// completed.
+// completed. // TODO reword
// The callback for synthesis completed takes:
-// void * - The userdata pointer set in the original synth call
-// uint32_t - Track sampling rate in Hz
-// audio_format - The AudioSystem::audio_format enum
-// int - The number of channels
-// int8_t * - A buffer of audio data only valid during the execution of the callback
-// size_t - The size of the buffer
-// Note about memory management:
-// The implementation of TtsEngine is responsible for the management of the memory
-// it allocates to store the synthesized speech. After the execution of the callback
-// to hand the synthesized data to the client of TtsEngine, the TTS engine is
-// free to reuse or free the previously allocated memory.
-// This implies that the implementation of the "synthDoneCB" callback cannot use
-// the pointer to the buffer of audio samples outside of the callback itself.
-typedef void (synthDoneCB_t)(void *, uint32_t, AudioSystem::audio_format, int, int8_t *, size_t);
+// [inout] void *& - The userdata pointer set in the original synth call
+// [in] uint32_t - Track sampling rate in Hz
+// [in] audio_format - The AudioSystem::audio_format enum
+// [in] int - The number of channels
+// [inout] int8_t *& - A buffer of audio data only valid during the execution of the callback
+// [inout] size_t & - The size of the buffer
+// [in] tts_synth_status - Status of the synthesis; 0 for done, 1 for more data to be synthesized.
+// Returns the status of the consumer of the synthesis. 0 for stop, 1 for continue.
+typedef tts_callback_status (synthDoneCB_t)(void *&, uint32_t, AudioSystem::audio_format, int, int8_t *&, size_t&, tts_synth_status);
class TtsEngine;
extern "C" TtsEngine* getTtsEngine();
@@ -155,13 +160,13 @@
// @param text the UTF-8 text to synthesize
// @param userdata pointer to be returned when the call is invoked
// @return TTS_SUCCESS or TTS_FAILURE
- virtual tts_result synthesizeText(const char *text, void *userdata);
+ virtual tts_result synthesizeText(const char *text, int8_t *buffer, size_t bufferSize, void *userdata);
// Synthesize IPA text. When synthesis completes, the engine must call the given callback to notify the TTS API.
// @param ipa the IPA data to synthesize
// @param userdata pointer to be returned when the call is invoked
// @return TTS_FEATURE_UNSUPPORTED if IPA is not supported, otherwise TTS_SUCCESS or TTS_FAILURE
- virtual tts_result synthesizeIpa(const char *ipa, void *userdata);
+ virtual tts_result synthesizeIpa(const char *ipa, int8_t *buffer, size_t bufferSize, void *userdata);
};
} // namespace android
diff --git a/tts/jni/android_tts_SynthProxy.cpp b/tts/jni/android_tts_SynthProxy.cpp
old mode 100755
new mode 100644
index d8f1bf3..582e621
--- a/tts/jni/android_tts_SynthProxy.cpp
+++ b/tts/jni/android_tts_SynthProxy.cpp
@@ -32,6 +32,7 @@
#define DEFAULT_TTS_RATE 16000
#define DEFAULT_TTS_FORMAT AudioSystem::PCM_16_BIT
#define DEFAULT_TTS_NB_CHANNELS 1
+#define DEFAULT_TTS_BUFFERSIZE 1024
#define USAGEMODE_PLAY_IMMEDIATELY 0
#define USAGEMODE_WRITE_TO_FILE 1
@@ -64,6 +65,8 @@
uint32_t mSampleRate;
AudioSystem::audio_format mAudFormat;
int mNbChannels;
+ int8_t * mBuffer;
+ size_t mBufferSize;
SynthProxyJniStorage() {
//tts_class = NULL;
@@ -73,6 +76,8 @@
mSampleRate = DEFAULT_TTS_RATE;
mAudFormat = DEFAULT_TTS_FORMAT;
mNbChannels = DEFAULT_TTS_NB_CHANNELS;
+ mBufferSize = DEFAULT_TTS_BUFFERSIZE;
+ mBuffer = new int8_t[mBufferSize];
}
~SynthProxyJniStorage() {
@@ -81,6 +86,7 @@
mNativeSynthInterface->shutdown();
mNativeSynthInterface = NULL;
}
+ delete[] mBuffer; // allocated with new int8_t[...] in the constructor, so array delete is required
}
void killAudio() {
@@ -159,23 +165,27 @@
* Callback from TTS engine.
* Directly speaks using AudioTrack or write to file
*/
-static void ttsSynthDoneCB(void * userdata, uint32_t rate,
+static tts_callback_status ttsSynthDoneCB(void *& userdata, uint32_t rate,
AudioSystem::audio_format format, int channel,
- int8_t *wav, size_t bufferSize) {
+ int8_t *&wav, size_t &bufferSize, tts_synth_status status) {
LOGI("ttsSynthDoneCallback: %d bytes", bufferSize);
+ if (userdata == NULL){
+ LOGE("userdata == NULL");
+ return TTS_CALLBACK_HALT;
+ }
afterSynthData_t* pForAfter = (afterSynthData_t*)userdata;
+ SynthProxyJniStorage* pJniData = (SynthProxyJniStorage*)(pForAfter->jniStorage);
if (pForAfter->usageMode == USAGEMODE_PLAY_IMMEDIATELY){
LOGI("Direct speech");
if (wav == NULL) {
+ delete pForAfter;
LOGI("Null: speech has completed");
}
if (bufferSize > 0) {
- SynthProxyJniStorage* pJniData =
- (SynthProxyJniStorage*)(pForAfter->jniStorage);
prepAudioTrack(pJniData, rate, format, channel);
if (pJniData->mAudioOut) {
pJniData->mAudioOut->write(wav, bufferSize);
@@ -187,6 +197,7 @@
} else if (pForAfter->usageMode == USAGEMODE_WRITE_TO_FILE) {
LOGI("Save to file");
if (wav == NULL) {
+ delete pForAfter;
LOGI("Null: speech has completed");
}
if (bufferSize > 0){
@@ -195,10 +206,17 @@
}
// TODO update to call back into the SynthProxy class through the
// javaTTSFields.synthProxyMethodPost methode to notify
- // playback has completed
+ // playback has completed if the synthesis is done, i.e.
+ // if status == TTS_SYNTH_DONE
+ //delete pForAfter;
- delete pForAfter;
- return;
+ // The wav (output) parameter is left unchanged: the data has already been
+ // consumed, so the next callback can write at the same location. bufferSize,
+ // however, must be reset to let the TTS engine know how much it can write
+ // the next time it calls this function.
+ bufferSize = pJniData->mBufferSize;
+
+ return TTS_CALLBACK_CONTINUE;
}
@@ -223,7 +241,9 @@
} else {
TtsEngine *(*get_TtsEngine)() =
reinterpret_cast<TtsEngine* (*)()>(dlsym(engine_lib_handle, "getTtsEngine"));
+
pJniStorage->mNativeSynthInterface = (*get_TtsEngine)();
+
if (pJniStorage->mNativeSynthInterface) {
pJniStorage->mNativeSynthInterface->init(ttsSynthDoneCB);
}
@@ -323,7 +343,7 @@
// TODO check return codes
if (pSynthData->mNativeSynthInterface) {
- pSynthData->mNativeSynthInterface->synthesizeText(textNativeString,
+ pSynthData->mNativeSynthInterface->synthesizeText(textNativeString, pSynthData->mBuffer, pSynthData->mBufferSize,
(void *)pForAfter);
}
@@ -395,7 +415,7 @@
if (pSynthData->mNativeSynthInterface) {
const char *textNativeString = env->GetStringUTFChars(textJavaString, 0);
- pSynthData->mNativeSynthInterface->synthesizeText(textNativeString,
+ pSynthData->mNativeSynthInterface->synthesizeText(textNativeString, pSynthData->mBuffer, pSynthData->mBufferSize,
(void *)pForAfter);
env->ReleaseStringUTFChars(textJavaString, textNativeString);
}
@@ -442,6 +462,7 @@
android_tts_SynthProxy_playAudioBuffer(JNIEnv *env, jobject thiz, jint jniData,
int bufferPointer, int bufferSize)
{
+LOGI("android_tts_SynthProxy_playAudioBuffer");
if (jniData == 0) {
LOGE("android_tts_SynthProxy_playAudioBuffer(): invalid JNI data");
return;