Compare commits

...

5 Commits

1
.gitignore vendored

@ -3,3 +3,4 @@ Win32
x64
sapicli.vcxproj.filters
sapicli.vcxproj.user
test

@ -6,6 +6,8 @@ A simple tool to generate audio from text.
Using [getoptW](https://github.com/bluebaroncanada/getoptW).
https://github.com/Essjay1/Windows-classic-samples/blob/master/Samples/Win7Samples/com/fundamentals/dcom/simple/sserver/sserver.cpp
https://github.com/nodejs/node/issues/16553
http://riaevangelist.github.io/node-ipc/
Development process
-------------------

@ -245,25 +245,6 @@ public:
return S_OK;
}
virtual STDMETHODIMP writeEventData(void *buf, size_t sz) = 0;
const WCHAR *event_names[15] = {
L"undefined",
L"startInputStream",
L"endInputStream",
L"voiceChange",
L"ttsBookmark",
L"wordBoundary",
L"phoneme",
L"sentenceBoundary",
L"viseme",
L"ttsAudioLevel",
};
STDMETHODIMP writeJsonEvent(const SPEVENT *ev) {
WCHAR buf[MAX_PATH];
ULONGLONG timeMs = ev->ullAudioStreamOffset * 1000 / 16000;
_snwprintf_s(buf, sizeof(buf) / sizeof(buf[0]), L"{\"type\":\"%s\",\"timeMs\":%llu}\n", event_names[ev->eEventId < 15 ? ev->eEventId : 0], timeMs);
writeEventData(buf, wcslen(buf) * sizeof(buf[0]));
return S_OK;
}
// FIXME: optimize by not allocating every time
STDMETHODIMP writeSpEvent(const SPEVENT *ev) {
CSpEvent cspev;
@ -287,18 +268,13 @@ public:
return S_OK;
}
STDMETHODIMP SetBaseStream(IStream *pStream, REFGUID rguidFormat, const WAVEFORMATEX *pWaveFormatEx) {
wprintf(L"SetBaseStream\n");
return S_OK;
}
STDMETHODIMP SetBaseStream(IStream *pStream, REFGUID rguidFormat, const WAVEFORMATEX *pWaveFormatEx) { return S_OK; }
STDMETHODIMP GetBaseStream(IStream **ppStream) {
wprintf(L"GetBaseStream\n");
return S_OK;
}
STDMETHODIMP GetBaseStream(IStream **ppStream) { return S_OK; }
virtual STDMETHODIMP BindToFile(LPCWSTR filename_, SPFILEMODE eMode, const GUID *pFormatId, const WAVEFORMATEX *pWaveFormatEx, ULONGLONG ullEventInterest_) {
wprintf(L"BindToFile filename_=\"%s\" ullEventInterest=0x%04llx\n", filename_, ullEventInterest_);
if(SP_IS_BAD_STRING_PTR(filename_) || eMode >= SPFM_NUM_MODES || SP_IS_BAD_OPTIONAL_READ_PTR(pFormatId))
return E_INVALIDARG;
filename = filename_;
ullEventInterest = ullEventInterest_;
@ -309,7 +285,7 @@ public:
if(isStdout) {
h = GetStdHandle(STD_OUTPUT_HANDLE);
} else {
h = CreateFileW(filename, GENERIC_WRITE, FILE_SHARE_READ, 0, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL | FILE_FLAG_SEQUENTIAL_SCAN, 0);
h = CreateFileW(filename_, GENERIC_WRITE, FILE_SHARE_READ, 0, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL | FILE_FLAG_SEQUENTIAL_SCAN, 0);
if(h == INVALID_HANDLE_VALUE) {
DWORD e = GetLastError();
WCHAR buf[MAX_PATH];
@ -323,19 +299,16 @@ public:
}
virtual STDMETHODIMP Close(void) {
wprintf(L"Close\n");
if(!isStdout && h) {
BOOL b = CloseHandle(h);
if(!b) {
DWORD e = GetLastError();
WCHAR buf[MAX_PATH];
FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM, 0, e, 0, buf, sizeof(buf) / sizeof(buf[0]), 0);
fwprintf(stderr, L"Could not close \"%s\": %d (%s)", filename, e, buf);
return HRESULT_FROM_WIN32(e);
}
}
if(isStdout || !h) return S_OK;
return S_OK;
BOOL b = CloseHandle(h);
if(b) return S_OK;
DWORD e = GetLastError();
WCHAR buf[MAX_PATH];
FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM, 0, e, 0, buf, sizeof(buf) / sizeof(buf[0]), 0);
fwprintf(stderr, L"Could not close \"%s\": %d (%s)", filename, e, buf);
return HRESULT_FROM_WIN32(e);
}
};
@ -343,7 +316,7 @@ class RawSpStream: public BaseSpStream {
public:
HANDLE eh; // events file handle
RawSpStream() {}
RawSpStream(): eh(0) {}
virtual STDMETHODIMP BindToFile(LPCWSTR filename_, SPFILEMODE eMode, const GUID *pFormatId, const WAVEFORMATEX *pWaveFormatEx, ULONGLONG ullEventInterest_) {
HRESULT hr = BaseSpStream::BindToFile(filename_, eMode, pFormatId, pWaveFormatEx, ullEventInterest_);
@ -351,8 +324,8 @@ public:
if(isStdout) {
eh = (HANDLE)_get_osfhandle(3);
} else if(ullEventInterest) {
fwprintf(stderr, L"Cannot select events when output is not stdout\n");
} else if(ullEventInterest_) {
fwprintf(stderr, L"Cannot select events (0x%04llx) when output is not stdout\n", ullEventInterest_);
return E_INVALIDARG;
}
@ -376,101 +349,254 @@ class OggSpStream: public BaseSpStream {
public:
ogg_stream_state ogg_voice_st;
ogg_stream_state ogg_events_st;
ogg_page og;
ogg_packet op;
ULONG granulepos;
ULONG packetno;
ULONG packetNo, eventpacketNo;
OggSpStream() {}
// FIXME: error checking
// Opens the output through BaseSpStream::BindToFile, then initialises the two
// logical Ogg streams (serial number 1 for voice, 2 for events) and resets the
// granule position and both packet counters.
// Returns the base class failure code if the bind fails, E_FAIL if either Ogg
// stream cannot be initialised, S_OK otherwise.
virtual STDMETHODIMP BindToFile(LPCWSTR filename_, SPFILEMODE eMode, const GUID *pFormatId, const WAVEFORMATEX *pWaveFormatEx, ULONGLONG ullEventInterest_) {
    const HRESULT bindResult = BaseSpStream::BindToFile(filename_, eMode, pFormatId, pWaveFormatEx, ullEventInterest_);
    if(FAILED(bindResult)) {
        fwprintf(stderr, L"Could not bind to file %s: %d %s\n", filename_, bindResult, getErrorString(bindResult));
        return bindResult;
    }

    // ogg_stream_init reports failure with a non-zero return value.
    if(ogg_stream_init(&ogg_voice_st, 1) != 0) {
        fwprintf(stderr, L"Could not initialize ogg stream\n");
        return E_FAIL;
    }
    if(ogg_stream_init(&ogg_events_st, 2) != 0) {
        fwprintf(stderr, L"Could not initialize ogg stream\n");
        return E_FAIL;
    }

    eventpacketNo = 0;
    packetNo = 0;
    granulepos = 0;
    return S_OK;
}
// Writes the beginning-of-stream packet of the events stream: the 8 byte
// magic "SAPIEVNT". Does nothing (S_OK) when no events were requested.
// Returns E_FAIL if the packet cannot be queued, otherwise the result of
// flushing the events stream to the output.
STDMETHODIMP writeEventHead() {
    if(!ullEventInterest) return S_OK;

    unsigned char magic[8] = { 'S', 'A', 'P', 'I', 'E', 'V', 'N', 'T' };
    ogg_packet headPacket;
    headPacket.b_o_s = 1;
    headPacket.e_o_s = 0;
    headPacket.packet = magic;
    headPacket.bytes = 8;
    headPacket.granulepos = 0;
    headPacket.packetno = eventpacketNo++;

    if(ogg_stream_packetin(&ogg_events_st, &headPacket) != 0) {
        fwprintf(stderr, L"Could not add the header packet to the events stream\n");
        return E_FAIL;
    }
    return flushStream(&ogg_events_st);
}
// FIXME: error checking
// Queues `sz` bytes from `buf` as one packet on the events stream, stamped
// with the current granule position, and writes out any pages that became
// complete. Does nothing (S_OK) when no events were requested.
// Returns E_FAIL if the packet cannot be queued, otherwise the page-out result.
STDMETHODIMP writeEventData(void *buf, size_t sz) {
    if(!ullEventInterest) return S_OK;

    ogg_packet eventPacket;
    eventPacket.b_o_s = 0;
    eventPacket.e_o_s = 0;
    eventPacket.packet = (unsigned char *)buf;
    eventPacket.bytes = (ULONG)sz;
    eventPacket.granulepos = granulepos;
    eventPacket.packetno = eventpacketNo++;

    if(ogg_stream_packetin(&ogg_events_st, &eventPacket) != 0) {
        fwprintf(stderr, L"Could not add a data packet of length %lu to the events stream\n", (ULONG)sz);
        return E_FAIL;
    }
    return pageoutStream(&ogg_events_st);
}
// Terminates the events stream with an empty end-of-stream packet and flushes
// the remaining pages. Does nothing (S_OK) when no events were requested.
// Returns E_FAIL if the packet cannot be queued, otherwise the flush result.
STDMETHODIMP flushEventStream(void) {
    if(!ullEventInterest) return S_OK;

    ogg_packet lastPacket;
    lastPacket.b_o_s = 0;
    lastPacket.e_o_s = 1;
    lastPacket.packet = 0;
    lastPacket.bytes = 0;
    lastPacket.granulepos = granulepos;
    // The counter is deliberately not advanced: nothing follows this packet.
    lastPacket.packetno = eventpacketNo;

    if(ogg_stream_packetin(&ogg_events_st, &lastPacket) != 0) {
        fwprintf(stderr, L"Could not add the final packet to the events stream\n");
        return E_FAIL;
    }
    return flushStream(&ogg_events_st);
}
virtual STDMETHODIMP Write(const void *buf, ULONG size, ULONG *newPos) = 0;
// Finishes the events stream, releases both Ogg stream states and closes the
// underlying output through BaseSpStream::Close.
// Every cleanup step is attempted even if an earlier one fails, so the file
// handle is not left open on error. The first failure encountered is returned;
// S_OK means every step succeeded.
HRESULT STDMETHODCALLTYPE Close() {
    // A failure to write the final events page must not be reported as success.
    HRESULT result = flushEventStream();
    if(ogg_stream_clear(&ogg_voice_st)) {
        fwprintf(stderr, L"Could not clear voice stream\n");
        if(SUCCEEDED(result)) result = E_FAIL;
    }
    if(ogg_stream_clear(&ogg_events_st)) {
        fwprintf(stderr, L"Could not clear events stream\n");
        if(SUCCEEDED(result)) result = E_FAIL;
    }
    HRESULT closeResult = BaseSpStream::Close();
    return FAILED(result) ? result : closeResult;
}
// Writes one Ogg page (header followed by body) to the output handle `h`.
// Returns S_OK on success. On failure, prints the system error text to stderr
// and returns the Win32 error converted with HRESULT_FROM_WIN32.
STDMETHODIMP writePage(ogg_page *p) {
    // WriteFile only accepts a NULL lpNumberOfBytesWritten when lpOverlapped is
    // non-NULL; these are synchronous writes, so a real DWORD must be supplied.
    DWORD written = 0;
    BOOL r = WriteFile(h, p->header, p->header_len, &written, 0);
    if(r) r = WriteFile(h, p->body, p->body_len, &written, 0);
    if(r) return S_OK;
    DWORD e = GetLastError();
    WCHAR buf[MAX_PATH];
    // Keep the message printable even if FormatMessage cannot resolve the code.
    buf[0] = L'\0';
    FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM, 0, e, 0, buf, sizeof(buf) / sizeof(buf[0]), 0);
    fwprintf(stderr, L"Could not write to %s: %d (%s)", isStdout ? L"stdout" : filename, e, buf);
    return HRESULT_FROM_WIN32(e);
}
// Writes every page that ogg_stream_pageout currently yields for `os`.
// Returns S_OK once no further page is available, or the first writePage
// result that is not S_OK.
STDMETHODIMP pageoutStream(ogg_stream_state *os) {
    ogg_page page;
    for(;;) {
        if(!ogg_stream_pageout(os, &page)) return S_OK;
        HRESULT writeResult = writePage(&page);
        if(writeResult == S_OK) continue;
        fwprintf(stderr, L"Could not write page: %d %s\n", writeResult, getErrorString(writeResult));
        return writeResult;
    }
}
// Writes every page that ogg_stream_flush yields for `os`.
// Returns S_OK once no further page is available, or the first writePage
// result that is not S_OK.
STDMETHODIMP flushStream(ogg_stream_state *os) {
    ogg_page page;
    for(;;) {
        if(!ogg_stream_flush(os, &page)) return S_OK;
        HRESULT writeResult = writePage(&page);
        if(writeResult == S_OK) continue;
        fwprintf(stderr, L"Could not write page: %d %s\n", writeResult, getErrorString(writeResult));
        return writeResult;
    }
}
};
class OggVorbisSpStream: public OggSpStream {
public:
vorbis_info vi;
vorbis_comment vc;
vorbis_dsp_state vd;
vorbis_block vb;
OggSpStream() {}
OggVorbisSpStream() {}
// FIXME: error checking
virtual STDMETHODIMP BindToFile(LPCWSTR filename_, SPFILEMODE eMode, const GUID *pFormatId, const WAVEFORMATEX *pWaveFormatEx, ULONGLONG ullEventInterest_) {
BaseSpStream::BindToFile(filename_, eMode, pFormatId, pWaveFormatEx, ullEventInterest_);
STDMETHODIMP BindToFile(LPCWSTR filename_, SPFILEMODE eMode, const GUID *pFormatId, const WAVEFORMATEX *pWaveFormatEx, ULONGLONG ullEventInterest_) {
OggSpStream::BindToFile(filename_, eMode, pFormatId, pWaveFormatEx, ullEventInterest_);
vorbis_info_init(&vi);
vorbis_encode_init_vbr(&vi, wfex.nChannels, wfex.nSamplesPerSec, 0.1f);
vorbis_comment_init(&vc);
vorbis_comment_add_tag(&vc, "ENCODER", "sapicli");
vorbis_analysis_init(&vd, &vi);
vorbis_block_init(&vd, &vb);
ogg_stream_init(&ogg_voice_st, 1);
ogg_stream_init(&ogg_events_st, 2);
granulepos = 0;
packetno = 0;
{
ogg_packet header;
ogg_packet header_comm;
ogg_packet header_code;
vorbis_analysis_headerout(&vd, &vc, &header, &header_comm, &header_code);
ogg_stream_packetin(&ogg_voice_st, &header); /* automatically placed in its own page */
while(ogg_stream_flush(&ogg_voice_st, &og)) {
WriteFile(h, og.header, og.header_len, 0, 0);
WriteFile(h, og.body, og.body_len, 0, 0);
}
if(ullEventInterest != 0) {
unsigned char evntHead[8] = { 'S', 'A', 'P', 'I', 'E', 'V', 'N', 'T' };
ogg_packet p;
memset(&p, 0, sizeof(p));
p.packet = evntHead;
p.bytes = 8;
p.b_o_s = 1;
ogg_stream_packetin(&ogg_events_st, &p);
while(ogg_stream_flush(&ogg_events_st, &og)) {
WriteFile(h, og.header, og.header_len, 0, 0);
WriteFile(h, og.body, og.body_len, 0, 0);
}
}
ogg_packet header;
ogg_packet header_comm;
ogg_packet header_code;
ogg_stream_packetin(&ogg_voice_st, &header_comm);
ogg_stream_packetin(&ogg_voice_st, &header_code);
while(ogg_stream_flush(&ogg_voice_st, &og)) {
WriteFile(h, og.header, og.header_len, 0, 0);
WriteFile(h, og.body, og.body_len, 0, 0);
}
}
vorbis_analysis_headerout(&vd, &vc, &header, &header_comm, &header_code);
ogg_stream_packetin(&ogg_voice_st, &header); /* automatically placed in its own page */
HRESULT hr = flushStream(&ogg_voice_st);
if(hr != S_OK) return hr;
return S_OK;
hr = writeEventHead();
if(hr != S_OK) return hr;
ogg_stream_packetin(&ogg_voice_st, &header_comm);
ogg_stream_packetin(&ogg_voice_st, &header_code);
return flushStream(&ogg_voice_st);
}
HRESULT STDMETHODCALLTYPE Write(const void *buf, ULONG size, ULONG *newPos) {
int eos = 0;
if(size == 0) {
vorbis_analysis_wrote(&vd, 0);
} else {
int nSamples = size * 8 / wfex.wBitsPerSample/ wfex.nChannels;
int nSamples = size * 8 / wfex.wBitsPerSample / wfex.nChannels;
granulepos += nSamples;
float **buffer = vorbis_analysis_buffer(&vd, nSamples);
for(int j = 0; j < wfex.nChannels; j++) {
float *sample = buffer[j];
/* Optimized copy for common combination of bit depths and numbers of channels */
if(wfex.wBitsPerSample == 8 && wfex.nChannels == 1) {
char *srcSample = (char *)buf;
float *sample0 = buffer[0];
for(int i = 0; i < nSamples; i++) {
float sm = wfex.wBitsPerSample == 16 ? ((short *)buf)[i] / 32768.f : ((char *)buf)[i] / 256.f;
*(sample++) = sm;
*(sample0++) = *(srcSample++) / 128.f;
}
} else if(wfex.wBitsPerSample == 8 && wfex.nChannels == 2) {
char *srcSample = (char *)buf;
float *sample0 = buffer[0];
float *sample1 = buffer[1];
for(int i = 0; i < nSamples; i++) {
*(sample0++) = *(srcSample++) / 128.f;
*(sample1++) = *(srcSample++) / 128.f;
}
} else if(wfex.wBitsPerSample == 16 && wfex.nChannels == 1) {
short *srcSample = (short *)buf;
float *sample0 = buffer[0];
for(int i = 0; i < nSamples; i++) {
*(sample0++) = *(srcSample++) / 32768.f;
}
} else if(wfex.wBitsPerSample == 16 && wfex.nChannels == 2) {
short *srcSample = (short *)buf;
float *sample0 = buffer[0];
float *sample1 = buffer[1];
for(int i = 0; i < nSamples; i++) {
*(sample0++) = *(srcSample++) / 32768.f;
*(sample1++) = *(srcSample++) / 32768.f;
}
} else {
/* Generic, rarely used, slow method */
int bytesPerSample = (wfex.wBitsPerSample + 7) >> 3;
float divisor = (float)(1 << (wfex.wBitsPerSample - 1));
char *startSrcSample = (char *)buf;
int strideSkip = bytesPerSample * wfex.nChannels;
for(int j = 0; j < wfex.nChannels; j++) {
float *sample = buffer[j];
char *srcSample = startSrcSample;
for(int i = 0; i < nSamples; i++) {
LONGLONG srcSampleAccum = (srcSample[bytesPerSample - 1] < 0) ? -1 : 0;
memcpy((void *)&srcSampleAccum, srcSample, bytesPerSample);
srcSample += strideSkip;
*(sample++) = (float)srcSampleAccum / divisor;
}
startSrcSample += bytesPerSample;
}
}
vorbis_analysis_wrote(&vd, nSamples);
}
int eos = 0;
while(vorbis_analysis_blockout(&vd, &vb) == 1) {
vorbis_analysis(&vb, NULL);
vorbis_bitrate_addblock(&vb);
while(vorbis_bitrate_flushpacket(&vd, &op)) {
ogg_stream_packetin(&ogg_voice_st, &op);
ogg_packet p;
while(vorbis_bitrate_flushpacket(&vd, &p)) {
ogg_stream_packetin(&ogg_voice_st, &p);
while(!eos) {
int result = ogg_stream_pageout(&ogg_voice_st, &og);
ogg_page p;
int result = ogg_stream_pageout(&ogg_voice_st, &p);
if(result == 0) break;
WriteFile(h, og.header, og.header_len, 0, 0);
WriteFile(h, og.body, og.body_len, 0, 0);
writePage(&p);
if(ogg_page_eos(&og)) eos = 1;
if(ogg_page_eos(&p)) eos = 1;
}
}
}
@ -478,45 +604,190 @@ public:
if(newPos) *newPos += size;
return S_OK;
}
HRESULT STDMETHODCALLTYPE Close() {
Write(0, 0, 0);
if(ullEventInterest != 0) {
ogg_packet p;
memset(&p, 0, sizeof(p));
p.packet = 0;
p.bytes = 0;
p.e_o_s = 1;
ogg_stream_packetin(&ogg_events_st, &p);
while(ogg_stream_pageout(&ogg_events_st, &og)) {
WriteFile(h, og.header, og.header_len, 0, 0);
WriteFile(h, og.body, og.body_len, 0, 0);
}
}
ogg_stream_clear(&ogg_voice_st);
ogg_stream_clear(&ogg_events_st);
vorbis_block_clear(&vb);
vorbis_dsp_clear(&vd);
vorbis_comment_clear(&vc);
vorbis_info_clear(&vi);
return BaseSpStream::Close();
return OggSpStream::Close();
}
};
class OggOpusSpStream: public OggSpStream {
public:
OpusEncoder *enc;
opus_int16 frame[2880 * 2]; // max frame size times two channels
int framepos;
int framesize;
OggOpusSpStream(): enc(0), framepos(0), framesize(960) {}
// Maps an Opus status code to a human readable message.
// Codes not listed here yield L"Unknown error".
const WCHAR *getOpusErrorString(int err) {
    if(err == OPUS_OK) return L"No error";
    if(err == OPUS_BAD_ARG) return L"One or more invalid / out of range arguments.";
    if(err == OPUS_BUFFER_TOO_SMALL) return L"Not enough bytes allocated in the buffer.";
    if(err == OPUS_INTERNAL_ERROR) return L"An internal error was detected.";
    if(err == OPUS_INVALID_PACKET) return L"The compressed data passed is corrupted.";
    if(err == OPUS_UNIMPLEMENTED) return L"Invalid / unsupported request number.";
    if(err == OPUS_INVALID_STATE) return L"An encoder or decoder structure is invalid or already freed.";
    if(err == OPUS_ALLOC_FAIL) return L"Memory allocation has failed.";
    return L"Unknown error";
}
// Translates an Opus status code into an HRESULT.
// OPUS_OK becomes S_OK; codes without a closer equivalent, and any code not
// listed, become E_FAIL.
HRESULT opusToHresult(int err) {
    HRESULT mapped = E_FAIL;
    switch(err) {
        case OPUS_OK:
            mapped = S_OK;
            break;
        case OPUS_BAD_ARG:
            mapped = E_INVALIDARG;
            break;
        case OPUS_BUFFER_TOO_SMALL:
            mapped = E_NOT_SUFFICIENT_BUFFER;
            break;
        case OPUS_INVALID_PACKET:
            mapped = E_INVALID_PROTOCOL_FORMAT;
            break;
        case OPUS_UNIMPLEMENTED:
            mapped = E_NOTIMPL;
            break;
        case OPUS_INTERNAL_ERROR:
        case OPUS_INVALID_STATE:
        case OPUS_ALLOC_FAIL:
        default:
            break;
    }
    return mapped;
}
// FIXME: error checking
STDMETHODIMP writeEventData(void *buf, size_t sz) {
return S_OK;
ogg_packet p;
p.packet = (unsigned char *)buf;
p.bytes = (ULONG)sz;
ogg_stream_packetin(&ogg_voice_st, &p);
// Opens the output, creates the Opus encoder and writes the stream headers.
// Page order on the output: OpusHead (voice stream), the optional events
// stream header, then OpusTags (voice stream).
// Returns the failure code of OggSpStream::BindToFile if the bind fails,
// E_INVALIDARG for bit depths other than 16, the mapped Opus error if the
// encoder cannot be created, E_FAIL if a header packet cannot be queued, or
// the result of writing the header pages.
STDMETHODIMP BindToFile(LPCWSTR filename_, SPFILEMODE eMode, const GUID *pFormatId, const WAVEFORMATEX *pWaveFormatEx, ULONGLONG ullEventInterest_) {
    // Do not continue on an unopened file or uninitialised Ogg streams.
    HRESULT hr = OggSpStream::BindToFile(filename_, eMode, pFormatId, pWaveFormatEx, ullEventInterest_);
    if(FAILED(hr)) return hr;
    int err;
    if(wfex.wBitsPerSample != 16) {
        fwprintf(stderr, L"Only 16 bit depth is supported for opus\n");
        return E_INVALIDARG;
    }
    enc = opus_encoder_create(wfex.nSamplesPerSec, wfex.nChannels, OPUS_APPLICATION_VOIP, &err);
    if(err != OPUS_OK) {
        fwprintf(stderr, L"Error creating encoder: %d %s\n", err, getOpusErrorString(err));
        return opusToHresult(err);
    }
    opus_encoder_ctl(enc, OPUS_SET_SIGNAL(OPUS_SIGNAL_VOICE));
    // One frame is 20 ms of audio at the input sample rate.
    framesize = wfex.nSamplesPerSec * 20 / 1000;
    ogg_packet header;
    int lookahead = 3840;
    opus_encoder_ctl(enc, OPUS_GET_LOOKAHEAD(&lookahead));
    // NOTE(review): RFC 7845 expresses pre-skip and granule positions in 48 kHz
    // units, while the values used here are at the input rate - confirm for
    // sample rates other than 48000.
    // OpusHead: magic, version, channel count, pre-skip (LE16),
    // input sample rate (LE32), output gain (LE16), channel mapping family.
    unsigned char opusHeader[19] = {
        'O', 'p', 'u', 's',
        'H', 'e', 'a', 'd',
        1,
        (unsigned char)wfex.nChannels,
        (unsigned char)(lookahead >> 0),
        (unsigned char)(lookahead >> 8),
        (unsigned char)(wfex.nSamplesPerSec >> 0),
        (unsigned char)(wfex.nSamplesPerSec >> 8),
        (unsigned char)(wfex.nSamplesPerSec >> 16),
        (unsigned char)(wfex.nSamplesPerSec >> 24),
        0x00, 0x00,
        0
    };
    header.packet = opusHeader;
    header.bytes = sizeof(opusHeader);
    header.b_o_s = 1;
    header.e_o_s = 0;
    header.granulepos = 0;
    header.packetno = packetNo++;
    if(ogg_stream_packetin(&ogg_voice_st, &header)) {
        fwprintf(stderr, L"Could not add the OpusHead packet to the voice stream\n");
        return E_FAIL;
    }
    hr = flushStream(&ogg_voice_st);
    if(hr != S_OK) return hr;
    hr = writeEventHead();
    if(hr != S_OK) return hr;
    // OpusTags: magic, vendor string length (LE32) + "sapicli",
    // comment count (LE32), comment length (LE32) + "ENCODER=sapicli".
    unsigned char opusTags[42] = {
        'O', 'p', 'u', 's',
        'T', 'a', 'g', 's',
        7, 0, 0, 0,
        's', 'a', 'p', 'i', 'c', 'l', 'i',
        1, 0, 0, 0,
        15, 0, 0, 0,
        'E', 'N', 'C', 'O', 'D', 'E', 'R', '=',
        's', 'a', 'p', 'i', 'c', 'l', 'i'
    };
    header.packet = opusTags;
    header.bytes = sizeof(opusTags);
    header.b_o_s = 0;
    header.e_o_s = 0;
    header.granulepos = 0;
    header.packetno = packetNo++;
    if(ogg_stream_packetin(&ogg_voice_st, &header)) {
        fwprintf(stderr, L"Could not add the OpusTags packet to the voice stream\n");
        return E_FAIL;
    }
    return flushStream(&ogg_voice_st);
}
while(1) {
int result = ogg_stream_pageout(&ogg_events_st, &og);
if(result == 0) break;
WriteFile(h, og.header, og.header_len, 0, 0);
WriteFile(h, og.body, og.body_len, 0, 0);
// Accepts `size` bytes of interleaved PCM from `buf`, accumulates them in
// `frame`, and encodes one Opus packet each time `framesize` samples per
// channel have been collected. Samples left over in a partial frame stay in
// `frame`/`framepos` for the next call.
// Only 16 bit input is processed; for any other bit depth the data is skipped.
// If `newPos` is non-null, `size` is added to the value it points to.
// Returns E_FAIL if encoding or queuing a packet fails, the page-out failure
// code if writing pages fails, S_OK otherwise.
STDMETHODIMP Write(const void *buf, ULONG size, ULONG *newPos) {
// Number of samples per channel contained in this buffer.
int nSamples = size * 8 / wfex.wBitsPerSample / wfex.nChannels;
unsigned char encbuf[4096];
if(wfex.wBitsPerSample == 16) {
short *samples = (short *)buf;
for(int x = 0; x < nSamples; x++) {
// Copy one sample for every channel into the pending frame.
for(int i = 0; i < wfex.nChannels; i++) {
frame[framepos * wfex.nChannels + i] = *(samples++);
}
framepos++;
// A full frame has been gathered: encode it.
if(framepos >= framesize) {
int encoded = opus_encode(enc, frame, framesize, encbuf, sizeof(encbuf));
if(encoded < 0) {
fwprintf(stderr, L"Could not encode %d samples of opus data %d %s\n", framesize, encoded, getOpusErrorString(encoded));
return E_FAIL;
}
// Results of 2 bytes or fewer are not written and do not advance granulepos
// (presumably DTX/silence frames that need no transmission - confirm).
if(encoded > 2) {
ogg_packet p;
p.packet = encbuf;
p.bytes = encoded;
p.b_o_s = p.e_o_s = 0;
granulepos += framesize;
p.granulepos = granulepos;
p.packetno = packetNo++;
if(ogg_stream_packetin(&ogg_voice_st, &p)) {
fwprintf(stderr, L"Could not write opus voice packet of length %d to ogg stream\n", p.bytes);
return E_FAIL;
}
// Write out whatever pages are complete after adding this packet.
HRESULT hr = pageoutStream(&ogg_voice_st);
if(hr != S_OK) return hr;
}
// Start collecting the next frame.
framepos = 0;
}
}
}
if(newPos) *newPos += size;
return S_OK;
}
// Encodes the pending partial frame (padded with silence) as the final,
// end-of-stream voice packet, flushes the voice stream, destroys the encoder
// and closes the underlying Ogg streams and output.
// Returns E_FAIL if the final encode or queuing the packet fails, the flush
// failure code if writing pages fails, otherwise the OggSpStream::Close result.
STDMETHODIMP Close() {
    unsigned char encbuf[4096];
    // Zero the unused tail of the frame. memset counts bytes while `frame`
    // holds opus_int16 samples, so the sample count is scaled by the element size.
    memset(frame + framepos * wfex.nChannels, 0, (framesize - framepos) * wfex.nChannels * sizeof(frame[0]));
    int encoded = opus_encode(enc, frame, framesize, encbuf, sizeof(encbuf));
    if(encoded < 0) {
        fwprintf(stderr, L"Could not encode final %d (%d) samples of opus data %d %s\n", framesize, framepos, encoded, getOpusErrorString(encoded));
        return E_FAIL;
    }
    ogg_packet p;
    p.packet = encbuf;
    // Results of 2 bytes or fewer are sent as an empty end-of-stream packet.
    p.bytes = encoded > 2 ? encoded : 0;
    p.b_o_s = 0;
    p.e_o_s = 1;
    // Only the samples actually received count towards the final position.
    granulepos += framepos;
    p.granulepos = granulepos;
    p.packetno = packetNo++;
    if(ogg_stream_packetin(&ogg_voice_st, &p)) {
        fwprintf(stderr, L"Could not add final packet to voice stream\n");
        return E_FAIL;
    }
    HRESULT hr = flushStream(&ogg_voice_st);
    if(hr != S_OK) return hr;
    opus_encoder_destroy(enc);
    enc = 0;
    return OggSpStream::Close();
}
};
int speakToWav(WCHAR *text, WCHAR *voiceId, WCHAR *wavFilename, DWORD outType, int rate, int volume, DWORD speakFlags, DWORD samplesPerSec, WORD bitsPerSample, WORD nChannels, ULONGLONG ullEventInterest) {
@ -578,15 +849,17 @@ int speakToWav(WCHAR *text, WCHAR *voiceId, WCHAR *wavFilename, DWORD outType, i
ISpStream *outputStream = 0;
if(outType == 2) {
if(outType == 1) {
outputStream = new RawSpStream();
} else if(outType == 2) {
HRESULT hr = ::CoCreateInstance(CLSID_SpStream, NULL, CLSCTX_ALL, __uuidof(outputStream), (void **)&outputStream);
if(FAILED(hr)) {
fwprintf(stderr, L"Could not instantiate SpStream: %d %s\n", hr, getErrorString(hr));
}
} else if(outType == 3) {
outputStream = new OggSpStream();
} else if(outType == 1) {
outputStream = new RawSpStream();
outputStream = new OggVorbisSpStream();
} else if(outType == 4) {
outputStream = new OggOpusSpStream();
} else {
fwprintf(stderr, L"Invalid output type %d\n", outType);
return E_INVALIDARG;
@ -623,7 +896,14 @@ int speakToWav(WCHAR *text, WCHAR *voiceId, WCHAR *wavFilename, DWORD outType, i
return 1;
}
outputStream->Close();
// Release here so the destructor doesn't do it after we've closed the output file
voice.Release();
hr = outputStream->Close();
if(FAILED(hr)) {
fwprintf(stderr, L"Could not close %s: %d %s\n", wavFilename, hr, getErrorString(hr));
return 1;
}
if(voiceId && voiceId[0])
voiceToken.Release();
@ -687,8 +967,10 @@ int wmain(int argc, WCHAR *argv[]) {
outType = 1;
else if(!_wcsicmp(optarg, L"wav"))
outType = 2;
else if(!_wcsicmp(optarg, L"ogg"))
else if(!_wcsicmp(optarg, L"ogg") || !_wcsicmp(optarg, L"ogg+vorbis"))
outType = 3;
else if(!_wcsicmp(optarg, L"ogg+opus"))
outType = 4;
else
help = 1;
break;
@ -744,7 +1026,8 @@ int wmain(int argc, WCHAR *argv[]) {
L" Use `-' for stdout.\n"
L" -T, --out-type=TYPE Output file type. Default is `auto'\n"
L" `wav' for RIFF .wav\n"
L" `ogg' for Ogg Vorbis\n"
L" `ogg' or `ogg+vorbis' for Ogg Vorbis\n"
L" `ogg+opus' for Ogg Opus\n"
L" `raw' for raw PCM samples\n"
L" `auto' to autodetect from file extension\n"
L" -v, --voice=VOICE Select voice.\n"

@ -165,7 +165,7 @@
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>$(SolutionDir)..\libogg\win32\VS2015\$(PlatformName)\Debug\libogg.lib;$(SolutionDir)..\libvorbis\win32\VS2010\$(PlatformName)\Debug\libvorbis_static.lib</AdditionalDependencies>
<AdditionalDependencies>$(SolutionDir)..\libogg\win32\VS2015\$(PlatformName)\Debug\libogg.lib;$(SolutionDir)..\libvorbis\win32\VS2010\$(PlatformName)\Debug\libvorbis_static.lib;$(SolutionDir)..\opus\win32\VS2015\$(PlatformName)\Debug\opus.lib</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='DebugDynamic|Win32'">
@ -179,7 +179,7 @@
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>$(SolutionDir)..\libogg\win32\VS2015\$(PlatformName)\DebugDLL\libogg.lib;$(SolutionDir)..\libvorbis\win32\VS2010\$(PlatformName)\Debug\libvorbis.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>$(SolutionDir)..\libogg\win32\VS2015\$(PlatformName)\DebugDLL\libogg.lib;$(SolutionDir)..\libvorbis\win32\VS2010\$(PlatformName)\Debug\libvorbis.lib;$(SolutionDir)..\opus\win32\VS2015\$(PlatformName)\DebugDLL\opus.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseStatic|Win32'">
@ -197,7 +197,7 @@
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>$(SolutionDir)..\libogg\win32\VS2015\$(PlatformName)\Release\libogg.lib;$(SolutionDir)..\libvorbis\win32\VS2010\$(PlatformName)\Release\libvorbis_static.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>$(SolutionDir)..\libogg\win32\VS2015\$(PlatformName)\Release\libogg.lib;$(SolutionDir)..\libvorbis\win32\VS2010\$(PlatformName)\Release\libvorbis_static.lib;$(SolutionDir)..\opus\win32\VS2015\$(PlatformName)\Release\opus.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseDynamic|Win32'">
@ -215,7 +215,7 @@
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>$(SolutionDir)..\libogg\win32\VS2015\$(PlatformName)\ReleaseDLL\libogg.lib;$(SolutionDir)..\libvorbis\win32\VS2010\$(PlatformName)\Release\libvorbis.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>$(SolutionDir)..\libogg\win32\VS2015\$(PlatformName)\ReleaseDLL\libogg.lib;$(SolutionDir)..\libvorbis\win32\VS2010\$(PlatformName)\Release\libvorbis.lib;$(SolutionDir)..\opus\win32\VS2015\$(PlatformName)\ReleaseDLL\opus.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='DebugStatic|x64'">
@ -229,7 +229,7 @@
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>$(SolutionDir)..\libogg\win32\VS2015\$(PlatformName)\Debug\libogg.lib;$(SolutionDir)..\libvorbis\win32\VS2010\$(PlatformName)\Debug\libvorbis_static.lib</AdditionalDependencies>
<AdditionalDependencies>$(SolutionDir)..\libogg\win32\VS2015\$(PlatformName)\Debug\libogg.lib;$(SolutionDir)..\libvorbis\win32\VS2010\$(PlatformName)\Debug\libvorbis_static.lib;$(SolutionDir)..\opus\win32\VS2015\$(PlatformName)\Debug\opus.lib</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='DebugDynamic|x64'">
@ -243,7 +243,7 @@
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>$(SolutionDir)..\libogg\win32\VS2015\$(PlatformName)\DebugDLL\libogg.lib;$(SolutionDir)..\libvorbis\win32\VS2010\$(PlatformName)\Debug\libvorbis.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>$(SolutionDir)..\libogg\win32\VS2015\$(PlatformName)\DebugDLL\libogg.lib;$(SolutionDir)..\libvorbis\win32\VS2010\$(PlatformName)\Debug\libvorbis.lib;$(SolutionDir)..\opus\win32\VS2015\$(PlatformName)\DebugDLL\opus.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseStatic|x64'">
@ -261,7 +261,7 @@
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>$(SolutionDir)..\libogg\win32\VS2015\$(PlatformName)\Release\libogg.lib;$(SolutionDir)..\libvorbis\win32\VS2010\$(PlatformName)\Release\libvorbis_static.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>$(SolutionDir)..\libogg\win32\VS2015\$(PlatformName)\Release\libogg.lib;$(SolutionDir)..\libvorbis\win32\VS2010\$(PlatformName)\Release\libvorbis_static.lib;$(SolutionDir)..\opus\win32\VS2015\$(PlatformName)\Release\opus.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='ReleaseDynamic|x64'">
@ -279,13 +279,17 @@
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>$(SolutionDir)..\libogg\win32\VS2015\$(PlatformName)\ReleaseDLL\libogg.lib;$(SolutionDir)..\libvorbis\win32\VS2010\$(PlatformName)\Release\libvorbis.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>$(SolutionDir)..\libogg\win32\VS2015\$(PlatformName)\ReleaseDLL\libogg.lib;$(SolutionDir)..\libvorbis\win32\VS2010\$(PlatformName)\Release\libvorbis.lib;$(SolutionDir)..\opus\win32\VS2015\$(PlatformName)\ReleaseDLL\opus.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="sapicli.cpp" />
<ClCompile Include="getoptw.c" />
</ItemGroup>
<ItemGroup>
<None Include="test.bat" />
<None Include="zip.bat" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>

@ -0,0 +1,31 @@
@echo off
REM Runs sapicli.exe over every combination of platform, configuration, output
REM type, sample rate, bit depth, channel count, event selection and voice.
REM Each run writes its audio to test\ and prints one comma separated line:
REM platform,config,type,rate,bits,channels,events,voice,exit code,stdout,stderr
REM Only the first line of the captured stdout and stderr is reported.
setlocal enableDelayedExpansion
SET TEXT="Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum."
FOR %%P IN (Win32 x64) DO (
FOR %%C IN (ReleaseStatic) DO (
FOR %%F IN (raw wav ogg+vorbis ogg+opus) DO (
REM Both Ogg variants use the .ogg file extension; other types use their own name.
SET EXT=%%F
IF !EXT!==ogg+vorbis SET EXT=ogg
IF !EXT!==ogg+opus SET EXT=ogg
FOR %%S IN (8000 11025 12000 16000 22050 24000 44100 48000) DO (
FOR %%B IN (8 12 16 24 32) DO (
FOR %%N IN (1 2 3) DO (
FOR %%E IN (0 288 all) DO (
FOR %%V IN (DAVID ZIRA) DO (
REM Remove the previous capture files and clear the variables that receive them.
DEL stdout.txt
DEL stderr.txt
SET STDOUT=
SET STDERR=
%%P\%%C\sapicli.exe -T %%F -s %%S -b %%B -c %%N -e %%E -v TTS_MS_EN-US_%%V_11.0 -o test\%%P_%%C_%%F_%%S_%%B_%%N_%%E_%%V.!EXT! %TEXT% > stdout.txt 2> stderr.txt
REM Save the exit code before any other command overwrites errorlevel.
SET L=!errorlevel!
SET /p STDOUT=<stdout.txt
SET /p STDERR=<stderr.txt
echo %%P,%%C,%%F,%%S,%%B,%%N,%%E,%%V,!L!,!STDOUT!,!STDERR!
)
)
)
)
)
)
)
)

@ -0,0 +1,7 @@
REM Packages the release builds of sapicli into versioned zip archives with 7-Zip.
REM SZ is the 7-Zip command line executable, V the version used in the archive names.
SET SZ="C:\Program Files\7-Zip\7z.exe"
SET V=0.3
REM Static builds ship only the executable; dynamic builds also ship the codec DLLs.
pushd Win32\ReleaseStatic && (%SZ% a ..\..\sapicli-%V%-x86-static.zip sapicli.exe & popd)
pushd Win32\ReleaseDynamic && (%SZ% a ..\..\sapicli-%V%-x86.zip sapicli.exe ogg.dll libvorbis.dll opus.dll & popd)
pushd x64\ReleaseStatic && (%SZ% a ..\..\sapicli-%V%-x64-static.zip sapicli.exe & popd)
REM The x64 dynamic build links the libogg DLL as well, so ogg.dll is included as in the x86 archive.
pushd x64\ReleaseDynamic && (%SZ% a ..\..\sapicli-%V%-x64.zip sapicli.exe ogg.dll libvorbis.dll opus.dll & popd)
Loading…
Cancel
Save