From 815293446fd60c8bd287249c27b1b6edf99b9af5 Mon Sep 17 00:00:00 2001 From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com> Date: Thu, 20 Feb 2025 10:31:06 +0800 Subject: [PATCH 01/21] Make SAPI4 voices use WASAPI --- source/synthDrivers/_sapi4.py | 80 +++++++- source/synthDrivers/sapi4.py | 360 ++++++++++++++++++++++++++++------ 2 files changed, 383 insertions(+), 57 deletions(-) diff --git a/source/synthDrivers/_sapi4.py b/source/synthDrivers/_sapi4.py index 5480b8d144..a1b9ca2a5b 100755 --- a/source/synthDrivers/_sapi4.py +++ b/source/synthDrivers/_sapi4.py @@ -17,9 +17,9 @@ sizeof, Structure, ) -from ctypes.wintypes import BYTE, DWORD, LPCWSTR, WORD +from ctypes.wintypes import BOOL, BYTE, DWORD, FILETIME, LPCWSTR, WORD from enum import IntEnum -from comtypes import GUID, IUnknown, STDMETHOD +from comtypes import GUID, IUnknown, STDMETHOD, COMMETHOD import winKernel @@ -43,6 +43,27 @@ TTSFEATURE_PITCH = 8 TTSFEATURE_FIXEDAUDIO = 1024 +# Audio related error codes +AUDERR_BADDEVICEID = -2147220735 +AUDERR_NEEDWAVEFORMAT = -2147220734 +AUDERR_NOTSUPPORTED = -2147467263 # E_NOTIMPL +AUDERR_NOTENOUGHDATA = -2147220991 +AUDERR_NOTPLAYING = -2147220730 +AUDERR_WAVEFORMATNOTSUPPORTED = -2147220990 +AUDERR_WAVEDEVICEBUSY = -2147220989 +AUDERR_WAVEDEVNOTSUPPORTED = -2147220718 +AUDERR_NOTRECORDING = -2147220717 +AUDERR_INVALIDFLAG = -2147220988 +AUDERR_NODRIVER = -2147220713 +AUDERR_HANDLEBUSY = -2147220712 +AUDERR_INVALIDNOTIFYSINK = -2147220711 +AUDERR_WAVENOTENABLED = -2147220710 +AUDERR_ALREADYCLAIMED = -2147220707 +AUDERR_NOTCLAIMED = -2147220706 +AUDERR_STILLPLAYING = -2147220705 +AUDERR_ALREADYSTARTED = -2147220704 +AUDERR_SYNCNOTALLOWED = -2147220703 + LANGID = WORD QWORD = c_ulonglong @@ -224,6 +245,61 @@ class ITTSNotifySinkW(IUnknown): ITTSNotifySink = ITTSNotifySinkW + +class IAudio(IUnknown): + _iid_ = GUID("{F546B340-C743-11cd-80E5-00AA003E4B50}") + + +IAudio._methods_ = [ + COMMETHOD([], HRESULT, "Flush"), + COMMETHOD([], HRESULT, "LevelGet", (["out"], POINTER(DWORD), "pdwLevel")), + COMMETHOD([], HRESULT, "LevelSet", (["in"], DWORD, "dwLevel")), + COMMETHOD( + [], + HRESULT, + "PassNotify", + (["in"], c_void_p, "pNotifyInterface"), + (["in"], GUID, "IIDNotifyInterface"), + ), + COMMETHOD([], HRESULT, "PosnGet", (["out"], POINTER(QWORD), "pqwTimeStamp")), + COMMETHOD([], HRESULT, "Claim"), + COMMETHOD([], HRESULT, "UnClaim"), + COMMETHOD([], HRESULT, "Start"), + COMMETHOD([], HRESULT, "Stop"), + COMMETHOD([], HRESULT, "TotalGet", (["out"], POINTER(QWORD), "pqWord")), + COMMETHOD( + [], HRESULT, "ToFileTime", (["in"], POINTER(QWORD), "pqWord"), (["out"], POINTER(FILETIME), "pFT") + ), + COMMETHOD([], HRESULT, "WaveFormatGet", (["out"], POINTER(SDATA), "pdWFEX")), + COMMETHOD([], HRESULT, "WaveFormatSet", (["in"], SDATA, "dWFEX")), +] + + +class IAudioDest(IUnknown): + _iid_ = GUID("{2EC34DA0-C743-11cd-80E5-00AA003E4B50}") + + +IAudioDest._methods_ = [ + COMMETHOD( + [], HRESULT, "FreeSpace", (["out"], POINTER(DWORD), "pdwBytes"), (["out"], POINTER(BOOL), "pfEOF") + ), + COMMETHOD([], HRESULT, "DataSet", (["in"], c_void_p, "pBuffer"), (["in"], DWORD, "dwSize")), + COMMETHOD([], HRESULT, "BookMark", (["in"], DWORD, "dwMarkID")), +] + + +class IAudioDestNotifySink(IUnknown): + _iid_ = GUID("{ACB08C00-C743-11cd-80E5-00AA003E4B50}") + + +IAudioDestNotifySink._methods_ = [ + STDMETHOD(HRESULT, "AudioStop", [WORD]), + STDMETHOD(HRESULT, "AudioStart"), + STDMETHOD(HRESULT, "FreeSpace", [DWORD, BOOL]), + STDMETHOD(HRESULT, "BookMark", [DWORD, BOOL]), +] + + CLSID_MMAudioDest = GUID("{CB96B400-C743-11cd-80E5-00AA003E4B50}") CLSID_TTSEnumerator = GUID("{D67C0280-C743-11cd-80E5-00AA003E4B50}") diff --git a/source/synthDrivers/sapi4.py b/source/synthDrivers/sapi4.py index 59a3873154..8b5eeae942 100755 --- a/source/synthDrivers/sapi4.py +++ b/source/synthDrivers/sapi4.py @@ -4,26 +4,37 @@ # See the file COPYING for more details. # This module is deprecated, pending removal in NVDA 2026.1. +from datetime import datetime +from enum import IntEnum import locale from collections import OrderedDict, deque +import threading import winreg -from comtypes import CoCreateInstance, COMObject, COMError, GUID -from ctypes import byref, c_ulong, POINTER, c_wchar, create_string_buffer, sizeof, windll -from ctypes.wintypes import DWORD, HANDLE, WORD +from comtypes import CoCreateInstance, COMObject, COMError, GUID, hresult, ReturnHRESULT +from ctypes import addressof, byref, c_ulong, POINTER, c_void_p, cast, memmove, string_at, sizeof, windll +from ctypes.wintypes import BOOL, DWORD, FILETIME, WORD from typing import Optional from autoSettingsUtils.driverSetting import BooleanDriverSetting import gui.contextHelp import gui.message +import nvwave import queueHandler from synthDriverHandler import SynthDriver, VoiceInfo, synthIndexReached, synthDoneSpeaking, synthChanged from logHandler import log import warnings from utils.security import isRunningOnSecureDesktop from ._sapi4 import ( - MMSYSERR_NOERROR, - CLSID_MMAudioDest, + AUDERR_ALREADYCLAIMED, + AUDERR_ALREADYSTARTED, + AUDERR_INVALIDNOTIFYSINK, + AUDERR_NEEDWAVEFORMAT, + AUDERR_NOTCLAIMED, + AUDERR_WAVEFORMATNOTSUPPORTED, + SDATA, CLSID_TTSEnumerator, - IAudioMultiMediaDevice, + IAudio, + IAudioDest, + IAudioDestNotifySink, ITTSAttributes, ITTSBufNotifySink, ITTSCentralW, @@ -42,7 +53,6 @@ TTSFEATURE_VOLUME, TTSMODEINFO, VOICECHARSET, - DriverMessage, ) import config import weakref @@ -94,6 +104,292 @@ def IUnknown_Release(self, this, *args, **kwargs): return super(SynthDriverBufSink, self).IUnknown_Release(this, *args, **kwargs) +class _DeviceState(IntEnum): + CLOSED = 0 # Not claimed + OPENED = 1 # Claimed + RUNNING = 2 # Started + CLOSING = 3 # Unclaiming + + +class SynthDriverAudio(COMObject): + """ + Implements IAudio and IAudioDest to receive streamed in audio data. + An instance of this class will be passed to, + and be used by the TTS engine. + """ + + _com_interfaces_ = [IAudio, IAudioDest] + + def __init__(self): + self._notifySink = None + self._deviceState = _DeviceState.CLOSED + self._waveFormat: Optional[nvwave.WAVEFORMATEX] = None + self._player: Optional[nvwave.WavePlayer] = None + self._writtenBytes = 0 + self._playedBytes = 0 + self._startTime = datetime.now() + self._audioQueue: deque[bytes | int] = deque() # bytes: audio, int: bookmark + self._audioCond = threading.Condition() + self._audioStopped = False + self._audioThread: Optional[threading.Thread] = None + + def terminate(self): + self._shutdownAudioThread() + + def __del__(self): + self.terminate() + + def _maybeInitPlayer(self) -> None: + """Initialize audio playback based on the wave format provided by the engine. + If the format has not changed, the existing player is used. + Otherwise, a new one is created with the appropriate parameters.""" + if self._player: + # Reuse the previous player if possible (using the same format) + if ( + self._player.channels == self._waveFormat.nChannels + and self._player.samplesPerSec == self._waveFormat.nSamplesPerSec + and self._player.bitsPerSample == self._waveFormat.wBitsPerSample + ): + return # same format, use the previous player + # different format, close and recreate a new player + self._player.stop() + self._player = nvwave.WavePlayer( + channels=self._waveFormat.nChannels, + samplesPerSec=self._waveFormat.nSamplesPerSec, + bitsPerSample=self._waveFormat.wBitsPerSample, + outputDevice=config.conf["audio"]["outputDevice"], + ) + self._player.open() + + def IAudio_Flush(self) -> None: + """Clears the object's internal buffer and resets the audio device, + but does not stop playing the audio data afterwards.""" + if self._player: + self._player.stop() + with self._audioCond: + if self._notifySink: + while self._audioQueue: + item = self._audioQueue.popleft() + if isinstance(item, int): + # Flush all untriggered bookmarks. + # 1 (TRUE) means that the bookmark is sent because of flushing. + self._notifySink.BookMark(item, 1) + self._audioQueue.clear() + + def IAudio_LevelGet(self) -> int: + """Returns the volume level, ranging from 0x0000 to 0xFFFF. + Low word is for the left (or mono) channel, and high word is for the right channel.""" + # TODO: Not implemented yet. + return 0xFFFF + + def IAudio_LevelSet(self, dwLevel: int) -> None: + """Sets the volume level, ranging from 0x0000 to 0xFFFF. + Low word is for the left (or mono) channel, and high word is for the right channel.""" + # TODO: Not implemented yet. + pass + + def IAudio_PassNotify(self, pNotifyInterface: c_void_p, IIDNotifyInterface: GUID) -> None: + """Passes in an implementation of IAudioDestNotifySink to receive notifications. + The previous sink, if exists, will be released and replaced. + Allows specifying NULL for no sink.""" + if IIDNotifyInterface != IAudioDestNotifySink._iid_: + log.debugWarning("Only IAudioDestNotifySink is allowed") + raise ReturnHRESULT(AUDERR_INVALIDNOTIFYSINK, None) + if self._notifySink: + self._notifySink = None + if pNotifyInterface: + self._notifySink = cast(pNotifyInterface, POINTER(IAudioDestNotifySink)) + + def IAudio_PosnGet(self) -> int: + """Returns the byte position currently being played, + which should increase monotonically and never reset.""" + return self._playedBytes + + def IAudio_Claim(self): + """Acquires (opens) the multimedia device. + `IAudioDestNotifySink::AudioStart()` will be called to notify the engine.""" + if not self._waveFormat: + raise ReturnHRESULT(AUDERR_NEEDWAVEFORMAT, None) + if self._deviceState == _DeviceState.CLOSING: + # Close immediately + self.IAudio_Flush() + self._finishClose() + if self._deviceState != _DeviceState.CLOSED: + raise ReturnHRESULT(AUDERR_ALREADYCLAIMED, None) + self._maybeInitPlayer() + self._startAudioThread() + self._deviceState = _DeviceState.OPENED + if self._notifySink: + self._notifySink.AudioStart() + + def IAudio_UnClaim(self): + """Releases the multimedia device asynchronously. + `IAudioDestNotifySink::AudioStop()` will be called after the audio completely stops.""" + if self._deviceState == _DeviceState.CLOSED: + raise ReturnHRESULT(AUDERR_NOTCLAIMED, None) + self._deviceState = _DeviceState.CLOSING + with self._audioCond: + self._audioCond.notify() + + def IAudio_Start(self) -> None: + """Starts (or resumes) playing.""" + if self._deviceState == _DeviceState.RUNNING: + raise ReturnHRESULT(AUDERR_ALREADYSTARTED, None) + if self._deviceState != _DeviceState.OPENED: + raise ReturnHRESULT(AUDERR_NOTCLAIMED, None) + try: + self._player.pause(False) + except OSError: + pass + self._deviceState = _DeviceState.RUNNING + + def IAudio_Stop(self) -> None: + """Stops (or pauses) playing, without clearing the buffer.""" + if self._deviceState != _DeviceState.RUNNING: + return # no error returned + try: + self._player.pause(True) + except OSError: + pass + self._deviceState = _DeviceState.OPENED + + def IAudio_TotalGet(self) -> int: + """Returns the total number of bytes written, + including the unplayed bytes in the buffer, + which should increase monotonically and never reset.""" + return self._writtenBytes + + def IAudio_ToFileTime(self, pqWord): + # TODO: Add type hint + """Converts a byte position to UTC FILETIME.""" + if not self._waveFormat: + raise ReturnHRESULT(AUDERR_NEEDWAVEFORMAT, None) + filetime_ticks = int((self._startTime.timestamp() + 11644473600) * 10_000_000) + filetime_ticks += pqWord[0] * 10_000_000 // self._waveFormat.nAvgBytesPerSec + return FILETIME(filetime_ticks & 0xFFFFFFFF, filetime_ticks >> 32) + + def IAudio_WaveFormatGet(self) -> SDATA: + """Gets a copy of the current wave format. + :returns: A pointer to the WAVEFORMATEX structure. + Should be freed by the caller using CoTaskMemFree.""" + if not self._waveFormat: + raise ReturnHRESULT(AUDERR_NEEDWAVEFORMAT, None) + size = sizeof(nvwave.WAVEFORMATEX) + ptr = windll.ole32.CoTaskMemAlloc(size) + if not ptr: + raise COMError(hresult.E_OUTOFMEMORY, "CoTaskMemAlloc failed", (None, None, None, None, None)) + memmove(ptr, addressof(self._waveFormat), size) + return SDATA(ptr, size) + + def IAudio_WaveFormatSet(self, dWFEX: SDATA) -> None: + """Sets the current wave format. Only integer PCM formats are supported.""" + size = 18 # SAPI4 uses 18 bytes without the final padding + if dWFEX.dwSize < size: + log.debugWarning("Invalid wave format size") + raise ReturnHRESULT(hresult.E_INVALIDARG, None) + pWfx = cast(dWFEX.pData, POINTER(nvwave.WAVEFORMATEX)) + if pWfx[0].wFormatTag != nvwave.WAVE_FORMAT_PCM: + log.debugWarning("Wave format not supported. Only integer PCM formats are supported.") + raise ReturnHRESULT(AUDERR_WAVEFORMATNOTSUPPORTED, None) + if self._deviceState != _DeviceState.CLOSED: + log.debugWarning("Cannot change wave format during playback.") + raise ReturnHRESULT(AUDERR_WAVEFORMATNOTSUPPORTED, None) + self._waveFormat = nvwave.WAVEFORMATEX() + memmove(addressof(self._waveFormat), pWfx, size) + + def _getFreeSpace(self) -> int: + if not self._waveFormat: + raise ReturnHRESULT(AUDERR_NEEDWAVEFORMAT, None) + return self._waveFormat.nAvgBytesPerSec // 5 # always 200ms + + def IAudioDest_FreeSpace(self) -> tuple[DWORD, BOOL]: + # TODO: Docstring about return value + """Returns the number of bytes that are free in the object's internal buffer.""" + return (self._getFreeSpace(), 0) + + def IAudioDest_DataSet(self, pBuffer: c_void_p, dwSize: int): + """Writes audio data to the end of the object's internal buffer. + This should not block. + When data cannot fit in the buffer, this should return AUDERR_NOTENOUGHDATA immediately.""" + if self._deviceState != _DeviceState.RUNNING and self._deviceState != _DeviceState.OPENED: + log.debugWarning("Audio data written when device is not claimed") + raise ReturnHRESULT(AUDERR_NOTCLAIMED, None) + with self._audioCond: + self._audioQueue.append(string_at(pBuffer, dwSize)) + self._writtenBytes += dwSize + self._audioCond.notify() + + def IAudioDest_BookMark(self, dwMarkID: int): + """Attaches a bookmark to the most recent data in the audio-destination object's internal buffer. + When the bookmark is reached, `IAudioDestNotifySink::BookMark` is called. + When Flush is called, untriggered bookmarks should also be triggered.""" + with self._audioCond: + self._audioQueue.append(dwMarkID) + + def _audioThreadFunc(self): + while True: + with self._audioCond: + while not self._audioQueue and not self._audioStopped: + # Since WavePlayer.feed returns before the audio finishes, + # in order not to lose the final callbacks + # when there's no more audio to feed, + # wait with a timeout to give WavePlayer a chance + # to check the callbacks periodically. + self._audioCond.wait(0.1) + if self._audioQueue: + break + if self._deviceState == _DeviceState.CLOSING: + # Closing in progress, wait for the audio to finish + self._player.feed(None, 0, lambda: self._finishClose()) + else: + # Call feed to let WavePlayer check the callbacks + self._player.feed(None, 0, None) + if self._audioStopped: + return + item = self._audioQueue.popleft() + if isinstance(item, bytes): # audio + self._player.feed(item, len(item), lambda item=item: self._onChunkFinished(item)) + elif isinstance(item, int): # bookmark + if self._playedBytes == self._writtenBytes: + self._onBookmark(item) # trigger immediately + else: + self._player.feed(None, 0, lambda item=item: self._onBookmark(item)) + + def _startAudioThread(self): + if self._audioThread: + return + self._audioStopped = False + self._audioThread = threading.Thread(target=self._audioThreadFunc) + self._audioThread.start() + + def _shutdownAudioThread(self): + if not self._audioThread: + return + with self._audioCond: + self._audioStopped = True + self._audioCond.notify() + if self._audioThread is not threading.current_thread(): + self._audioThread.join() + self._audioThread = None + + def _onChunkFinished(self, chunk: bytes): + self._playedBytes += len(chunk) + if self._notifySink: + self._notifySink.FreeSpace(self._getFreeSpace(), 0) + + def _onBookmark(self, dwMarkID: int): + if self._notifySink: + self._notifySink.BookMark(dwMarkID, 0) + + def _finishClose(self): + if self._deviceState == _DeviceState.CLOSING: + self._player.stop() + self._shutdownAudioThread() + if self._notifySink: + self._notifySink.AudioStop(0) # IANSRSN_NODATA + self._deviceState = _DeviceState.CLOSED + + class SynthDriverSink(COMObject): _com_interfaces_ = [ITTSNotifySinkW] @@ -192,6 +488,7 @@ def __init__(self): def terminate(self): self._bufSink._allowDelete = True + self._ttsAudio.terminate() def speak(self, speechSequence: SpeechSequence): textList = [] @@ -302,8 +599,7 @@ def _set_voice(self, val): if mode is None: raise ValueError("no such mode: %s" % val) self._currentMode = mode - self._ttsAudio = CoCreateInstance(CLSID_MMAudioDest, IAudioMultiMediaDevice) - self._ttsAudio.DeviceNumSet(_mmDeviceEndpointIdToWaveOutId(config.conf["audio"]["outputDevice"])) + self._ttsAudio = SynthDriverAudio() if self._ttsCentral: self._ttsCentral.UnRegister(self._sinkRegKey) self._ttsCentral = POINTER(ITTSCentralW)() @@ -447,52 +743,6 @@ def _set_volume(self, val: int): self._ttsAttrs.VolumeSet(val) -def _mmDeviceEndpointIdToWaveOutId(targetEndpointId: str) -> int: - """Translate from an MMDevice Endpoint ID string to a WaveOut Device ID number. - - :param targetEndpointId: MMDevice endpoint ID string to translate from, or the default value of the `audio.outputDevice` configuration key for the default output device. - :return: An integer WaveOut device ID for use with SAPI4. - If no matching device is found, or the default output device is requested, `-1` is returned, which means output will be handled by Microsoft Sound Mapper. - """ - if targetEndpointId != config.conf.getConfigValidation(("audio", "outputDevice")).default: - targetEndpointIdByteCount = (len(targetEndpointId) + 1) * sizeof(c_wchar) - currEndpointId = create_string_buffer(targetEndpointIdByteCount) - currEndpointIdByteCount = DWORD() - # Defined in mmeapi.h - winmm = windll.winmm - waveOutMessage = winmm.waveOutMessage - waveOutGetNumDevs = winmm.waveOutGetNumDevs - for devID in range(waveOutGetNumDevs()): - # Get the length of this device's endpoint ID string. - mmr = waveOutMessage( - HANDLE(devID), - DriverMessage.QUERY_INSTANCE_ID_SIZE, - byref(currEndpointIdByteCount), - None, - ) - if (mmr != MMSYSERR_NOERROR) or (currEndpointIdByteCount.value != targetEndpointIdByteCount): - # ID lengths don't match, so this device can't be a match. - continue - # Get the device's endpoint ID string. - mmr = waveOutMessage( - HANDLE(devID), - DriverMessage.QUERY_INSTANCE_ID, - byref(currEndpointId), - currEndpointIdByteCount, - ) - if mmr != MMSYSERR_NOERROR: - continue - # Decode the endpoint ID string to a python string, and strip the null terminator. - if ( - currEndpointId.raw[: targetEndpointIdByteCount - sizeof(c_wchar)].decode("utf-16") - == targetEndpointId - ): - return devID - # No matching device found, or default requested explicitly. - # Return the ID of Microsoft Sound Mapper - return -1 - - def _sapi4DeprecationWarning(synth: SynthDriver, audioOutputDevice: str, isFallback: bool): """A synthChanged event handler to alert the user about the deprecation of SAPI4.""" From 36d183ed093158226495b64172ed8776b82e70d5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 21 Feb 2025 04:15:29 +0000 Subject: [PATCH 02/21] Pre-commit auto-fix --- source/synthDrivers/_sapi4.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/source/synthDrivers/_sapi4.py b/source/synthDrivers/_sapi4.py index a1b9ca2a5b..f67e664d3c 100755 --- a/source/synthDrivers/_sapi4.py +++ b/source/synthDrivers/_sapi4.py @@ -268,7 +268,11 @@ class IAudio(IUnknown): COMMETHOD([], HRESULT, "Stop"), COMMETHOD([], HRESULT, "TotalGet", (["out"], POINTER(QWORD), "pqWord")), COMMETHOD( - [], HRESULT, "ToFileTime", (["in"], POINTER(QWORD), "pqWord"), (["out"], POINTER(FILETIME), "pFT") + [], + HRESULT, + "ToFileTime", + (["in"], POINTER(QWORD), "pqWord"), + (["out"], POINTER(FILETIME), "pFT"), ), COMMETHOD([], HRESULT, "WaveFormatGet", (["out"], POINTER(SDATA), "pdWFEX")), COMMETHOD([], HRESULT, "WaveFormatSet", (["in"], SDATA, "dWFEX")), @@ -281,7 +285,11 @@ class IAudioDest(IUnknown): IAudioDest._methods_ = [ COMMETHOD( - [], HRESULT, "FreeSpace", (["out"], POINTER(DWORD), "pdwBytes"), (["out"], POINTER(BOOL), "pfEOF") + [], + HRESULT, + "FreeSpace", + (["out"], POINTER(DWORD), "pdwBytes"), + (["out"], POINTER(BOOL), "pfEOF"), ), COMMETHOD([], HRESULT, "DataSet", (["in"], c_void_p, "pBuffer"), (["in"], DWORD, "dwSize")), COMMETHOD([], HRESULT, "BookMark", (["in"], DWORD, "dwMarkID")), From ad95fea42d7733acd03dcc6717358cfc4ecc01a3 Mon Sep 17 00:00:00 2001 From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com> Date: Fri, 21 Feb 2025 23:13:34 +0800 Subject: [PATCH 03/21] Add type hints & doc strings --- source/synthDrivers/sapi4.py | 46 +++++++++++++++++++++++++++--------- 1 file changed, 35 insertions(+), 11 deletions(-) diff --git a/source/synthDrivers/sapi4.py b/source/synthDrivers/sapi4.py index 8b5eeae942..8a3ffcba59 100755 --- a/source/synthDrivers/sapi4.py +++ b/source/synthDrivers/sapi4.py @@ -11,9 +11,21 @@ import threading import winreg from comtypes import CoCreateInstance, COMObject, COMError, GUID, hresult, ReturnHRESULT -from ctypes import addressof, byref, c_ulong, POINTER, c_void_p, cast, memmove, string_at, sizeof, windll +from ctypes import ( + addressof, + byref, + c_ulong, + c_ulonglong, + POINTER, + c_void_p, + cast, + memmove, + string_at, + sizeof, + windll, +) from ctypes.wintypes import BOOL, DWORD, FILETIME, WORD -from typing import Optional +from typing import TYPE_CHECKING, Optional from autoSettingsUtils.driverSetting import BooleanDriverSetting import gui.contextHelp import gui.message @@ -111,6 +123,14 @@ class _DeviceState(IntEnum): CLOSING = 3 # Unclaiming +if TYPE_CHECKING: + from ctypes import _Pointer + + c_ulonglong_p = _Pointer[c_ulonglong] +else: + c_ulonglong_p = POINTER(c_ulonglong) + + class SynthDriverAudio(COMObject): """ Implements IAudio and IAudioDest to receive streamed in audio data. @@ -205,7 +225,7 @@ def IAudio_PosnGet(self) -> int: which should increase monotonically and never reset.""" return self._playedBytes - def IAudio_Claim(self): + def IAudio_Claim(self) -> None: """Acquires (opens) the multimedia device. `IAudioDestNotifySink::AudioStart()` will be called to notify the engine.""" if not self._waveFormat: @@ -222,7 +242,7 @@ def IAudio_Claim(self): if self._notifySink: self._notifySink.AudioStart() - def IAudio_UnClaim(self): + def IAudio_UnClaim(self) -> None: """Releases the multimedia device asynchronously. `IAudioDestNotifySink::AudioStop()` will be called after the audio completely stops.""" if self._deviceState == _DeviceState.CLOSED: @@ -259,8 +279,7 @@ def IAudio_TotalGet(self) -> int: which should increase monotonically and never reset.""" return self._writtenBytes - def IAudio_ToFileTime(self, pqWord): - # TODO: Add type hint + def IAudio_ToFileTime(self, pqWord: c_ulonglong_p) -> None: """Converts a byte position to UTC FILETIME.""" if not self._waveFormat: raise ReturnHRESULT(AUDERR_NEEDWAVEFORMAT, None) @@ -303,11 +322,13 @@ def _getFreeSpace(self) -> int: return self._waveFormat.nAvgBytesPerSec // 5 # always 200ms def IAudioDest_FreeSpace(self) -> tuple[DWORD, BOOL]: - # TODO: Docstring about return value - """Returns the number of bytes that are free in the object's internal buffer.""" + """Returns the number of bytes that are free in the object's internal buffer. + :returns: Tuple (dwBytes, fEOF). + dwBytes: number of bytes available. + fEOF: TRUE if end-of-file is reached and no more data can be sent.""" return (self._getFreeSpace(), 0) - def IAudioDest_DataSet(self, pBuffer: c_void_p, dwSize: int): + def IAudioDest_DataSet(self, pBuffer: c_void_p, dwSize: int) -> None: """Writes audio data to the end of the object's internal buffer. This should not block. When data cannot fit in the buffer, this should return AUDERR_NOTENOUGHDATA immediately.""" @@ -319,7 +340,7 @@ def IAudioDest_DataSet(self, pBuffer: c_void_p, dwSize: int): self._writtenBytes += dwSize self._audioCond.notify() - def IAudioDest_BookMark(self, dwMarkID: int): + def IAudioDest_BookMark(self, dwMarkID: int) -> None: """Attaches a bookmark to the most recent data in the audio-destination object's internal buffer. When the bookmark is reached, `IAudioDestNotifySink::BookMark` is called. When Flush is called, untriggered bookmarks should also be triggered.""" @@ -327,6 +348,7 @@ def IAudioDest_BookMark(self, dwMarkID: int): self._audioQueue.append(dwMarkID) def _audioThreadFunc(self): + """Audio thread function that feeds the audio data from queue to WavePlayer.""" while True: with self._audioCond: while not self._audioQueue and not self._audioStopped: @@ -382,12 +404,14 @@ def _onBookmark(self, dwMarkID: int): self._notifySink.BookMark(dwMarkID, 0) def _finishClose(self): + """Finishes the asynchronous UnClaim call.""" if self._deviceState == _DeviceState.CLOSING: self._player.stop() self._shutdownAudioThread() + self._deviceState = _DeviceState.CLOSED if self._notifySink: + # Notify when the device is finally closed self._notifySink.AudioStop(0) # IANSRSN_NODATA - self._deviceState = _DeviceState.CLOSED class SynthDriverSink(COMObject): From 302968a2b36b2603ce9af12a549f51cf7c196208 Mon Sep 17 00:00:00 2001 From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com> Date: Fri, 21 Feb 2025 23:17:12 +0800 Subject: [PATCH 04/21] Change log level in `cancel` to debugWarning, because error 0x80010005 is sometimes raised --- source/synthDrivers/sapi4.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/synthDrivers/sapi4.py b/source/synthDrivers/sapi4.py index 8a3ffcba59..26e19af930 100755 --- a/source/synthDrivers/sapi4.py +++ b/source/synthDrivers/sapi4.py @@ -591,7 +591,7 @@ def cancel(self): self._bookmarks = None self._ttsCentral.AudioReset() except COMError: - log.error("Error cancelling speech", exc_info=True) + log.debugWarning("Error cancelling speech", exc_info=True) finally: self._finalIndex = None From df66a5253a518f7cb9ea076f53bb80ac854aa73d Mon Sep 17 00:00:00 2001 From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com> Date: Sat, 22 Feb 2025 08:59:36 +0800 Subject: [PATCH 05/21] Reset `_startTime` on start --- source/synthDrivers/sapi4.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/source/synthDrivers/sapi4.py b/source/synthDrivers/sapi4.py index 26e19af930..08c6bdbebe 100755 --- a/source/synthDrivers/sapi4.py +++ b/source/synthDrivers/sapi4.py @@ -148,6 +148,7 @@ def __init__(self): self._writtenBytes = 0 self._playedBytes = 0 self._startTime = datetime.now() + self._startBytes = 0 self._audioQueue: deque[bytes | int] = deque() # bytes: audio, int: bookmark self._audioCond = threading.Condition() self._audioStopped = False @@ -257,6 +258,8 @@ def IAudio_Start(self) -> None: raise ReturnHRESULT(AUDERR_ALREADYSTARTED, None) if self._deviceState != _DeviceState.OPENED: raise ReturnHRESULT(AUDERR_NOTCLAIMED, None) + self._startTime = datetime.now() + self._startBytes = self._playedBytes try: self._player.pause(False) except OSError: @@ -284,7 +287,7 @@ def IAudio_ToFileTime(self, pqWord: c_ulonglong_p) -> None: if not self._waveFormat: raise ReturnHRESULT(AUDERR_NEEDWAVEFORMAT, None) filetime_ticks = int((self._startTime.timestamp() + 11644473600) * 10_000_000) - filetime_ticks += pqWord[0] * 10_000_000 // self._waveFormat.nAvgBytesPerSec + filetime_ticks += (pqWord[0] - self._startBytes) * 10_000_000 // self._waveFormat.nAvgBytesPerSec return FILETIME(filetime_ticks & 0xFFFFFFFF, filetime_ticks >> 32) def IAudio_WaveFormatGet(self) -> SDATA: From d57d6c4b9ab161c142dd3b8f4c7bb9028b4cb512 Mon Sep 17 00:00:00 2001 From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com> Date: Sat, 22 Feb 2025 09:13:31 +0800 Subject: [PATCH 06/21] Implement LevelGet/Set --- source/synthDrivers/sapi4.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/source/synthDrivers/sapi4.py b/source/synthDrivers/sapi4.py index 08c6bdbebe..37e907315d 100755 --- a/source/synthDrivers/sapi4.py +++ b/source/synthDrivers/sapi4.py @@ -153,6 +153,7 @@ def __init__(self): self._audioCond = threading.Condition() self._audioStopped = False self._audioThread: Optional[threading.Thread] = None + self._level = 0xFFFFFFFF def terminate(self): self._shutdownAudioThread() @@ -181,6 +182,7 @@ def _maybeInitPlayer(self) -> None: outputDevice=config.conf["audio"]["outputDevice"], ) self._player.open() + self.IAudio_LevelSet(self._level) def IAudio_Flush(self) -> None: """Clears the object's internal buffer and resets the audio device, @@ -200,14 +202,16 @@ def IAudio_Flush(self) -> None: def IAudio_LevelGet(self) -> int: """Returns the volume level, ranging from 0x0000 to 0xFFFF. Low word is for the left (or mono) channel, and high word is for the right channel.""" - # TODO: Not implemented yet. - return 0xFFFF + return self._level def IAudio_LevelSet(self, dwLevel: int) -> None: """Sets the volume level, ranging from 0x0000 to 0xFFFF. Low word is for the left (or mono) channel, and high word is for the right channel.""" - # TODO: Not implemented yet. - pass + self._level = dwLevel + if dwLevel & 0xFFFF0000: + self._player.setVolume(left=float(dwLevel & 0xFFFF) / 0xFFFF, right=float(dwLevel >> 16) / 0xFFFF) + else: + self._player.setVolume(all=float(dwLevel) / 0xFFFF) def IAudio_PassNotify(self, pNotifyInterface: c_void_p, IIDNotifyInterface: GUID) -> None: """Passes in an implementation of IAudioDestNotifySink to receive notifications. From 793a2be9f73cb9d1476ecf4275aa2cc965ae5741 Mon Sep 17 00:00:00 2001 From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com> Date: Sat, 22 Feb 2025 11:43:16 +0800 Subject: [PATCH 07/21] Add changelog entry --- user_docs/en/changes.md | 1 + 1 file changed, 1 insertion(+) diff --git a/user_docs/en/changes.md b/user_docs/en/changes.md index c79cce0de3..1dffa2812a 100644 --- a/user_docs/en/changes.md +++ b/user_docs/en/changes.md @@ -78,6 +78,7 @@ Prefix matching on command line flags, e.g. using `--di` for `--disable-addons` * The keyboard settings for "Speak typed characters" and "Speak typed words" now have three options: Off, Only in edit controls, and Always. (#17505, @Cary-rowen) * By default, "Speak typed characters" is now set to "Only in edit controls". * The silence at the beginning of speech will now be trimmed when using OneCore voices, SAPI5 voices, and some third-party voice add-ons to improve their responsiveness. (#17614, @gexgd0419) +* Microsoft Speech API version 4 voices now use WASAPI for audio output, so that they can work with features such as audio ducking, leading silence trimming, and keeping audio device awake. (#17718, @gexgd0419) ### Security Fixes From 858ff5ed6fc0a81bb2308fd41b023d3c9bad6e1d Mon Sep 17 00:00:00 2001 From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com> Date: Mon, 24 Feb 2025 00:37:56 +0800 Subject: [PATCH 08/21] Change implementation, add docstring --- source/synthDrivers/sapi4.py | 172 +++++++++++++++++++++-------------- 1 file changed, 105 insertions(+), 67 deletions(-) diff --git a/source/synthDrivers/sapi4.py b/source/synthDrivers/sapi4.py index 37e907315d..b593de3ac8 100755 --- a/source/synthDrivers/sapi4.py +++ b/source/synthDrivers/sapi4.py @@ -116,13 +116,6 @@ def IUnknown_Release(self, this, *args, **kwargs): return super(SynthDriverBufSink, self).IUnknown_Release(this, *args, **kwargs) -class _DeviceState(IntEnum): - CLOSED = 0 # Not claimed - OPENED = 1 # Claimed - RUNNING = 2 # Started - CLOSING = 3 # Unclaiming - - if TYPE_CHECKING: from ctypes import _Pointer @@ -136,13 +129,31 @@ class SynthDriverAudio(COMObject): Implements IAudio and IAudioDest to receive streamed in audio data. An instance of this class will be passed to, and be used by the TTS engine. + + Typically, an engine does the following things to output audio. + (Note that different engines may have different implementations) + + - Initialize, such as setting wave format with `WaveFormatSet`, setting notify sink with `PassNotify`, etc. + - Call `Claim` to prepare the audio output. + - Call `DataSet` to prepare some initial audio data. + - Call `Start` to start playing. + - Call `DataSet` to provide more audio data, + and call `BookMark` when the engine want to know when audio reaches a specific point. + - Call `UnClaim` when all the audio has been written. The audio will still be played to the end. + - When pausing the audio, it calls `Stop` and `UnClaim`. + - When unpausing the audio, it calls `Claim` and `Start`. + - When resetting the audio, it calls `Stop`, `Flush`, and `UnClaim`. + `Stop` and `UnClaim` will not clear the buffer, but `Flush` will. """ _com_interfaces_ = [IAudio, IAudioDest] def __init__(self): self._notifySink = None - self._deviceState = _DeviceState.CLOSED + self._deviceClaimed = False + self._deviceStarted = False + self._deviceUnClaiming = False + self._deviceUnClaimingBytePos: Optional[int] = None self._waveFormat: Optional[nvwave.WAVEFORMATEX] = None self._player: Optional[nvwave.WavePlayer] = None self._writtenBytes = 0 @@ -152,11 +163,16 @@ def __init__(self): self._audioQueue: deque[bytes | int] = deque() # bytes: audio, int: bookmark self._audioCond = threading.Condition() self._audioStopped = False - self._audioThread: Optional[threading.Thread] = None + self._audioThread = threading.Thread(target=self._audioThreadFunc) + self._audioThread.start() self._level = 0xFFFFFFFF def terminate(self): - self._shutdownAudioThread() + with self._audioCond: + self._audioStopped = True + self._audioCond.notify() + if self._audioThread is not threading.current_thread(): + self._audioThread.join() def __del__(self): self.terminate() @@ -232,53 +248,74 @@ def IAudio_PosnGet(self) -> int: def IAudio_Claim(self) -> None: """Acquires (opens) the multimedia device. - `IAudioDestNotifySink::AudioStart()` will be called to notify the engine.""" + Called before the engine wants to write audio data. + `IAudioDestNotifySink::AudioStart()` will be called to notify the engine. + Previous buffer should not be cleared. + If Claim is called before unclaiming completes, unclaiming is canceled, + and neither AudioStop nor AudioStart is notified.""" if not self._waveFormat: raise ReturnHRESULT(AUDERR_NEEDWAVEFORMAT, None) - if self._deviceState == _DeviceState.CLOSING: - # Close immediately - self.IAudio_Flush() - self._finishClose() - if self._deviceState != _DeviceState.CLOSED: + with self._audioCond: + if self._deviceUnClaiming: + # Unclaiming is cancelled, but nothing else is touched. + self._deviceUnClaiming = False + self._deviceUnClaimingBytePos = None + return + if self._deviceClaimed: raise ReturnHRESULT(AUDERR_ALREADYCLAIMED, None) self._maybeInitPlayer() - self._startAudioThread() - self._deviceState = _DeviceState.OPENED + self._deviceClaimed = True if self._notifySink: self._notifySink.AudioStart() def IAudio_UnClaim(self) -> None: """Releases the multimedia device asynchronously. + Called after the engine completes writing all audio data. + If there is audio in the buffer, it should still be played till the end. `IAudioDestNotifySink::AudioStop()` will be called after the audio completely stops.""" - if self._deviceState == _DeviceState.CLOSED: + if not self._deviceClaimed: raise ReturnHRESULT(AUDERR_NOTCLAIMED, None) - self._deviceState = _DeviceState.CLOSING + if not self._deviceStarted: + # When not playing, this can finish immediately. + self._deviceClaimed = False + if self._notifySink: + self._notifySink.AudioStop(0) # IANSRSN_NODATA + return + # When playing, wait for the playback to finish. with self._audioCond: + self._deviceUnClaiming = True + self._deviceUnClaimingBytePos = self._writtenBytes self._audioCond.notify() def IAudio_Start(self) -> None: - """Starts (or resumes) playing.""" - if self._deviceState == _DeviceState.RUNNING: + """Starts (or resumes) playing the audio in the buffer.""" + if self._deviceStarted: raise ReturnHRESULT(AUDERR_ALREADYSTARTED, None) - if self._deviceState != _DeviceState.OPENED: + if not self._deviceClaimed: raise ReturnHRESULT(AUDERR_NOTCLAIMED, None) self._startTime = datetime.now() self._startBytes = self._playedBytes try: self._player.pause(False) except OSError: - pass - self._deviceState = _DeviceState.RUNNING + log.debugWarning("Error starting audio", exc_info=True) + with self._audioCond: + self._deviceStarted = True + self._audioCond.notify() def IAudio_Stop(self) -> None: - """Stops (or pauses) playing, without clearing the buffer.""" - if self._deviceState != _DeviceState.RUNNING: + """Stops (or pauses) playing, without clearing the buffer. + If there is audio in the buffer, calling Stop and UnClaim should keep the buffer + and only pause the playback.""" + if not self._deviceStarted: return # no error returned try: self._player.pause(True) except OSError: - pass - self._deviceState = _DeviceState.OPENED + log.debugWarning("Error stopping audio", exc_info=True) + with self._audioCond: + self._deviceStarted = False + self._audioCond.notify() def IAudio_TotalGet(self) -> int: """Returns the total number of bytes written, @@ -317,7 +354,7 @@ def IAudio_WaveFormatSet(self, dWFEX: SDATA) -> None: if pWfx[0].wFormatTag != nvwave.WAVE_FORMAT_PCM: log.debugWarning("Wave format not supported. Only integer PCM formats are supported.") raise ReturnHRESULT(AUDERR_WAVEFORMATNOTSUPPORTED, None) - if self._deviceState != _DeviceState.CLOSED: + if self._deviceStarted or self._audioQueue: log.debugWarning("Cannot change wave format during playback.") raise ReturnHRESULT(AUDERR_WAVEFORMATNOTSUPPORTED, None) self._waveFormat = nvwave.WAVEFORMATEX() @@ -339,7 +376,7 @@ def IAudioDest_DataSet(self, pBuffer: c_void_p, dwSize: int) -> None: """Writes audio data to the end of the object's internal buffer. This should not block. When data cannot fit in the buffer, this should return AUDERR_NOTENOUGHDATA immediately.""" - if self._deviceState != _DeviceState.RUNNING and self._deviceState != _DeviceState.OPENED: + if not self._deviceClaimed or self._deviceUnClaiming: log.debugWarning("Audio data written when device is not claimed") raise ReturnHRESULT(AUDERR_NOTCLAIMED, None) with self._audioCond: @@ -358,18 +395,28 @@ def _audioThreadFunc(self): """Audio thread function that feeds the audio data from queue to WavePlayer.""" while True: with self._audioCond: - while not self._audioQueue and not self._audioStopped: - # Since WavePlayer.feed returns before the audio finishes, - # in order not to lose the final callbacks - # when there's no more audio to feed, - # wait with a timeout to give WavePlayer a chance - # to check the callbacks periodically. - self._audioCond.wait(0.1) - if self._audioQueue: + while not self._audioStopped and not (self._deviceStarted and self._audioQueue): + if self._deviceStarted: + # Since WavePlayer.feed returns before the audio finishes, + # in order not to lose the final callbacks + # when there's no more audio to feed, + # wait with a timeout to give WavePlayer a chance + # to check the callbacks periodically. + self._audioCond.wait(0.1) + else: + self._audioCond.wait() + if self._deviceStarted and self._audioQueue: break - if self._deviceState == _DeviceState.CLOSING: + if not self._player: + continue + if self._deviceUnClaimingBytePos is not None: # Closing in progress, wait for the audio to finish - self._player.feed(None, 0, lambda: self._finishClose()) + self._player.feed( + None, + 0, + lambda bytePos=self._deviceUnClaimingBytePos: self._finishUnClaim(bytePos), + ) + self._deviceUnClaimingBytePos = None else: # Call feed to let WavePlayer check the callbacks self._player.feed(None, 0, None) @@ -384,23 +431,6 @@ def _audioThreadFunc(self): else: self._player.feed(None, 0, lambda item=item: self._onBookmark(item)) - def _startAudioThread(self): - if self._audioThread: - return - self._audioStopped = False - self._audioThread = threading.Thread(target=self._audioThreadFunc) - self._audioThread.start() - - def _shutdownAudioThread(self): - if not self._audioThread: - return - with self._audioCond: - self._audioStopped = True - self._audioCond.notify() - if self._audioThread is not threading.current_thread(): - self._audioThread.join() - self._audioThread = None - def _onChunkFinished(self, chunk: bytes): self._playedBytes += len(chunk) if self._notifySink: @@ -410,15 +440,20 @@ def _onBookmark(self, dwMarkID: int): if self._notifySink: self._notifySink.BookMark(dwMarkID, 0) - def _finishClose(self): - """Finishes the asynchronous UnClaim call.""" - if self._deviceState == _DeviceState.CLOSING: - self._player.stop() - self._shutdownAudioThread() - self._deviceState = _DeviceState.CLOSED - if self._notifySink: - # Notify when the device is finally closed - self._notifySink.AudioStop(0) # IANSRSN_NODATA + def _finishUnClaim(self, bytePos: int): + """Finishes the asynchronous UnClaim call. + + :param bytePos: The written byte count when this UnClaim request is made. + This is checked to prevent triggering on outdated UnClaim requests.""" + if not self._deviceUnClaiming or self._writtenBytes != bytePos: + return + self._player.stop() + self._deviceStarted = False + self._deviceUnClaiming = False + self._deviceClaimed = False + if self._notifySink: + # Notify when the device is finally closed + self._notifySink.AudioStop(0) # IANSRSN_NODATA class SynthDriverSink(COMObject): @@ -505,6 +540,7 @@ def __init__(self): self._sinkPtr = self._sink.QueryInterface(ITTSNotifySinkW) self._bufSink = SynthDriverBufSink(weakref.ref(self)) self._bufSinkPtr = self._bufSink.QueryInterface(ITTSBufNotifySink) + self._ttsAudio: Optional[SynthDriverAudio] = None # HACK: Some buggy engines call Release() too many times on our buf sink. # Therefore, don't let the buf sink be deleted before we release it ourselves. self._bufSink._allowDelete = False @@ -630,6 +666,8 @@ def _set_voice(self, val): if mode is None: raise ValueError("no such mode: %s" % val) self._currentMode = mode + if self._ttsAudio: + self._ttsAudio.terminate() self._ttsAudio = SynthDriverAudio() if self._ttsCentral: self._ttsCentral.UnRegister(self._sinkRegKey) From 03ce8108102d3659c33193afeeec4466fc4b4828 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 23 Feb 2025 16:38:50 +0000 Subject: [PATCH 09/21] Pre-commit auto-fix --- source/synthDrivers/sapi4.py | 1 - 1 file changed, 1 deletion(-) diff --git a/source/synthDrivers/sapi4.py b/source/synthDrivers/sapi4.py index b593de3ac8..4e76ccef17 100755 --- a/source/synthDrivers/sapi4.py +++ b/source/synthDrivers/sapi4.py @@ -5,7 +5,6 @@ # This module is deprecated, pending removal in NVDA 2026.1. from datetime import datetime -from enum import IntEnum import locale from collections import OrderedDict, deque import threading From a06d3f4a1adab4676a3a27cb7c5f40a582cbf2c2 Mon Sep 17 00:00:00 2001 From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com> Date: Mon, 24 Feb 2025 19:17:17 +0800 Subject: [PATCH 10/21] Stop the player when unclaiming with empty buffer --- source/synthDrivers/sapi4.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/source/synthDrivers/sapi4.py b/source/synthDrivers/sapi4.py index 4e76ccef17..2501eb309c 100755 --- a/source/synthDrivers/sapi4.py +++ b/source/synthDrivers/sapi4.py @@ -274,17 +274,20 @@ def IAudio_UnClaim(self) -> None: `IAudioDestNotifySink::AudioStop()` will be called after the audio completely stops.""" if not self._deviceClaimed: raise ReturnHRESULT(AUDERR_NOTCLAIMED, None) - if not self._deviceStarted: + if self._deviceStarted: + # When playing, wait for the playback to finish. + with self._audioCond: + self._deviceUnClaiming = True + self._deviceUnClaimingBytePos = self._writtenBytes + self._audioCond.notify() + else: # When not playing, this can finish immediately. + if self._writtenBytes == self._playedBytes and not self._audioQueue: + # If all audio is done playing, stop the player. + self._player.stop() self._deviceClaimed = False if self._notifySink: self._notifySink.AudioStop(0) # IANSRSN_NODATA - return - # When playing, wait for the playback to finish. - with self._audioCond: - self._deviceUnClaiming = True - self._deviceUnClaimingBytePos = self._writtenBytes - self._audioCond.notify() def IAudio_Start(self) -> None: """Starts (or resumes) playing the audio in the buffer.""" From edfca3ea2a23bce67a0f358c4fe26423e25e1042 Mon Sep 17 00:00:00 2001 From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com> Date: Mon, 24 Feb 2025 22:25:03 +0800 Subject: [PATCH 11/21] Remove WinMM-related stuff --- source/synthDrivers/_sapi4.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/source/synthDrivers/_sapi4.py b/source/synthDrivers/_sapi4.py index f67e664d3c..f73d565760 100755 --- a/source/synthDrivers/_sapi4.py +++ b/source/synthDrivers/_sapi4.py @@ -18,7 +18,6 @@ Structure, ) from ctypes.wintypes import BOOL, BYTE, DWORD, FILETIME, LPCWSTR, WORD -from enum import IntEnum from comtypes import GUID, IUnknown, STDMETHOD, COMMETHOD import winKernel @@ -310,19 +309,3 @@ class IAudioDestNotifySink(IUnknown): CLSID_MMAudioDest = GUID("{CB96B400-C743-11cd-80E5-00AA003E4B50}") CLSID_TTSEnumerator = GUID("{D67C0280-C743-11cd-80E5-00AA003E4B50}") - - -# Defined in mmsyscom.h -MMSYSERR_NOERROR = 0 - - -class DriverMessage(IntEnum): - """WaveOutMessage message codes - Defined in mmddk.h - """ - - QUERY_INSTANCE_ID = 2065 - """DRV_QUERYFUNCTIONINSTANCEID """ - - QUERY_INSTANCE_ID_SIZE = 2066 - """DRV_QUERYFUNCTIONINSTANCEIDSIZE """ From 024e52807d6d9627c5220c78bdd7055c96f9d12f Mon Sep 17 00:00:00 2001 From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com> Date: Tue, 25 Feb 2025 10:17:31 +0800 Subject: [PATCH 12/21] Apply suggestions from code review Co-authored-by: Sean Budd --- source/synthDrivers/sapi4.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/source/synthDrivers/sapi4.py b/source/synthDrivers/sapi4.py index 2501eb309c..e7915db7f0 100755 --- a/source/synthDrivers/sapi4.py +++ b/source/synthDrivers/sapi4.py @@ -152,14 +152,17 @@ def __init__(self): self._deviceClaimed = False self._deviceStarted = False self._deviceUnClaiming = False - self._deviceUnClaimingBytePos: Optional[int] = None - self._waveFormat: Optional[nvwave.WAVEFORMATEX] = None - self._player: Optional[nvwave.WavePlayer] = None + self._deviceUnClaimingBytePos: int | None = None + self._waveFormat: nvwave.WAVEFORMATEX | None = None + self._player: nvwave.WavePlayer | None = None self._writtenBytes = 0 self._playedBytes = 0 self._startTime = datetime.now() self._startBytes = 0 - self._audioQueue: deque[bytes | int] = deque() # bytes: audio, int: bookmark +AudioT: TypeAlias = bytes +BookmarkT: TypeAlias = int + + self._audioQueue: deque[AudioT | BookmarkT] = deque() self._audioCond = threading.Condition() self._audioStopped = False self._audioThread = threading.Thread(target=self._audioThreadFunc) @@ -208,7 +211,7 @@ def IAudio_Flush(self) -> None: if self._notifySink: while self._audioQueue: item = self._audioQueue.popleft() - if isinstance(item, int): + if isinstance(item, BookmarkT): # Flush all untriggered bookmarks. # 1 (TRUE) means that the bookmark is sent because of flushing. self._notifySink.BookMark(item, 1) @@ -329,7 +332,8 @@ def IAudio_ToFileTime(self, pqWord: c_ulonglong_p) -> None: """Converts a byte position to UTC FILETIME.""" if not self._waveFormat: raise ReturnHRESULT(AUDERR_NEEDWAVEFORMAT, None) - filetime_ticks = int((self._startTime.timestamp() + 11644473600) * 10_000_000) + UNIX_TIME_CONV = 1_1644_473_600 + filetime_ticks = int((self._startTime.timestamp() + UNIX_TIME_CONV) * 10_000_000) filetime_ticks += (pqWord[0] - self._startBytes) * 10_000_000 // self._waveFormat.nAvgBytesPerSec return FILETIME(filetime_ticks & 0xFFFFFFFF, filetime_ticks >> 32) @@ -386,7 +390,7 @@ def IAudioDest_DataSet(self, pBuffer: c_void_p, dwSize: int) -> None: self._writtenBytes += dwSize self._audioCond.notify() - def IAudioDest_BookMark(self, dwMarkID: int) -> None: + def IAudioDest_BookMark(self, dwMarkID: BookmarkT) -> None: """Attaches a bookmark to the most recent data in the audio-destination object's internal buffer. When the bookmark is reached, `IAudioDestNotifySink::BookMark` is called. When Flush is called, untriggered bookmarks should also be triggered.""" @@ -425,20 +429,20 @@ def _audioThreadFunc(self): if self._audioStopped: return item = self._audioQueue.popleft() - if isinstance(item, bytes): # audio + if isinstance(item, AudioT): self._player.feed(item, len(item), lambda item=item: self._onChunkFinished(item)) - elif isinstance(item, int): # bookmark + elif isinstance(item, BookmarkT): if self._playedBytes == self._writtenBytes: self._onBookmark(item) # trigger immediately else: self._player.feed(None, 0, lambda item=item: self._onBookmark(item)) - def _onChunkFinished(self, chunk: bytes): + def _onChunkFinished(self, chunk: AudioT): self._playedBytes += len(chunk) if self._notifySink: self._notifySink.FreeSpace(self._getFreeSpace(), 0) - def _onBookmark(self, dwMarkID: int): + def _onBookmark(self, dwMarkID: BookmarkT): if self._notifySink: self._notifySink.BookMark(dwMarkID, 0) @@ -542,7 +546,7 @@ def __init__(self): self._sinkPtr = self._sink.QueryInterface(ITTSNotifySinkW) self._bufSink = SynthDriverBufSink(weakref.ref(self)) self._bufSinkPtr = self._bufSink.QueryInterface(ITTSBufNotifySink) - self._ttsAudio: Optional[SynthDriverAudio] = None + self._ttsAudio: SynthDriverAudio | None = None # HACK: Some buggy engines call Release() too many times on our buf sink. # Therefore, don't let the buf sink be deleted before we release it ourselves. self._bufSink._allowDelete = False From 2d0e7d3b44b25cf08973d81f191ca3d1b587848c Mon Sep 17 00:00:00 2001 From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com> Date: Tue, 25 Feb 2025 10:20:01 +0800 Subject: [PATCH 13/21] Move TypeAlias definitions --- source/synthDrivers/sapi4.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/source/synthDrivers/sapi4.py b/source/synthDrivers/sapi4.py index e7915db7f0..ef9ba268b6 100755 --- a/source/synthDrivers/sapi4.py +++ b/source/synthDrivers/sapi4.py @@ -24,7 +24,7 @@ windll, ) from ctypes.wintypes import BOOL, DWORD, FILETIME, WORD -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING, Optional, TypeAlias from autoSettingsUtils.driverSetting import BooleanDriverSetting import gui.contextHelp import gui.message @@ -121,6 +121,8 @@ def IUnknown_Release(self, this, *args, **kwargs): c_ulonglong_p = _Pointer[c_ulonglong] else: c_ulonglong_p = POINTER(c_ulonglong) +AudioT: TypeAlias = bytes +BookmarkT: TypeAlias = int class SynthDriverAudio(COMObject): @@ -159,9 +161,6 @@ def __init__(self): self._playedBytes = 0 self._startTime = datetime.now() self._startBytes = 0 -AudioT: TypeAlias = bytes -BookmarkT: TypeAlias = int - self._audioQueue: deque[AudioT | BookmarkT] = deque() self._audioCond = threading.Condition() self._audioStopped = False @@ -431,7 +430,7 @@ def _audioThreadFunc(self): item = self._audioQueue.popleft() if isinstance(item, AudioT): self._player.feed(item, len(item), lambda item=item: self._onChunkFinished(item)) - elif isinstance(item, BookmarkT): + elif isinstance(item, BookmarkT): if self._playedBytes == self._writtenBytes: self._onBookmark(item) # trigger immediately else: From 79016ecc0691f6140d736faa919a12f802d6cea7 Mon Sep 17 00:00:00 2001 From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com> Date: Tue, 25 Feb 2025 10:25:25 +0800 Subject: [PATCH 14/21] Check if `_ttsAudio` is null before termination --- source/synthDrivers/sapi4.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source/synthDrivers/sapi4.py b/source/synthDrivers/sapi4.py index ce7442b39b..64ccbdb79e 100755 --- a/source/synthDrivers/sapi4.py +++ b/source/synthDrivers/sapi4.py @@ -561,7 +561,8 @@ def __init__(self): def terminate(self): self._bufSink._allowDelete = True - self._ttsAudio.terminate() + if self._ttsAudio: + self._ttsAudio.terminate() self._ttsCentral = None self._ttsAttrs = None From 9603d31cf7109cb9c1df2c217f61db66920f412e Mon Sep 17 00:00:00 2001 From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com> Date: Tue, 25 Feb 2025 11:18:26 +0800 Subject: [PATCH 15/21] Make AUDERR codes an enum --- source/synthDrivers/_sapi4.py | 45 +++++++++++++++++++---------------- source/synthDrivers/sapi4.py | 33 +++++++++++-------------- 2 files changed, 39 insertions(+), 39 deletions(-) diff --git a/source/synthDrivers/_sapi4.py b/source/synthDrivers/_sapi4.py index f73d565760..a03de1be40 100755 --- a/source/synthDrivers/_sapi4.py +++ b/source/synthDrivers/_sapi4.py @@ -18,6 +18,7 @@ Structure, ) from ctypes.wintypes import BOOL, BYTE, DWORD, FILETIME, LPCWSTR, WORD +from enum import IntEnum from comtypes import GUID, IUnknown, STDMETHOD, COMMETHOD import winKernel @@ -42,26 +43,30 @@ TTSFEATURE_PITCH = 8 TTSFEATURE_FIXEDAUDIO = 1024 -# Audio related error codes -AUDERR_BADDEVICEID = -2147220735 -AUDERR_NEEDWAVEFORMAT = -2147220734 -AUDERR_NOTSUPPORTED = -2147467263 # E_NOTIMPL -AUDERR_NOTENOUGHDATA = -2147220991 -AUDERR_NOTPLAYING = -2147220730 -AUDERR_WAVEFORMATNOTSUPPORTED = -2147220990 -AUDERR_WAVEDEVICEBUSY = -2147220989 -AUDERR_WAVEDEVNOTSUPPORTED = -2147220718 -AUDERR_NOTRECORDING = -2147220717 -AUDERR_INVALIDFLAG = -2147220988 -AUDERR_NODRIVER = -2147220713 -AUDERR_HANDLEBUSY = -2147220712 -AUDERR_INVALIDNOTIFYSINK = -2147220711 -AUDERR_WAVENOTENABLED = -2147220710 -AUDERR_ALREADYCLAIMED = -2147220707 -AUDERR_NOTCLAIMED = -2147220706 -AUDERR_STILLPLAYING = -2147220705 -AUDERR_ALREADYSTARTED = -2147220704 -AUDERR_SYNCNOTALLOWED = -2147220703 + +class AudioError(IntEnum): + """SAPI4 audio related error codes.""" + + BAD_DEVICE_ID = -2147220735 + NEED_WAVE_FORMAT = -2147220734 + NOT_SUPPORTED = -2147467263 # E_NOTIMPL + NOT_ENOUGH_DATA = -2147220991 + NOT_PLAYING = -2147220730 + WAVE_FORMAT_NOT_SUPPORTED = -2147220990 + WAVE_DEVICE_BUSY = -2147220989 + WAVE_DEV_NOT_SUPPORTED = -2147220718 + NOT_RECORDING = -2147220717 + INVALID_FLAG = -2147220988 + NO_DRIVER = -2147220713 + HANDLE_BUSY = -2147220712 + INVALID_NOTIFY_SINK = -2147220711 + WAVE_NOT_ENABLED = -2147220710 + ALREADY_CLAIMED = -2147220707 + NOT_CLAIMED = -2147220706 + STILL_PLAYING = -2147220705 + ALREADY_STARTED = -2147220704 + SYNC_NOT_ALLOWED = -2147220703 + LANGID = WORD QWORD = c_ulonglong diff --git a/source/synthDrivers/sapi4.py b/source/synthDrivers/sapi4.py index 64ccbdb79e..385d96a334 100755 --- a/source/synthDrivers/sapi4.py +++ b/source/synthDrivers/sapi4.py @@ -35,12 +35,7 @@ import warnings from utils.security import isRunningOnSecureDesktop from ._sapi4 import ( - AUDERR_ALREADYCLAIMED, - AUDERR_ALREADYSTARTED, - AUDERR_INVALIDNOTIFYSINK, - AUDERR_NEEDWAVEFORMAT, - AUDERR_NOTCLAIMED, - AUDERR_WAVEFORMATNOTSUPPORTED, + AudioError, SDATA, CLSID_TTSEnumerator, IAudio, @@ -236,7 +231,7 @@ def IAudio_PassNotify(self, pNotifyInterface: c_void_p, IIDNotifyInterface: GUID Allows specifying NULL for no sink.""" if IIDNotifyInterface != IAudioDestNotifySink._iid_: log.debugWarning("Only IAudioDestNotifySink is allowed") - raise ReturnHRESULT(AUDERR_INVALIDNOTIFYSINK, None) + raise ReturnHRESULT(AudioError.INVALID_NOTIFY_SINK, None) if self._notifySink: self._notifySink = None if pNotifyInterface: @@ -255,7 +250,7 @@ def IAudio_Claim(self) -> None: If Claim is called before unclaiming completes, unclaiming is canceled, and neither AudioStop nor AudioStart is notified.""" if not self._waveFormat: - raise ReturnHRESULT(AUDERR_NEEDWAVEFORMAT, None) + raise ReturnHRESULT(AudioError.NEED_WAVE_FORMAT, None) with self._audioCond: if self._deviceUnClaiming: # Unclaiming is cancelled, but nothing else is touched. @@ -263,7 +258,7 @@ def IAudio_Claim(self) -> None: self._deviceUnClaimingBytePos = None return if self._deviceClaimed: - raise ReturnHRESULT(AUDERR_ALREADYCLAIMED, None) + raise ReturnHRESULT(AudioError.ALREADY_CLAIMED, None) self._maybeInitPlayer() self._deviceClaimed = True if self._notifySink: @@ -275,7 +270,7 @@ def IAudio_UnClaim(self) -> None: If there is audio in the buffer, it should still be played till the end. `IAudioDestNotifySink::AudioStop()` will be called after the audio completely stops.""" if not self._deviceClaimed: - raise ReturnHRESULT(AUDERR_NOTCLAIMED, None) + raise ReturnHRESULT(AudioError.NOT_CLAIMED, None) if self._deviceStarted: # When playing, wait for the playback to finish. with self._audioCond: @@ -294,9 +289,9 @@ def IAudio_UnClaim(self) -> None: def IAudio_Start(self) -> None: """Starts (or resumes) playing the audio in the buffer.""" if self._deviceStarted: - raise ReturnHRESULT(AUDERR_ALREADYSTARTED, None) + raise ReturnHRESULT(AudioError.ALREADY_STARTED, None) if not self._deviceClaimed: - raise ReturnHRESULT(AUDERR_NOTCLAIMED, None) + raise ReturnHRESULT(AudioError.NOT_CLAIMED, None) self._startTime = datetime.now() self._startBytes = self._playedBytes try: @@ -330,7 +325,7 @@ def IAudio_TotalGet(self) -> int: def IAudio_ToFileTime(self, pqWord: c_ulonglong_p) -> None: """Converts a byte position to UTC FILETIME.""" if not self._waveFormat: - raise ReturnHRESULT(AUDERR_NEEDWAVEFORMAT, None) + raise ReturnHRESULT(AudioError.NEED_WAVE_FORMAT, None) UNIX_TIME_CONV = 1_1644_473_600 filetime_ticks = int((self._startTime.timestamp() + UNIX_TIME_CONV) * 10_000_000) filetime_ticks += (pqWord[0] - self._startBytes) * 10_000_000 // self._waveFormat.nAvgBytesPerSec @@ -341,7 +336,7 @@ def IAudio_WaveFormatGet(self) -> SDATA: :returns: A pointer to the WAVEFORMATEX structure. Should be freed by the caller using CoTaskMemFree.""" if not self._waveFormat: - raise ReturnHRESULT(AUDERR_NEEDWAVEFORMAT, None) + raise ReturnHRESULT(AudioError.NEED_WAVE_FORMAT, None) size = sizeof(nvwave.WAVEFORMATEX) ptr = windll.ole32.CoTaskMemAlloc(size) if not ptr: @@ -358,16 +353,16 @@ def IAudio_WaveFormatSet(self, dWFEX: SDATA) -> None: pWfx = cast(dWFEX.pData, POINTER(nvwave.WAVEFORMATEX)) if pWfx[0].wFormatTag != nvwave.WAVE_FORMAT_PCM: log.debugWarning("Wave format not supported. Only integer PCM formats are supported.") - raise ReturnHRESULT(AUDERR_WAVEFORMATNOTSUPPORTED, None) + raise ReturnHRESULT(AudioError.WAVE_FORMAT_NOT_SUPPORTED, None) if self._deviceStarted or self._audioQueue: log.debugWarning("Cannot change wave format during playback.") - raise ReturnHRESULT(AUDERR_WAVEFORMATNOTSUPPORTED, None) + raise ReturnHRESULT(AudioError.WAVE_FORMAT_NOT_SUPPORTED, None) self._waveFormat = nvwave.WAVEFORMATEX() memmove(addressof(self._waveFormat), pWfx, size) def _getFreeSpace(self) -> int: if not self._waveFormat: - raise ReturnHRESULT(AUDERR_NEEDWAVEFORMAT, None) + raise ReturnHRESULT(AudioError.NEED_WAVE_FORMAT, None) return self._waveFormat.nAvgBytesPerSec // 5 # always 200ms def IAudioDest_FreeSpace(self) -> tuple[DWORD, BOOL]: @@ -380,10 +375,10 @@ def IAudioDest_FreeSpace(self) -> tuple[DWORD, BOOL]: def IAudioDest_DataSet(self, pBuffer: c_void_p, dwSize: int) -> None: """Writes audio data to the end of the object's internal buffer. This should not block. - When data cannot fit in the buffer, this should return AUDERR_NOTENOUGHDATA immediately.""" + When data cannot fit in the buffer, this should return AudioError.NOT_ENOUGH_DATA immediately.""" if not self._deviceClaimed or self._deviceUnClaiming: log.debugWarning("Audio data written when device is not claimed") - raise ReturnHRESULT(AUDERR_NOTCLAIMED, None) + raise ReturnHRESULT(AudioError.NOT_CLAIMED, None) with self._audioCond: self._audioQueue.append(string_at(pBuffer, dwSize)) self._writtenBytes += dwSize From 8694ae4d96e16d15d52ef299274784d8a6e0cbcb Mon Sep 17 00:00:00 2001 From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com> Date: Tue, 25 Feb 2025 11:35:30 +0800 Subject: [PATCH 16/21] Add comment and type hint --- source/synthDrivers/sapi4.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/source/synthDrivers/sapi4.py b/source/synthDrivers/sapi4.py index 385d96a334..465ca421ea 100755 --- a/source/synthDrivers/sapi4.py +++ b/source/synthDrivers/sapi4.py @@ -114,8 +114,10 @@ def IUnknown_Release(self, this, *args, **kwargs): from ctypes import _Pointer c_ulonglong_p = _Pointer[c_ulonglong] + LP_IAudioDestNotifySink = _Pointer[IAudioDestNotifySink] else: c_ulonglong_p = POINTER(c_ulonglong) + LP_IAudioDestNotifySink = POINTER(IAudioDestNotifySink) AudioT: TypeAlias = bytes BookmarkT: TypeAlias = int @@ -145,7 +147,7 @@ class SynthDriverAudio(COMObject): _com_interfaces_ = [IAudio, IAudioDest] def __init__(self): - self._notifySink = None + self._notifySink: LP_IAudioDestNotifySink = None self._deviceClaimed = False self._deviceStarted = False self._deviceUnClaiming = False @@ -161,7 +163,7 @@ def __init__(self): self._audioStopped = False self._audioThread = threading.Thread(target=self._audioThreadFunc) self._audioThread.start() - self._level = 0xFFFFFFFF + self._level = 0xFFFFFFFF # defaults to maximum value (0xFFFF) for both channels (low and high word) def terminate(self): with self._audioCond: @@ -235,7 +237,7 @@ def IAudio_PassNotify(self, pNotifyInterface: c_void_p, IIDNotifyInterface: GUID if self._notifySink: self._notifySink = None if pNotifyInterface: - self._notifySink = cast(pNotifyInterface, POINTER(IAudioDestNotifySink)) + self._notifySink = cast(pNotifyInterface, LP_IAudioDestNotifySink) def IAudio_PosnGet(self) -> int: """Returns the byte position currently being played, From e99c940d749d8fa96ea4ea33018809463b41d434 Mon Sep 17 00:00:00 2001 From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com> Date: Tue, 25 Feb 2025 11:36:53 +0800 Subject: [PATCH 17/21] Release notifySink when terminated --- source/synthDrivers/sapi4.py | 1 + 1 file changed, 1 insertion(+) diff --git a/source/synthDrivers/sapi4.py b/source/synthDrivers/sapi4.py index 465ca421ea..bb9e5e9171 100755 --- a/source/synthDrivers/sapi4.py +++ b/source/synthDrivers/sapi4.py @@ -171,6 +171,7 @@ def terminate(self): self._audioCond.notify() if self._audioThread is not threading.current_thread(): self._audioThread.join() + self._notifySink = None def __del__(self): self.terminate() From 968261ce39882ee3d84a56973c0cf0b5d1f090e7 Mon Sep 17 00:00:00 2001 From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com> Date: Tue, 25 Feb 2025 11:47:03 +0800 Subject: [PATCH 18/21] Remove SAPI 4 deprecation messages --- source/synthDrivers/sapi4.py | 55 ++---------------------------------- user_docs/en/changes.md | 4 --- user_docs/en/userGuide.md | 1 - 3 files changed, 2 insertions(+), 58 deletions(-) diff --git a/source/synthDrivers/sapi4.py b/source/synthDrivers/sapi4.py index bb9e5e9171..6ecba04a76 100755 --- a/source/synthDrivers/sapi4.py +++ b/source/synthDrivers/sapi4.py @@ -25,15 +25,9 @@ ) from ctypes.wintypes import BOOL, DWORD, FILETIME, WORD from typing import TYPE_CHECKING, Optional, TypeAlias -from autoSettingsUtils.driverSetting import BooleanDriverSetting -import gui.contextHelp -import gui.message import nvwave -import queueHandler -from synthDriverHandler import SynthDriver, VoiceInfo, synthIndexReached, synthDoneSpeaking, synthChanged +from synthDriverHandler import SynthDriver, VoiceInfo, synthIndexReached, synthDoneSpeaking from logHandler import log -import warnings -from utils.security import isRunningOnSecureDesktop from ._sapi4 import ( AudioError, SDATA, @@ -76,9 +70,6 @@ from speech.types import SpeechSequence -warnings.warn("synthDrivers.sapi4 is deprecated, pending removal in NVDA 2026.1.", DeprecationWarning) - - class SynthDriverBufSink(COMObject): _com_interfaces_ = [ITTSBufNotifySink] @@ -498,10 +489,7 @@ def ITTSNotifySinkW_AudioStop(self, this, qTimeStamp: int): class SynthDriver(SynthDriver): name = "sapi4" description = "Microsoft Speech API version 4" - supportedSettings = [ - SynthDriver.VoiceSetting(), - BooleanDriverSetting("_hasWarningBeenShown", ""), - ] + supportedSettings = [SynthDriver.VoiceSetting()] supportedCommands = { IndexCommand, CharacterModeCommand, @@ -834,42 +822,3 @@ def _set_volume(self, val: int): # using the low word for the left channel and the high word for the right channel. val |= val << 16 self._ttsAttrs.VolumeSet(val) - - -def _sapi4DeprecationWarning(synth: SynthDriver, audioOutputDevice: str, isFallback: bool): - """A synthChanged event handler to alert the user about the deprecation of SAPI4.""" - - def setShown(payload: gui.message.Payload): - synth._hasWarningBeenShown = True - synth.saveSettings() - - def impl(): - gui.message.MessageDialog( - parent=None, - message=_( - # Translators: Message warning users that SAPI4 is deprecated. - "Microsoft Speech API version 4 is obsolete. " - "Using this speech synthesizer may pose a security risk. " - "This synthesizer driver will be removed in NVDA 2026.1. " - "You are strongly encouraged to choose a more modern speech synthesizer. " - "Consult the Supported Speech Synthesizers section in the User Guide for suggestions. ", - ), - # Translators: Title of a message dialog. - title=_("Warning"), - buttons=None, - ).addOkButton( - callback=setShown, - ).addHelpButton( - # Translators: A button in a dialog. - label=_("Open user guide"), - callback=lambda payload: gui.contextHelp.showHelp("SupportedSpeechSynths"), - ).Show() - - if (not isFallback) and (synth.name == "sapi4") and (not getattr(synth, "_hasWarningBeenShown", False)): - # We need to queue the dialog to appear, as wx may not have been initialised the first time this is called. - queueHandler.queueFunction(queueHandler.eventQueue, impl) - - -if not isRunningOnSecureDesktop(): - # Don't warn users about SAPI4 deprecation when running on a secure desktop. - synthChanged.register(_sapi4DeprecationWarning) diff --git a/user_docs/en/changes.md b/user_docs/en/changes.md index 0c57cbfd0f..5c0d0e36b1 100644 --- a/user_docs/en/changes.md +++ b/user_docs/en/changes.md @@ -4,9 +4,6 @@ ### Important notes -* The support for Microsoft Speech API version 4 synthesizers is planned for removal in NVDA 2026.1. -Any remaining users of SAPI4 speech synthesizers are encouraged to choose a more modern speech synthesizer. (#17599) - ### New Features * Add-on Store: @@ -246,7 +243,6 @@ Use `gui.message.MessageDialog` instead. (#17582) * `NoConsoleOptionParser`, `stringToBool`, `stringToLang` in `__main__`; use the same symbols in `argsParsing` instead. * `__main__.parser`; use `argsParsing.getParser()` instead. * `bdDetect.DeviceType` is deprecated in favour of `bdDetect.ProtocolType` and `bdDetect.CommunicationType` to take into account the fact that both HID and Serial communication can take place over USB and Bluetooth. (#17537 , @LeonarddeR) -* SAPI4, `synthDrivers.sapi4`, is deprecated and planned for removal in 2026.1. (#17599) ## 2024.4.2 diff --git a/user_docs/en/userGuide.md b/user_docs/en/userGuide.md index f0e68a5ba4..2f85388134 100644 --- a/user_docs/en/userGuide.md +++ b/user_docs/en/userGuide.md @@ -3942,7 +3942,6 @@ There are also many variants which can be chosen to alter the sound of the voice SAPI 4 is an older Microsoft standard for software speech synthesizers. NVDA still supports this for users who already have SAPI 4 synthesizers installed. However, Microsoft no longer support this and needed components are no longer available from Microsoft. -Support for SAPI4 will be removed in NVDA 2026.1. When using this synthesizer with NVDA, the available voices (accessed from the [Speech category](#SpeechSettings) of the [NVDA Settings](#NVDASettings) dialog or by the [Synth Settings Ring](#SynthSettingsRing)) will contain all the voices from all the installed SAPI 4 engines found on your system. From 648b391eb0a8111d0c76894112037f5ac5421659 Mon Sep 17 00:00:00 2001 From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com> Date: Tue, 25 Feb 2025 12:13:42 +0800 Subject: [PATCH 19/21] Add SAPI 4 links in projectDocs --- projectDocs/design/synthesizers.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/projectDocs/design/synthesizers.md b/projectDocs/design/synthesizers.md index 5032f10446..8acbec6d31 100644 --- a/projectDocs/design/synthesizers.md +++ b/projectDocs/design/synthesizers.md @@ -5,10 +5,11 @@ SAPI 4 synthesizers are not included with NVDA, and the runtimes are no longer included with Windows. Despite this, SAPI 4 support is still required, as many users prefer older synthesizers which rely on the SAPI 4 API. -To test SAPI 4, you must install the SAPI 4 runtimes from Microsoft, as well as a synthesizer. +To test SAPI 4, you must install the SAPI 4 runtimes (or the SDK containing the runtimes) from Microsoft, as well as a synthesizer. Microsoft no longer hosts downloads for these, but archives and mirrors exist. -1. Download and install the SAPI 4 runtimes from [this Microsoft archive](http://web.archive.org/web/20150910165037/http://activex.microsoft.com/activex/controls/sapi/spchapi.exe). -1. Download and install a SAPI 4 synthesizer from [this Microsoft archive](http://web.archive.org/web/20150910005021if_/http://activex.microsoft.com/activex/controls/agent2/tv_enua.exe) +First, you can download and install either only the SAPI 4 runtimes from [this Microsoft archive](http://web.archive.org/web/20150910165037/http://activex.microsoft.com/activex/controls/sapi/spchapi.exe), or the SAPI 4 SDK from [this Microsoft archive](https://web.archive.org/web/20030203082745/http://download.microsoft.com/download/speechSDK/Install/4.0a/WIN98/EN-US/SAPI4SDK.exe), which contains the SAPI 4 runtimes, offline documentation, and sample source code & test applications. If you only need the offline documentation, you can download the documentation for SAPI 4 COM interfaces from [this Microsoft archive](https://web.archive.org/web/19990418101425/http://www.microsoft.com/iit/onlineDocs/speechsdk4-com.chm). + +After installing the runtimes, download and install a SAPI 4 synthesizer from [this Microsoft archive](http://web.archive.org/web/20150910005021if_/http://activex.microsoft.com/activex/controls/agent2/tv_enua.exe). After this, you should be able to select SAPI 4 as a NVDA synthesizer. From 0ac0402f4b6b26c0f1c14dca8a9303d2fb6f3320 Mon Sep 17 00:00:00 2001 From: Sean Budd Date: Tue, 25 Feb 2025 15:27:35 +1100 Subject: [PATCH 20/21] Apply suggestions from code review --- projectDocs/design/synthesizers.md | 3 ++- source/synthDrivers/sapi4.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/projectDocs/design/synthesizers.md b/projectDocs/design/synthesizers.md index 8acbec6d31..eef162f4cd 100644 --- a/projectDocs/design/synthesizers.md +++ b/projectDocs/design/synthesizers.md @@ -8,7 +8,8 @@ Despite this, SAPI 4 support is still required, as many users prefer older synth To test SAPI 4, you must install the SAPI 4 runtimes (or the SDK containing the runtimes) from Microsoft, as well as a synthesizer. Microsoft no longer hosts downloads for these, but archives and mirrors exist. -First, you can download and install either only the SAPI 4 runtimes from [this Microsoft archive](http://web.archive.org/web/20150910165037/http://activex.microsoft.com/activex/controls/sapi/spchapi.exe), or the SAPI 4 SDK from [this Microsoft archive](https://web.archive.org/web/20030203082745/http://download.microsoft.com/download/speechSDK/Install/4.0a/WIN98/EN-US/SAPI4SDK.exe), which contains the SAPI 4 runtimes, offline documentation, and sample source code & test applications. If you only need the offline documentation, you can download the documentation for SAPI 4 COM interfaces from [this Microsoft archive](https://web.archive.org/web/19990418101425/http://www.microsoft.com/iit/onlineDocs/speechsdk4-com.chm). +First, you can download and install either only the SAPI 4 runtimes from [this Microsoft archive](http://web.archive.org/web/20150910165037/http://activex.microsoft.com/activex/controls/sapi/spchapi.exe), or the SAPI 4 SDK from [this Microsoft archive](https://web.archive.org/web/20030203082745/http://download.microsoft.com/download/speechSDK/Install/4.0a/WIN98/EN-US/SAPI4SDK.exe), which contains the SAPI 4 runtimes, offline documentation, and sample source code & test applications. +If you only need the offline documentation, you can download the documentation for SAPI 4 COM interfaces from [this Microsoft archive](https://web.archive.org/web/19990418101425/http://www.microsoft.com/iit/onlineDocs/speechsdk4-com.chm). After installing the runtimes, download and install a SAPI 4 synthesizer from [this Microsoft archive](http://web.archive.org/web/20150910005021if_/http://activex.microsoft.com/activex/controls/agent2/tv_enua.exe). diff --git a/source/synthDrivers/sapi4.py b/source/synthDrivers/sapi4.py index 6ecba04a76..db77c1c3d3 100755 --- a/source/synthDrivers/sapi4.py +++ b/source/synthDrivers/sapi4.py @@ -109,6 +109,7 @@ def IUnknown_Release(self, this, *args, **kwargs): else: c_ulonglong_p = POINTER(c_ulonglong) LP_IAudioDestNotifySink = POINTER(IAudioDestNotifySink) + AudioT: TypeAlias = bytes BookmarkT: TypeAlias = int @@ -138,7 +139,7 @@ class SynthDriverAudio(COMObject): _com_interfaces_ = [IAudio, IAudioDest] def __init__(self): - self._notifySink: LP_IAudioDestNotifySink = None + self._notifySink: LP_IAudioDestNotifySink | None = None self._deviceClaimed = False self._deviceStarted = False self._deviceUnClaiming = False From 041bf7f66e876656e62f0b2d64e0980c1113e062 Mon Sep 17 00:00:00 2001 From: Sean Budd Date: Tue, 25 Feb 2025 15:30:55 +1100 Subject: [PATCH 21/21] Update source/synthDrivers/sapi4.py --- source/synthDrivers/sapi4.py | 1 - 1 file changed, 1 deletion(-) diff --git a/source/synthDrivers/sapi4.py b/source/synthDrivers/sapi4.py index db77c1c3d3..82c5e48ba9 100755 --- a/source/synthDrivers/sapi4.py +++ b/source/synthDrivers/sapi4.py @@ -2,7 +2,6 @@ # Copyright (C) 2006-2025 NV Access Limited, Leonard de Ruijter # This file is covered by the GNU General Public License. # See the file COPYING for more details. -# This module is deprecated, pending removal in NVDA 2026.1. from datetime import datetime import locale