From 815293446fd60c8bd287249c27b1b6edf99b9af5 Mon Sep 17 00:00:00 2001
From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com>
Date: Thu, 20 Feb 2025 10:31:06 +0800
Subject: [PATCH 01/21] Make SAPI4 voices use WASAPI

---
 source/synthDrivers/_sapi4.py |  80 +++++++-
 source/synthDrivers/sapi4.py  | 360 ++++++++++++++++++++++++++++------
 2 files changed, 383 insertions(+), 57 deletions(-)

diff --git a/source/synthDrivers/_sapi4.py b/source/synthDrivers/_sapi4.py
index 5480b8d144..a1b9ca2a5b 100755
--- a/source/synthDrivers/_sapi4.py
+++ b/source/synthDrivers/_sapi4.py
@@ -17,9 +17,9 @@
 	sizeof,
 	Structure,
 )
-from ctypes.wintypes import BYTE, DWORD, LPCWSTR, WORD
+from ctypes.wintypes import BOOL, BYTE, DWORD, FILETIME, LPCWSTR, WORD
 from enum import IntEnum
-from comtypes import GUID, IUnknown, STDMETHOD
+from comtypes import GUID, IUnknown, STDMETHOD, COMMETHOD
 
 import winKernel
 
@@ -43,6 +43,27 @@
 TTSFEATURE_PITCH = 8
 TTSFEATURE_FIXEDAUDIO = 1024
 
+# Audio related error codes
+AUDERR_BADDEVICEID = -2147220735
+AUDERR_NEEDWAVEFORMAT = -2147220734
+AUDERR_NOTSUPPORTED = -2147467263  # E_NOTIMPL
+AUDERR_NOTENOUGHDATA = -2147220991
+AUDERR_NOTPLAYING = -2147220730
+AUDERR_WAVEFORMATNOTSUPPORTED = -2147220990
+AUDERR_WAVEDEVICEBUSY = -2147220989
+AUDERR_WAVEDEVNOTSUPPORTED = -2147220718
+AUDERR_NOTRECORDING = -2147220717
+AUDERR_INVALIDFLAG = -2147220988
+AUDERR_NODRIVER = -2147220713
+AUDERR_HANDLEBUSY = -2147220712
+AUDERR_INVALIDNOTIFYSINK = -2147220711
+AUDERR_WAVENOTENABLED = -2147220710
+AUDERR_ALREADYCLAIMED = -2147220707
+AUDERR_NOTCLAIMED = -2147220706
+AUDERR_STILLPLAYING = -2147220705
+AUDERR_ALREADYSTARTED = -2147220704
+AUDERR_SYNCNOTALLOWED = -2147220703
+
 LANGID = WORD
 QWORD = c_ulonglong
 
@@ -224,6 +245,61 @@ class ITTSNotifySinkW(IUnknown):
 
 ITTSNotifySink = ITTSNotifySinkW
 
+
+class IAudio(IUnknown):
+	_iid_ = GUID("{F546B340-C743-11cd-80E5-00AA003E4B50}")
+
+
+IAudio._methods_ = [
+	COMMETHOD([], HRESULT, "Flush"),
+	COMMETHOD([], HRESULT, "LevelGet", (["out"], POINTER(DWORD), "pdwLevel")),
+	COMMETHOD([], HRESULT, "LevelSet", (["in"], DWORD, "dwLevel")),
+	COMMETHOD(
+		[],
+		HRESULT,
+		"PassNotify",
+		(["in"], c_void_p, "pNotifyInterface"),
+		(["in"], GUID, "IIDNotifyInterface"),
+	),
+	COMMETHOD([], HRESULT, "PosnGet", (["out"], POINTER(QWORD), "pqwTimeStamp")),
+	COMMETHOD([], HRESULT, "Claim"),
+	COMMETHOD([], HRESULT, "UnClaim"),
+	COMMETHOD([], HRESULT, "Start"),
+	COMMETHOD([], HRESULT, "Stop"),
+	COMMETHOD([], HRESULT, "TotalGet", (["out"], POINTER(QWORD), "pqWord")),
+	COMMETHOD(
+		[], HRESULT, "ToFileTime", (["in"], POINTER(QWORD), "pqWord"), (["out"], POINTER(FILETIME), "pFT")
+	),
+	COMMETHOD([], HRESULT, "WaveFormatGet", (["out"], POINTER(SDATA), "pdWFEX")),
+	COMMETHOD([], HRESULT, "WaveFormatSet", (["in"], SDATA, "dWFEX")),
+]
+
+
+class IAudioDest(IUnknown):
+	_iid_ = GUID("{2EC34DA0-C743-11cd-80E5-00AA003E4B50}")
+
+
+IAudioDest._methods_ = [
+	COMMETHOD(
+		[], HRESULT, "FreeSpace", (["out"], POINTER(DWORD), "pdwBytes"), (["out"], POINTER(BOOL), "pfEOF")
+	),
+	COMMETHOD([], HRESULT, "DataSet", (["in"], c_void_p, "pBuffer"), (["in"], DWORD, "dwSize")),
+	COMMETHOD([], HRESULT, "BookMark", (["in"], DWORD, "dwMarkID")),
+]
+
+
+class IAudioDestNotifySink(IUnknown):
+	_iid_ = GUID("{ACB08C00-C743-11cd-80E5-00AA003E4B50}")
+
+
+IAudioDestNotifySink._methods_ = [
+	STDMETHOD(HRESULT, "AudioStop", [WORD]),
+	STDMETHOD(HRESULT, "AudioStart"),
+	STDMETHOD(HRESULT, "FreeSpace", [DWORD, BOOL]),
+	STDMETHOD(HRESULT, "BookMark", [DWORD, BOOL]),
+]
+
+
 CLSID_MMAudioDest = GUID("{CB96B400-C743-11cd-80E5-00AA003E4B50}")
 CLSID_TTSEnumerator = GUID("{D67C0280-C743-11cd-80E5-00AA003E4B50}")
 
diff --git a/source/synthDrivers/sapi4.py b/source/synthDrivers/sapi4.py
index 59a3873154..8b5eeae942 100755
--- a/source/synthDrivers/sapi4.py
+++ b/source/synthDrivers/sapi4.py
@@ -4,26 +4,37 @@
 # See the file COPYING for more details.
 # This module is deprecated, pending removal in NVDA 2026.1.
 
+from datetime import datetime
+from enum import IntEnum
 import locale
 from collections import OrderedDict, deque
+import threading
 import winreg
-from comtypes import CoCreateInstance, COMObject, COMError, GUID
-from ctypes import byref, c_ulong, POINTER, c_wchar, create_string_buffer, sizeof, windll
-from ctypes.wintypes import DWORD, HANDLE, WORD
+from comtypes import CoCreateInstance, COMObject, COMError, GUID, hresult, ReturnHRESULT
+from ctypes import addressof, byref, c_ulong, POINTER, c_void_p, cast, memmove, string_at, sizeof, windll
+from ctypes.wintypes import BOOL, DWORD, FILETIME, WORD
 from typing import Optional
 from autoSettingsUtils.driverSetting import BooleanDriverSetting
 import gui.contextHelp
 import gui.message
+import nvwave
 import queueHandler
 from synthDriverHandler import SynthDriver, VoiceInfo, synthIndexReached, synthDoneSpeaking, synthChanged
 from logHandler import log
 import warnings
 from utils.security import isRunningOnSecureDesktop
 from ._sapi4 import (
-	MMSYSERR_NOERROR,
-	CLSID_MMAudioDest,
+	AUDERR_ALREADYCLAIMED,
+	AUDERR_ALREADYSTARTED,
+	AUDERR_INVALIDNOTIFYSINK,
+	AUDERR_NEEDWAVEFORMAT,
+	AUDERR_NOTCLAIMED,
+	AUDERR_WAVEFORMATNOTSUPPORTED,
+	SDATA,
 	CLSID_TTSEnumerator,
-	IAudioMultiMediaDevice,
+	IAudio,
+	IAudioDest,
+	IAudioDestNotifySink,
 	ITTSAttributes,
 	ITTSBufNotifySink,
 	ITTSCentralW,
@@ -42,7 +53,6 @@
 	TTSFEATURE_VOLUME,
 	TTSMODEINFO,
 	VOICECHARSET,
-	DriverMessage,
 )
 import config
 import weakref
@@ -94,6 +104,292 @@ def IUnknown_Release(self, this, *args, **kwargs):
 		return super(SynthDriverBufSink, self).IUnknown_Release(this, *args, **kwargs)
 
 
+class _DeviceState(IntEnum):
+	CLOSED = 0  # Not claimed
+	OPENED = 1  # Claimed
+	RUNNING = 2  # Started
+	CLOSING = 3  # Unclaiming
+
+
+class SynthDriverAudio(COMObject):
+	"""
+	Implements IAudio and IAudioDest to receive streamed in audio data.
+	An instance of this class will be passed to,
+	and be used by the TTS engine.
+	"""
+
+	_com_interfaces_ = [IAudio, IAudioDest]
+
+	def __init__(self):
+		self._notifySink = None
+		self._deviceState = _DeviceState.CLOSED
+		self._waveFormat: Optional[nvwave.WAVEFORMATEX] = None
+		self._player: Optional[nvwave.WavePlayer] = None
+		self._writtenBytes = 0
+		self._playedBytes = 0
+		self._startTime = datetime.now()
+		self._audioQueue: deque[bytes | int] = deque()  # bytes: audio, int: bookmark
+		self._audioCond = threading.Condition()
+		self._audioStopped = False
+		self._audioThread: Optional[threading.Thread] = None
+
+	def terminate(self):
+		self._shutdownAudioThread()
+
+	def __del__(self):
+		self.terminate()
+
+	def _maybeInitPlayer(self) -> None:
+		"""Initialize audio playback based on the wave format provided by the engine.
+		If the format has not changed, the existing player is used.
+		Otherwise, a new one is created with the appropriate parameters."""
+		if self._player:
+			# Reuse the previous player if possible (using the same format)
+			if (
+				self._player.channels == self._waveFormat.nChannels
+				and self._player.samplesPerSec == self._waveFormat.nSamplesPerSec
+				and self._player.bitsPerSample == self._waveFormat.wBitsPerSample
+			):
+				return  # same format, use the previous player
+			# different format, close and recreate a new player
+			self._player.stop()
+		self._player = nvwave.WavePlayer(
+			channels=self._waveFormat.nChannels,
+			samplesPerSec=self._waveFormat.nSamplesPerSec,
+			bitsPerSample=self._waveFormat.wBitsPerSample,
+			outputDevice=config.conf["audio"]["outputDevice"],
+		)
+		self._player.open()
+
+	def IAudio_Flush(self) -> None:
+		"""Clears the object's internal buffer and resets the audio device,
+		but does not stop playing the audio data afterwards."""
+		if self._player:
+			self._player.stop()
+		with self._audioCond:
+			if self._notifySink:
+				while self._audioQueue:
+					item = self._audioQueue.popleft()
+					if isinstance(item, int):
+						# Flush all untriggered bookmarks.
+						# 1 (TRUE) means that the bookmark is sent because of flushing.
+						self._notifySink.BookMark(item, 1)
+			self._audioQueue.clear()
+
+	def IAudio_LevelGet(self) -> int:
+		"""Returns the volume level, ranging from 0x0000 to 0xFFFF.
+		Low word is for the left (or mono) channel, and high word is for the right channel."""
+		# TODO: Not implemented yet.
+		return 0xFFFF
+
+	def IAudio_LevelSet(self, dwLevel: int) -> None:
+		"""Sets the volume level, ranging from 0x0000 to 0xFFFF.
+		Low word is for the left (or mono) channel, and high word is for the right channel."""
+		# TODO: Not implemented yet.
+		pass
+
+	def IAudio_PassNotify(self, pNotifyInterface: c_void_p, IIDNotifyInterface: GUID) -> None:
+		"""Passes in an implementation of IAudioDestNotifySink to receive notifications.
+		The previous sink, if exists, will be released and replaced.
+		Allows specifying NULL for no sink."""
+		if IIDNotifyInterface != IAudioDestNotifySink._iid_:
+			log.debugWarning("Only IAudioDestNotifySink is allowed")
+			raise ReturnHRESULT(AUDERR_INVALIDNOTIFYSINK, None)
+		if self._notifySink:
+			self._notifySink = None
+		if pNotifyInterface:
+			self._notifySink = cast(pNotifyInterface, POINTER(IAudioDestNotifySink))
+
+	def IAudio_PosnGet(self) -> int:
+		"""Returns the byte position currently being played,
+		which should increase monotonically and never reset."""
+		return self._playedBytes
+
+	def IAudio_Claim(self):
+		"""Acquires (opens) the multimedia device.
+		`IAudioDestNotifySink::AudioStart()` will be called to notify the engine."""
+		if not self._waveFormat:
+			raise ReturnHRESULT(AUDERR_NEEDWAVEFORMAT, None)
+		if self._deviceState == _DeviceState.CLOSING:
+			# Close immediately
+			self.IAudio_Flush()
+			self._finishClose()
+		if self._deviceState != _DeviceState.CLOSED:
+			raise ReturnHRESULT(AUDERR_ALREADYCLAIMED, None)
+		self._maybeInitPlayer()
+		self._startAudioThread()
+		self._deviceState = _DeviceState.OPENED
+		if self._notifySink:
+			self._notifySink.AudioStart()
+
+	def IAudio_UnClaim(self):
+		"""Releases the multimedia device asynchronously.
+		`IAudioDestNotifySink::AudioStop()` will be called after the audio completely stops."""
+		if self._deviceState == _DeviceState.CLOSED:
+			raise ReturnHRESULT(AUDERR_NOTCLAIMED, None)
+		self._deviceState = _DeviceState.CLOSING
+		with self._audioCond:
+			self._audioCond.notify()
+
+	def IAudio_Start(self) -> None:
+		"""Starts (or resumes) playing."""
+		if self._deviceState == _DeviceState.RUNNING:
+			raise ReturnHRESULT(AUDERR_ALREADYSTARTED, None)
+		if self._deviceState != _DeviceState.OPENED:
+			raise ReturnHRESULT(AUDERR_NOTCLAIMED, None)
+		try:
+			self._player.pause(False)
+		except OSError:
+			pass
+		self._deviceState = _DeviceState.RUNNING
+
+	def IAudio_Stop(self) -> None:
+		"""Stops (or pauses) playing, without clearing the buffer."""
+		if self._deviceState != _DeviceState.RUNNING:
+			return  # no error returned
+		try:
+			self._player.pause(True)
+		except OSError:
+			pass
+		self._deviceState = _DeviceState.OPENED
+
+	def IAudio_TotalGet(self) -> int:
+		"""Returns the total number of bytes written,
+		including the unplayed bytes in the buffer,
+		which should increase monotonically and never reset."""
+		return self._writtenBytes
+
+	def IAudio_ToFileTime(self, pqWord):
+		# TODO: Add type hint
+		"""Converts a byte position to UTC FILETIME."""
+		if not self._waveFormat:
+			raise ReturnHRESULT(AUDERR_NEEDWAVEFORMAT, None)
+		filetime_ticks = int((self._startTime.timestamp() + 11644473600) * 10_000_000)
+		filetime_ticks += pqWord[0] * 10_000_000 // self._waveFormat.nAvgBytesPerSec
+		return FILETIME(filetime_ticks & 0xFFFFFFFF, filetime_ticks >> 32)
+
+	def IAudio_WaveFormatGet(self) -> SDATA:
+		"""Gets a copy of the current wave format.
+		:returns: A pointer to the WAVEFORMATEX structure.
+			Should be freed by the caller using CoTaskMemFree."""
+		if not self._waveFormat:
+			raise ReturnHRESULT(AUDERR_NEEDWAVEFORMAT, None)
+		size = sizeof(nvwave.WAVEFORMATEX)
+		ptr = windll.ole32.CoTaskMemAlloc(size)
+		if not ptr:
+			raise COMError(hresult.E_OUTOFMEMORY, "CoTaskMemAlloc failed", (None, None, None, None, None))
+		memmove(ptr, addressof(self._waveFormat), size)
+		return SDATA(ptr, size)
+
+	def IAudio_WaveFormatSet(self, dWFEX: SDATA) -> None:
+		"""Sets the current wave format. Only integer PCM formats are supported."""
+		size = 18  # SAPI4 uses 18 bytes without the final padding
+		if dWFEX.dwSize < size:
+			log.debugWarning("Invalid wave format size")
+			raise ReturnHRESULT(hresult.E_INVALIDARG, None)
+		pWfx = cast(dWFEX.pData, POINTER(nvwave.WAVEFORMATEX))
+		if pWfx[0].wFormatTag != nvwave.WAVE_FORMAT_PCM:
+			log.debugWarning("Wave format not supported. Only integer PCM formats are supported.")
+			raise ReturnHRESULT(AUDERR_WAVEFORMATNOTSUPPORTED, None)
+		if self._deviceState != _DeviceState.CLOSED:
+			log.debugWarning("Cannot change wave format during playback.")
+			raise ReturnHRESULT(AUDERR_WAVEFORMATNOTSUPPORTED, None)
+		self._waveFormat = nvwave.WAVEFORMATEX()
+		memmove(addressof(self._waveFormat), pWfx, size)
+
+	def _getFreeSpace(self) -> int:
+		if not self._waveFormat:
+			raise ReturnHRESULT(AUDERR_NEEDWAVEFORMAT, None)
+		return self._waveFormat.nAvgBytesPerSec // 5  # always 200ms
+
+	def IAudioDest_FreeSpace(self) -> tuple[DWORD, BOOL]:
+		# TODO: Docstring about return value
+		"""Returns the number of bytes that are free in the object's internal buffer."""
+		return (self._getFreeSpace(), 0)
+
+	def IAudioDest_DataSet(self, pBuffer: c_void_p, dwSize: int):
+		"""Writes audio data to the end of the object's internal buffer.
+		This should not block.
+		When data cannot fit in the buffer, this should return AUDERR_NOTENOUGHDATA immediately."""
+		if self._deviceState != _DeviceState.RUNNING and self._deviceState != _DeviceState.OPENED:
+			log.debugWarning("Audio data written when device is not claimed")
+			raise ReturnHRESULT(AUDERR_NOTCLAIMED, None)
+		with self._audioCond:
+			self._audioQueue.append(string_at(pBuffer, dwSize))
+			self._writtenBytes += dwSize
+			self._audioCond.notify()
+
+	def IAudioDest_BookMark(self, dwMarkID: int):
+		"""Attaches a bookmark to the most recent data in the audio-destination object's internal buffer.
+		When the bookmark is reached, `IAudioDestNotifySink::BookMark` is called.
+		When Flush is called, untriggered bookmarks should also be triggered."""
+		with self._audioCond:
+			self._audioQueue.append(dwMarkID)
+
+	def _audioThreadFunc(self):
+		while True:
+			with self._audioCond:
+				while not self._audioQueue and not self._audioStopped:
+					# Since WavePlayer.feed returns before the audio finishes,
+					# in order not to lose the final callbacks
+					# when there's no more audio to feed,
+					# wait with a timeout to give WavePlayer a chance
+					# to check the callbacks periodically.
+					self._audioCond.wait(0.1)
+					if self._audioQueue:
+						break
+					if self._deviceState == _DeviceState.CLOSING:
+						# Closing in progress, wait for the audio to finish
+						self._player.feed(None, 0, lambda: self._finishClose())
+					else:
+						# Call feed to let WavePlayer check the callbacks
+						self._player.feed(None, 0, None)
+				if self._audioStopped:
+					return
+				item = self._audioQueue.popleft()
+			if isinstance(item, bytes):  # audio
+				self._player.feed(item, len(item), lambda item=item: self._onChunkFinished(item))
+			elif isinstance(item, int):  # bookmark
+				if self._playedBytes == self._writtenBytes:
+					self._onBookmark(item)  # trigger immediately
+				else:
+					self._player.feed(None, 0, lambda item=item: self._onBookmark(item))
+
+	def _startAudioThread(self):
+		if self._audioThread:
+			return
+		self._audioStopped = False
+		self._audioThread = threading.Thread(target=self._audioThreadFunc)
+		self._audioThread.start()
+
+	def _shutdownAudioThread(self):
+		if not self._audioThread:
+			return
+		with self._audioCond:
+			self._audioStopped = True
+			self._audioCond.notify()
+		if self._audioThread is not threading.current_thread():
+			self._audioThread.join()
+		self._audioThread = None
+
+	def _onChunkFinished(self, chunk: bytes):
+		self._playedBytes += len(chunk)
+		if self._notifySink:
+			self._notifySink.FreeSpace(self._getFreeSpace(), 0)
+
+	def _onBookmark(self, dwMarkID: int):
+		if self._notifySink:
+			self._notifySink.BookMark(dwMarkID, 0)
+
+	def _finishClose(self):
+		if self._deviceState == _DeviceState.CLOSING:
+			self._player.stop()
+			self._shutdownAudioThread()
+			if self._notifySink:
+				self._notifySink.AudioStop(0)  # IANSRSN_NODATA
+			self._deviceState = _DeviceState.CLOSED
+
+
 class SynthDriverSink(COMObject):
 	_com_interfaces_ = [ITTSNotifySinkW]
 
@@ -192,6 +488,7 @@ def __init__(self):
 
 	def terminate(self):
 		self._bufSink._allowDelete = True
+		self._ttsAudio.terminate()
 
 	def speak(self, speechSequence: SpeechSequence):
 		textList = []
@@ -302,8 +599,7 @@ def _set_voice(self, val):
 		if mode is None:
 			raise ValueError("no such mode: %s" % val)
 		self._currentMode = mode
-		self._ttsAudio = CoCreateInstance(CLSID_MMAudioDest, IAudioMultiMediaDevice)
-		self._ttsAudio.DeviceNumSet(_mmDeviceEndpointIdToWaveOutId(config.conf["audio"]["outputDevice"]))
+		self._ttsAudio = SynthDriverAudio()
 		if self._ttsCentral:
 			self._ttsCentral.UnRegister(self._sinkRegKey)
 		self._ttsCentral = POINTER(ITTSCentralW)()
@@ -447,52 +743,6 @@ def _set_volume(self, val: int):
 		self._ttsAttrs.VolumeSet(val)
 
 
-def _mmDeviceEndpointIdToWaveOutId(targetEndpointId: str) -> int:
-	"""Translate from an MMDevice Endpoint ID string to a WaveOut Device ID number.
-
-	:param targetEndpointId: MMDevice endpoint ID string to translate from, or the default value of the `audio.outputDevice` configuration key for the default output device.
-	:return: An integer WaveOut device ID for use with SAPI4.
-		If no matching device is found, or the default output device is requested, `-1` is returned, which means output will be handled by Microsoft Sound Mapper.
-	"""
-	if targetEndpointId != config.conf.getConfigValidation(("audio", "outputDevice")).default:
-		targetEndpointIdByteCount = (len(targetEndpointId) + 1) * sizeof(c_wchar)
-		currEndpointId = create_string_buffer(targetEndpointIdByteCount)
-		currEndpointIdByteCount = DWORD()
-		# Defined in mmeapi.h
-		winmm = windll.winmm
-		waveOutMessage = winmm.waveOutMessage
-		waveOutGetNumDevs = winmm.waveOutGetNumDevs
-		for devID in range(waveOutGetNumDevs()):
-			# Get the length of this device's endpoint ID string.
-			mmr = waveOutMessage(
-				HANDLE(devID),
-				DriverMessage.QUERY_INSTANCE_ID_SIZE,
-				byref(currEndpointIdByteCount),
-				None,
-			)
-			if (mmr != MMSYSERR_NOERROR) or (currEndpointIdByteCount.value != targetEndpointIdByteCount):
-				# ID lengths don't match, so this device can't be a match.
-				continue
-			# Get the device's endpoint ID string.
-			mmr = waveOutMessage(
-				HANDLE(devID),
-				DriverMessage.QUERY_INSTANCE_ID,
-				byref(currEndpointId),
-				currEndpointIdByteCount,
-			)
-			if mmr != MMSYSERR_NOERROR:
-				continue
-			# Decode the endpoint ID string to a python string, and strip the null terminator.
-			if (
-				currEndpointId.raw[: targetEndpointIdByteCount - sizeof(c_wchar)].decode("utf-16")
-				== targetEndpointId
-			):
-				return devID
-	# No matching device found, or default requested explicitly.
-	# Return the ID of Microsoft Sound Mapper
-	return -1
-
-
 def _sapi4DeprecationWarning(synth: SynthDriver, audioOutputDevice: str, isFallback: bool):
 	"""A synthChanged event handler to alert the user about the deprecation of SAPI4."""
 

From 36d183ed093158226495b64172ed8776b82e70d5 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 21 Feb 2025 04:15:29 +0000
Subject: [PATCH 02/21] Pre-commit auto-fix

---
 source/synthDrivers/_sapi4.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/source/synthDrivers/_sapi4.py b/source/synthDrivers/_sapi4.py
index a1b9ca2a5b..f67e664d3c 100755
--- a/source/synthDrivers/_sapi4.py
+++ b/source/synthDrivers/_sapi4.py
@@ -268,7 +268,11 @@ class IAudio(IUnknown):
 	COMMETHOD([], HRESULT, "Stop"),
 	COMMETHOD([], HRESULT, "TotalGet", (["out"], POINTER(QWORD), "pqWord")),
 	COMMETHOD(
-		[], HRESULT, "ToFileTime", (["in"], POINTER(QWORD), "pqWord"), (["out"], POINTER(FILETIME), "pFT")
+		[],
+		HRESULT,
+		"ToFileTime",
+		(["in"], POINTER(QWORD), "pqWord"),
+		(["out"], POINTER(FILETIME), "pFT"),
 	),
 	COMMETHOD([], HRESULT, "WaveFormatGet", (["out"], POINTER(SDATA), "pdWFEX")),
 	COMMETHOD([], HRESULT, "WaveFormatSet", (["in"], SDATA, "dWFEX")),
@@ -281,7 +285,11 @@ class IAudioDest(IUnknown):
 
 IAudioDest._methods_ = [
 	COMMETHOD(
-		[], HRESULT, "FreeSpace", (["out"], POINTER(DWORD), "pdwBytes"), (["out"], POINTER(BOOL), "pfEOF")
+		[],
+		HRESULT,
+		"FreeSpace",
+		(["out"], POINTER(DWORD), "pdwBytes"),
+		(["out"], POINTER(BOOL), "pfEOF"),
 	),
 	COMMETHOD([], HRESULT, "DataSet", (["in"], c_void_p, "pBuffer"), (["in"], DWORD, "dwSize")),
 	COMMETHOD([], HRESULT, "BookMark", (["in"], DWORD, "dwMarkID")),

From ad95fea42d7733acd03dcc6717358cfc4ecc01a3 Mon Sep 17 00:00:00 2001
From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com>
Date: Fri, 21 Feb 2025 23:13:34 +0800
Subject: [PATCH 03/21] Add type hints & doc strings

---
 source/synthDrivers/sapi4.py | 46 +++++++++++++++++++++++++++---------
 1 file changed, 35 insertions(+), 11 deletions(-)

diff --git a/source/synthDrivers/sapi4.py b/source/synthDrivers/sapi4.py
index 8b5eeae942..8a3ffcba59 100755
--- a/source/synthDrivers/sapi4.py
+++ b/source/synthDrivers/sapi4.py
@@ -11,9 +11,21 @@
 import threading
 import winreg
 from comtypes import CoCreateInstance, COMObject, COMError, GUID, hresult, ReturnHRESULT
-from ctypes import addressof, byref, c_ulong, POINTER, c_void_p, cast, memmove, string_at, sizeof, windll
+from ctypes import (
+	addressof,
+	byref,
+	c_ulong,
+	c_ulonglong,
+	POINTER,
+	c_void_p,
+	cast,
+	memmove,
+	string_at,
+	sizeof,
+	windll,
+)
 from ctypes.wintypes import BOOL, DWORD, FILETIME, WORD
-from typing import Optional
+from typing import TYPE_CHECKING, Optional
 from autoSettingsUtils.driverSetting import BooleanDriverSetting
 import gui.contextHelp
 import gui.message
@@ -111,6 +123,14 @@ class _DeviceState(IntEnum):
 	CLOSING = 3  # Unclaiming
 
 
+if TYPE_CHECKING:
+	from ctypes import _Pointer
+
+	c_ulonglong_p = _Pointer[c_ulonglong]
+else:
+	c_ulonglong_p = POINTER(c_ulonglong)
+
+
 class SynthDriverAudio(COMObject):
 	"""
 	Implements IAudio and IAudioDest to receive streamed in audio data.
@@ -205,7 +225,7 @@ def IAudio_PosnGet(self) -> int:
 		which should increase monotonically and never reset."""
 		return self._playedBytes
 
-	def IAudio_Claim(self):
+	def IAudio_Claim(self) -> None:
 		"""Acquires (opens) the multimedia device.
 		`IAudioDestNotifySink::AudioStart()` will be called to notify the engine."""
 		if not self._waveFormat:
@@ -222,7 +242,7 @@ def IAudio_Claim(self):
 		if self._notifySink:
 			self._notifySink.AudioStart()
 
-	def IAudio_UnClaim(self):
+	def IAudio_UnClaim(self) -> None:
 		"""Releases the multimedia device asynchronously.
 		`IAudioDestNotifySink::AudioStop()` will be called after the audio completely stops."""
 		if self._deviceState == _DeviceState.CLOSED:
@@ -259,8 +279,7 @@ def IAudio_TotalGet(self) -> int:
 		which should increase monotonically and never reset."""
 		return self._writtenBytes
 
-	def IAudio_ToFileTime(self, pqWord):
-		# TODO: Add type hint
+	def IAudio_ToFileTime(self, pqWord: c_ulonglong_p) -> FILETIME:
 		"""Converts a byte position to UTC FILETIME."""
 		if not self._waveFormat:
 			raise ReturnHRESULT(AUDERR_NEEDWAVEFORMAT, None)
@@ -303,11 +322,13 @@ def _getFreeSpace(self) -> int:
 		return self._waveFormat.nAvgBytesPerSec // 5  # always 200ms
 
 	def IAudioDest_FreeSpace(self) -> tuple[DWORD, BOOL]:
-		# TODO: Docstring about return value
-		"""Returns the number of bytes that are free in the object's internal buffer."""
+		"""Returns the number of bytes that are free in the object's internal buffer.
+		:returns: Tuple (dwBytes, fEOF).
+			dwBytes: number of bytes available.
+			fEOF: TRUE if end-of-file is reached and no more data can be sent."""
 		return (self._getFreeSpace(), 0)
 
-	def IAudioDest_DataSet(self, pBuffer: c_void_p, dwSize: int):
+	def IAudioDest_DataSet(self, pBuffer: c_void_p, dwSize: int) -> None:
 		"""Writes audio data to the end of the object's internal buffer.
 		This should not block.
 		When data cannot fit in the buffer, this should return AUDERR_NOTENOUGHDATA immediately."""
@@ -319,7 +340,7 @@ def IAudioDest_DataSet(self, pBuffer: c_void_p, dwSize: int):
 			self._writtenBytes += dwSize
 			self._audioCond.notify()
 
-	def IAudioDest_BookMark(self, dwMarkID: int):
+	def IAudioDest_BookMark(self, dwMarkID: int) -> None:
 		"""Attaches a bookmark to the most recent data in the audio-destination object's internal buffer.
 		When the bookmark is reached, `IAudioDestNotifySink::BookMark` is called.
 		When Flush is called, untriggered bookmarks should also be triggered."""
@@ -327,6 +348,7 @@ def IAudioDest_BookMark(self, dwMarkID: int):
 			self._audioQueue.append(dwMarkID)
 
 	def _audioThreadFunc(self):
+		"""Audio thread function that feeds the audio data from queue to WavePlayer."""
 		while True:
 			with self._audioCond:
 				while not self._audioQueue and not self._audioStopped:
@@ -382,12 +404,14 @@ def _onBookmark(self, dwMarkID: int):
 			self._notifySink.BookMark(dwMarkID, 0)
 
 	def _finishClose(self):
+		"""Finishes the asynchronous UnClaim call."""
 		if self._deviceState == _DeviceState.CLOSING:
 			self._player.stop()
 			self._shutdownAudioThread()
+			self._deviceState = _DeviceState.CLOSED
 			if self._notifySink:
+				# Notify when the device is finally closed
 				self._notifySink.AudioStop(0)  # IANSRSN_NODATA
-			self._deviceState = _DeviceState.CLOSED
 
 
 class SynthDriverSink(COMObject):

From 302968a2b36b2603ce9af12a549f51cf7c196208 Mon Sep 17 00:00:00 2001
From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com>
Date: Fri, 21 Feb 2025 23:17:12 +0800
Subject: [PATCH 04/21] Change log level in `cancel` to debugWarning, because
 error 0x80010005 is sometimes raised

---
 source/synthDrivers/sapi4.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/synthDrivers/sapi4.py b/source/synthDrivers/sapi4.py
index 8a3ffcba59..26e19af930 100755
--- a/source/synthDrivers/sapi4.py
+++ b/source/synthDrivers/sapi4.py
@@ -591,7 +591,7 @@ def cancel(self):
 			self._bookmarks = None
 			self._ttsCentral.AudioReset()
 		except COMError:
-			log.error("Error cancelling speech", exc_info=True)
+			log.debugWarning("Error cancelling speech", exc_info=True)
 		finally:
 			self._finalIndex = None
 

From df66a5253a518f7cb9ea076f53bb80ac854aa73d Mon Sep 17 00:00:00 2001
From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com>
Date: Sat, 22 Feb 2025 08:59:36 +0800
Subject: [PATCH 05/21] Reset `_startTime` on start

---
 source/synthDrivers/sapi4.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/source/synthDrivers/sapi4.py b/source/synthDrivers/sapi4.py
index 26e19af930..08c6bdbebe 100755
--- a/source/synthDrivers/sapi4.py
+++ b/source/synthDrivers/sapi4.py
@@ -148,6 +148,7 @@ def __init__(self):
 		self._writtenBytes = 0
 		self._playedBytes = 0
 		self._startTime = datetime.now()
+		self._startBytes = 0
 		self._audioQueue: deque[bytes | int] = deque()  # bytes: audio, int: bookmark
 		self._audioCond = threading.Condition()
 		self._audioStopped = False
@@ -257,6 +258,8 @@ def IAudio_Start(self) -> None:
 			raise ReturnHRESULT(AUDERR_ALREADYSTARTED, None)
 		if self._deviceState != _DeviceState.OPENED:
 			raise ReturnHRESULT(AUDERR_NOTCLAIMED, None)
+		self._startTime = datetime.now()
+		self._startBytes = self._playedBytes
 		try:
 			self._player.pause(False)
 		except OSError:
@@ -284,7 +287,7 @@ def IAudio_ToFileTime(self, pqWord: c_ulonglong_p) -> None:
 		if not self._waveFormat:
 			raise ReturnHRESULT(AUDERR_NEEDWAVEFORMAT, None)
 		filetime_ticks = int((self._startTime.timestamp() + 11644473600) * 10_000_000)
-		filetime_ticks += pqWord[0] * 10_000_000 // self._waveFormat.nAvgBytesPerSec
+		filetime_ticks += (pqWord[0] - self._startBytes) * 10_000_000 // self._waveFormat.nAvgBytesPerSec
 		return FILETIME(filetime_ticks & 0xFFFFFFFF, filetime_ticks >> 32)
 
 	def IAudio_WaveFormatGet(self) -> SDATA:

From d57d6c4b9ab161c142dd3b8f4c7bb9028b4cb512 Mon Sep 17 00:00:00 2001
From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com>
Date: Sat, 22 Feb 2025 09:13:31 +0800
Subject: [PATCH 06/21] Implement LevelGet/Set

---
 source/synthDrivers/sapi4.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/source/synthDrivers/sapi4.py b/source/synthDrivers/sapi4.py
index 08c6bdbebe..37e907315d 100755
--- a/source/synthDrivers/sapi4.py
+++ b/source/synthDrivers/sapi4.py
@@ -153,6 +153,7 @@ def __init__(self):
 		self._audioCond = threading.Condition()
 		self._audioStopped = False
 		self._audioThread: Optional[threading.Thread] = None
+		self._level = 0xFFFFFFFF
 
 	def terminate(self):
 		self._shutdownAudioThread()
@@ -181,6 +182,7 @@ def _maybeInitPlayer(self) -> None:
 			outputDevice=config.conf["audio"]["outputDevice"],
 		)
 		self._player.open()
+		self.IAudio_LevelSet(self._level)
 
 	def IAudio_Flush(self) -> None:
 		"""Clears the object's internal buffer and resets the audio device,
@@ -200,14 +202,16 @@ def IAudio_Flush(self) -> None:
 	def IAudio_LevelGet(self) -> int:
 		"""Returns the volume level, ranging from 0x0000 to 0xFFFF.
 		Low word is for the left (or mono) channel, and high word is for the right channel."""
-		# TODO: Not implemented yet.
-		return 0xFFFF
+		return self._level
 
 	def IAudio_LevelSet(self, dwLevel: int) -> None:
 		"""Sets the volume level, ranging from 0x0000 to 0xFFFF.
 		Low word is for the left (or mono) channel, and high word is for the right channel."""
-		# TODO: Not implemented yet.
-		pass
+		self._level = dwLevel
+		if dwLevel & 0xFFFF0000:
+			self._player.setVolume(left=float(dwLevel & 0xFFFF) / 0xFFFF, right=float(dwLevel >> 16) / 0xFFFF)
+		else:
+			self._player.setVolume(all=float(dwLevel) / 0xFFFF)
 
 	def IAudio_PassNotify(self, pNotifyInterface: c_void_p, IIDNotifyInterface: GUID) -> None:
 		"""Passes in an implementation of IAudioDestNotifySink to receive notifications.

From 793a2be9f73cb9d1476ecf4275aa2cc965ae5741 Mon Sep 17 00:00:00 2001
From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com>
Date: Sat, 22 Feb 2025 11:43:16 +0800
Subject: [PATCH 07/21] Add changelog entry

---
 user_docs/en/changes.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/user_docs/en/changes.md b/user_docs/en/changes.md
index c79cce0de3..1dffa2812a 100644
--- a/user_docs/en/changes.md
+++ b/user_docs/en/changes.md
@@ -78,6 +78,7 @@ Prefix matching on command line flags, e.g. using `--di` for `--disable-addons`
 * The keyboard settings for "Speak typed characters" and "Speak typed words" now have three options: Off, Only in edit controls, and Always. (#17505, @Cary-rowen)
   * By default, "Speak typed characters" is now set to "Only in edit controls".
 * The silence at the beginning of speech will now be trimmed when using OneCore voices, SAPI5 voices, and some third-party voice add-ons to improve their responsiveness. (#17614, @gexgd0419)
+* Microsoft Speech API version 4 voices now use WASAPI for audio output, so that they can work with features such as audio ducking, leading silence trimming, and keeping the audio device awake. (#17718, @gexgd0419)
 
 ### Security Fixes
 

From 858ff5ed6fc0a81bb2308fd41b023d3c9bad6e1d Mon Sep 17 00:00:00 2001
From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com>
Date: Mon, 24 Feb 2025 00:37:56 +0800
Subject: [PATCH 08/21] Change implementation, add docstring

---
 source/synthDrivers/sapi4.py | 172 +++++++++++++++++++++--------------
 1 file changed, 105 insertions(+), 67 deletions(-)

diff --git a/source/synthDrivers/sapi4.py b/source/synthDrivers/sapi4.py
index 37e907315d..b593de3ac8 100755
--- a/source/synthDrivers/sapi4.py
+++ b/source/synthDrivers/sapi4.py
@@ -116,13 +116,6 @@ def IUnknown_Release(self, this, *args, **kwargs):
 		return super(SynthDriverBufSink, self).IUnknown_Release(this, *args, **kwargs)
 
 
-class _DeviceState(IntEnum):
-	CLOSED = 0  # Not claimed
-	OPENED = 1  # Claimed
-	RUNNING = 2  # Started
-	CLOSING = 3  # Unclaiming
-
-
 if TYPE_CHECKING:
 	from ctypes import _Pointer
 
@@ -136,13 +129,31 @@ class SynthDriverAudio(COMObject):
 	Implements IAudio and IAudioDest to receive streamed in audio data.
 	An instance of this class will be passed to,
 	and be used by the TTS engine.
+
+	Typically, an engine does the following things to output audio.
+	(Note that different engines may have different implementations)
+
+	- Initialize, such as setting wave format with `WaveFormatSet`, setting notify sink with `PassNotify`, etc.
+	- Call `Claim` to prepare the audio output.
+	- Call `DataSet` to prepare some initial audio data.
+	- Call `Start` to start playing.
+	- Call `DataSet` to provide more audio data,
+	  and call `BookMark` when the engine wants to know when audio reaches a specific point.
+	- Call `UnClaim` when all the audio has been written. The audio will still be played to the end.
+	- When pausing the audio, it calls `Stop` and `UnClaim`.
+	- When unpausing the audio, it calls `Claim` and `Start`.
+	- When resetting the audio, it calls `Stop`, `Flush`, and `UnClaim`.
+	  `Stop` and `UnClaim` will not clear the buffer, but `Flush` will.
 	"""
 
 	_com_interfaces_ = [IAudio, IAudioDest]
 
 	def __init__(self):
 		self._notifySink = None
-		self._deviceState = _DeviceState.CLOSED
+		self._deviceClaimed = False
+		self._deviceStarted = False
+		self._deviceUnClaiming = False
+		self._deviceUnClaimingBytePos: Optional[int] = None
 		self._waveFormat: Optional[nvwave.WAVEFORMATEX] = None
 		self._player: Optional[nvwave.WavePlayer] = None
 		self._writtenBytes = 0
@@ -152,11 +163,16 @@ def __init__(self):
 		self._audioQueue: deque[bytes | int] = deque()  # bytes: audio, int: bookmark
 		self._audioCond = threading.Condition()
 		self._audioStopped = False
-		self._audioThread: Optional[threading.Thread] = None
+		self._audioThread = threading.Thread(target=self._audioThreadFunc)
+		self._audioThread.start()
 		self._level = 0xFFFFFFFF
 
 	def terminate(self):
-		self._shutdownAudioThread()
+		with self._audioCond:
+			self._audioStopped = True
+			self._audioCond.notify()
+		if self._audioThread is not threading.current_thread():
+			self._audioThread.join()
 
 	def __del__(self):
 		self.terminate()
@@ -232,53 +248,74 @@ def IAudio_PosnGet(self) -> int:
 
 	def IAudio_Claim(self) -> None:
 		"""Acquires (opens) the multimedia device.
-		`IAudioDestNotifySink::AudioStart()` will be called to notify the engine."""
+		Called before the engine wants to write audio data.
+		`IAudioDestNotifySink::AudioStart()` will be called to notify the engine.
+		Previous buffer should not be cleared.
+		If Claim is called before unclaiming completes, unclaiming is canceled,
+		and neither AudioStop nor AudioStart is notified."""
 		if not self._waveFormat:
 			raise ReturnHRESULT(AUDERR_NEEDWAVEFORMAT, None)
-		if self._deviceState == _DeviceState.CLOSING:
-			# Close immediately
-			self.IAudio_Flush()
-			self._finishClose()
-		if self._deviceState != _DeviceState.CLOSED:
+		with self._audioCond:
+			if self._deviceUnClaiming:
+				# Unclaiming is cancelled, but nothing else is touched.
+				self._deviceUnClaiming = False
+				self._deviceUnClaimingBytePos = None
+				return
+		if self._deviceClaimed:
 			raise ReturnHRESULT(AUDERR_ALREADYCLAIMED, None)
 		self._maybeInitPlayer()
-		self._startAudioThread()
-		self._deviceState = _DeviceState.OPENED
+		self._deviceClaimed = True
 		if self._notifySink:
 			self._notifySink.AudioStart()
 
 	def IAudio_UnClaim(self) -> None:
 		"""Releases the multimedia device asynchronously.
+		Called after the engine completes writing all audio data.
+		If there is audio in the buffer, it should still be played till the end.
 		`IAudioDestNotifySink::AudioStop()` will be called after the audio completely stops."""
-		if self._deviceState == _DeviceState.CLOSED:
+		if not self._deviceClaimed:
 			raise ReturnHRESULT(AUDERR_NOTCLAIMED, None)
-		self._deviceState = _DeviceState.CLOSING
+		if not self._deviceStarted:
+			# When not playing, this can finish immediately.
+			self._deviceClaimed = False
+			if self._notifySink:
+				self._notifySink.AudioStop(0)  # IANSRSN_NODATA
+			return
+		# When playing, wait for the playback to finish.
 		with self._audioCond:
+			self._deviceUnClaiming = True
+			self._deviceUnClaimingBytePos = self._writtenBytes
 			self._audioCond.notify()
 
 	def IAudio_Start(self) -> None:
-		"""Starts (or resumes) playing."""
-		if self._deviceState == _DeviceState.RUNNING:
+		"""Starts (or resumes) playing the audio in the buffer."""
+		if self._deviceStarted:
 			raise ReturnHRESULT(AUDERR_ALREADYSTARTED, None)
-		if self._deviceState != _DeviceState.OPENED:
+		if not self._deviceClaimed:
 			raise ReturnHRESULT(AUDERR_NOTCLAIMED, None)
 		self._startTime = datetime.now()
 		self._startBytes = self._playedBytes
 		try:
 			self._player.pause(False)
 		except OSError:
-			pass
-		self._deviceState = _DeviceState.RUNNING
+			log.debugWarning("Error starting audio", exc_info=True)
+		with self._audioCond:
+			self._deviceStarted = True
+			self._audioCond.notify()
 
 	def IAudio_Stop(self) -> None:
-		"""Stops (or pauses) playing, without clearing the buffer."""
-		if self._deviceState != _DeviceState.RUNNING:
+		"""Stops (or pauses) playing, without clearing the buffer.
+		If there is audio in the buffer, calling Stop and UnClaim should keep the buffer
+		and only pause the playback."""
+		if not self._deviceStarted:
 			return  # no error returned
 		try:
 			self._player.pause(True)
 		except OSError:
-			pass
-		self._deviceState = _DeviceState.OPENED
+			log.debugWarning("Error stopping audio", exc_info=True)
+		with self._audioCond:
+			self._deviceStarted = False
+			self._audioCond.notify()
 
 	def IAudio_TotalGet(self) -> int:
 		"""Returns the total number of bytes written,
@@ -317,7 +354,7 @@ def IAudio_WaveFormatSet(self, dWFEX: SDATA) -> None:
 		if pWfx[0].wFormatTag != nvwave.WAVE_FORMAT_PCM:
 			log.debugWarning("Wave format not supported. Only integer PCM formats are supported.")
 			raise ReturnHRESULT(AUDERR_WAVEFORMATNOTSUPPORTED, None)
-		if self._deviceState != _DeviceState.CLOSED:
+		if self._deviceStarted or self._audioQueue:
 			log.debugWarning("Cannot change wave format during playback.")
 			raise ReturnHRESULT(AUDERR_WAVEFORMATNOTSUPPORTED, None)
 		self._waveFormat = nvwave.WAVEFORMATEX()
@@ -339,7 +376,7 @@ def IAudioDest_DataSet(self, pBuffer: c_void_p, dwSize: int) -> None:
 		"""Writes audio data to the end of the object's internal buffer.
 		This should not block.
 		When data cannot fit in the buffer, this should return AUDERR_NOTENOUGHDATA immediately."""
-		if self._deviceState != _DeviceState.RUNNING and self._deviceState != _DeviceState.OPENED:
+		if not self._deviceClaimed or self._deviceUnClaiming:
 			log.debugWarning("Audio data written when device is not claimed")
 			raise ReturnHRESULT(AUDERR_NOTCLAIMED, None)
 		with self._audioCond:
@@ -358,18 +395,28 @@ def _audioThreadFunc(self):
 		"""Audio thread function that feeds the audio data from queue to WavePlayer."""
 		while True:
 			with self._audioCond:
-				while not self._audioQueue and not self._audioStopped:
-					# Since WavePlayer.feed returns before the audio finishes,
-					# in order not to lose the final callbacks
-					# when there's no more audio to feed,
-					# wait with a timeout to give WavePlayer a chance
-					# to check the callbacks periodically.
-					self._audioCond.wait(0.1)
-					if self._audioQueue:
+				while not self._audioStopped and not (self._deviceStarted and self._audioQueue):
+					if self._deviceStarted:
+						# Since WavePlayer.feed returns before the audio finishes,
+						# in order not to lose the final callbacks
+						# when there's no more audio to feed,
+						# wait with a timeout to give WavePlayer a chance
+						# to check the callbacks periodically.
+						self._audioCond.wait(0.1)
+					else:
+						self._audioCond.wait()
+					if self._deviceStarted and self._audioQueue:
 						break
-					if self._deviceState == _DeviceState.CLOSING:
+					if not self._player:
+						continue
+					if self._deviceUnClaimingBytePos is not None:
 						# Closing in progress, wait for the audio to finish
-						self._player.feed(None, 0, lambda: self._finishClose())
+						self._player.feed(
+							None,
+							0,
+							lambda bytePos=self._deviceUnClaimingBytePos: self._finishUnClaim(bytePos),
+						)
+						self._deviceUnClaimingBytePos = None
 					else:
 						# Call feed to let WavePlayer check the callbacks
 						self._player.feed(None, 0, None)
@@ -384,23 +431,6 @@ def _audioThreadFunc(self):
 				else:
 					self._player.feed(None, 0, lambda item=item: self._onBookmark(item))
 
-	def _startAudioThread(self):
-		if self._audioThread:
-			return
-		self._audioStopped = False
-		self._audioThread = threading.Thread(target=self._audioThreadFunc)
-		self._audioThread.start()
-
-	def _shutdownAudioThread(self):
-		if not self._audioThread:
-			return
-		with self._audioCond:
-			self._audioStopped = True
-			self._audioCond.notify()
-		if self._audioThread is not threading.current_thread():
-			self._audioThread.join()
-		self._audioThread = None
-
 	def _onChunkFinished(self, chunk: bytes):
 		self._playedBytes += len(chunk)
 		if self._notifySink:
@@ -410,15 +440,20 @@ def _onBookmark(self, dwMarkID: int):
 		if self._notifySink:
 			self._notifySink.BookMark(dwMarkID, 0)
 
-	def _finishClose(self):
-		"""Finishes the asynchronous UnClaim call."""
-		if self._deviceState == _DeviceState.CLOSING:
-			self._player.stop()
-			self._shutdownAudioThread()
-			self._deviceState = _DeviceState.CLOSED
-			if self._notifySink:
-				# Notify when the device is finally closed
-				self._notifySink.AudioStop(0)  # IANSRSN_NODATA
+	def _finishUnClaim(self, bytePos: int):
+		"""Finishes the asynchronous UnClaim call.
+
+		:param bytePos: The written byte count when this UnClaim request is made.
+			This is checked to prevent triggering on outdated UnClaim requests."""
+		if not self._deviceUnClaiming or self._writtenBytes != bytePos:
+			return
+		self._player.stop()
+		self._deviceStarted = False
+		self._deviceUnClaiming = False
+		self._deviceClaimed = False
+		if self._notifySink:
+			# Notify when the device is finally closed
+			self._notifySink.AudioStop(0)  # IANSRSN_NODATA
 
 
 class SynthDriverSink(COMObject):
@@ -505,6 +540,7 @@ def __init__(self):
 		self._sinkPtr = self._sink.QueryInterface(ITTSNotifySinkW)
 		self._bufSink = SynthDriverBufSink(weakref.ref(self))
 		self._bufSinkPtr = self._bufSink.QueryInterface(ITTSBufNotifySink)
+		self._ttsAudio: Optional[SynthDriverAudio] = None
 		# HACK: Some buggy engines call Release() too many times on our buf sink.
 		# Therefore, don't let the buf sink be deleted before we release it ourselves.
 		self._bufSink._allowDelete = False
@@ -630,6 +666,8 @@ def _set_voice(self, val):
 		if mode is None:
 			raise ValueError("no such mode: %s" % val)
 		self._currentMode = mode
+		if self._ttsAudio:
+			self._ttsAudio.terminate()
 		self._ttsAudio = SynthDriverAudio()
 		if self._ttsCentral:
 			self._ttsCentral.UnRegister(self._sinkRegKey)

From 03ce8108102d3659c33193afeeec4466fc4b4828 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 23 Feb 2025 16:38:50 +0000
Subject: [PATCH 09/21] Pre-commit auto-fix

---
 source/synthDrivers/sapi4.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/source/synthDrivers/sapi4.py b/source/synthDrivers/sapi4.py
index b593de3ac8..4e76ccef17 100755
--- a/source/synthDrivers/sapi4.py
+++ b/source/synthDrivers/sapi4.py
@@ -5,7 +5,6 @@
 # This module is deprecated, pending removal in NVDA 2026.1.
 
 from datetime import datetime
-from enum import IntEnum
 import locale
 from collections import OrderedDict, deque
 import threading

From a06d3f4a1adab4676a3a27cb7c5f40a582cbf2c2 Mon Sep 17 00:00:00 2001
From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com>
Date: Mon, 24 Feb 2025 19:17:17 +0800
Subject: [PATCH 10/21] Stop the player when unclaiming with empty buffer

---
 source/synthDrivers/sapi4.py | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/source/synthDrivers/sapi4.py b/source/synthDrivers/sapi4.py
index 4e76ccef17..2501eb309c 100755
--- a/source/synthDrivers/sapi4.py
+++ b/source/synthDrivers/sapi4.py
@@ -274,17 +274,20 @@ def IAudio_UnClaim(self) -> None:
 		`IAudioDestNotifySink::AudioStop()` will be called after the audio completely stops."""
 		if not self._deviceClaimed:
 			raise ReturnHRESULT(AUDERR_NOTCLAIMED, None)
-		if not self._deviceStarted:
+		if self._deviceStarted:
+			# When playing, wait for the playback to finish.
+			with self._audioCond:
+				self._deviceUnClaiming = True
+				self._deviceUnClaimingBytePos = self._writtenBytes
+				self._audioCond.notify()
+		else:
 			# When not playing, this can finish immediately.
+			if self._writtenBytes == self._playedBytes and not self._audioQueue:
+				# If all audio is done playing, stop the player.
+				self._player.stop()
 			self._deviceClaimed = False
 			if self._notifySink:
 				self._notifySink.AudioStop(0)  # IANSRSN_NODATA
-			return
-		# When playing, wait for the playback to finish.
-		with self._audioCond:
-			self._deviceUnClaiming = True
-			self._deviceUnClaimingBytePos = self._writtenBytes
-			self._audioCond.notify()
 
 	def IAudio_Start(self) -> None:
 		"""Starts (or resumes) playing the audio in the buffer."""

From edfca3ea2a23bce67a0f358c4fe26423e25e1042 Mon Sep 17 00:00:00 2001
From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com>
Date: Mon, 24 Feb 2025 22:25:03 +0800
Subject: [PATCH 11/21] Remove WinMM-related stuff

---
 source/synthDrivers/_sapi4.py | 17 -----------------
 1 file changed, 17 deletions(-)

diff --git a/source/synthDrivers/_sapi4.py b/source/synthDrivers/_sapi4.py
index f67e664d3c..f73d565760 100755
--- a/source/synthDrivers/_sapi4.py
+++ b/source/synthDrivers/_sapi4.py
@@ -18,7 +18,6 @@
 	Structure,
 )
 from ctypes.wintypes import BOOL, BYTE, DWORD, FILETIME, LPCWSTR, WORD
-from enum import IntEnum
 from comtypes import GUID, IUnknown, STDMETHOD, COMMETHOD
 
 import winKernel
@@ -310,19 +309,3 @@ class IAudioDestNotifySink(IUnknown):
 
 CLSID_MMAudioDest = GUID("{CB96B400-C743-11cd-80E5-00AA003E4B50}")
 CLSID_TTSEnumerator = GUID("{D67C0280-C743-11cd-80E5-00AA003E4B50}")
-
-
-# Defined in mmsyscom.h
-MMSYSERR_NOERROR = 0
-
-
-class DriverMessage(IntEnum):
-	"""WaveOutMessage message codes
-	Defined in mmddk.h
-	"""
-
-	QUERY_INSTANCE_ID = 2065
-	"""DRV_QUERYFUNCTIONINSTANCEID """
-
-	QUERY_INSTANCE_ID_SIZE = 2066
-	"""DRV_QUERYFUNCTIONINSTANCEIDSIZE """

From 024e52807d6d9627c5220c78bdd7055c96f9d12f Mon Sep 17 00:00:00 2001
From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com>
Date: Tue, 25 Feb 2025 10:17:31 +0800
Subject: [PATCH 12/21] Apply suggestions from code review

Co-authored-by: Sean Budd <seanbudd123@gmail.com>
---
 source/synthDrivers/sapi4.py | 28 ++++++++++++++++------------
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/source/synthDrivers/sapi4.py b/source/synthDrivers/sapi4.py
index 2501eb309c..e7915db7f0 100755
--- a/source/synthDrivers/sapi4.py
+++ b/source/synthDrivers/sapi4.py
@@ -152,14 +152,17 @@ def __init__(self):
 		self._deviceClaimed = False
 		self._deviceStarted = False
 		self._deviceUnClaiming = False
-		self._deviceUnClaimingBytePos: Optional[int] = None
-		self._waveFormat: Optional[nvwave.WAVEFORMATEX] = None
-		self._player: Optional[nvwave.WavePlayer] = None
+		self._deviceUnClaimingBytePos: int | None = None
+		self._waveFormat: nvwave.WAVEFORMATEX | None = None
+		self._player: nvwave.WavePlayer | None = None
 		self._writtenBytes = 0
 		self._playedBytes = 0
 		self._startTime = datetime.now()
 		self._startBytes = 0
-		self._audioQueue: deque[bytes | int] = deque()  # bytes: audio, int: bookmark
+AudioT: TypeAlias = bytes
+BookmarkT: TypeAlias = int
+
+		self._audioQueue: deque[AudioT | BookmarkT] = deque()
 		self._audioCond = threading.Condition()
 		self._audioStopped = False
 		self._audioThread = threading.Thread(target=self._audioThreadFunc)
@@ -208,7 +211,7 @@ def IAudio_Flush(self) -> None:
 			if self._notifySink:
 				while self._audioQueue:
 					item = self._audioQueue.popleft()
-					if isinstance(item, int):
+					if isinstance(item, BookmarkT):
 						# Flush all untriggered bookmarks.
 						# 1 (TRUE) means that the bookmark is sent because of flushing.
 						self._notifySink.BookMark(item, 1)
@@ -329,7 +332,8 @@ def IAudio_ToFileTime(self, pqWord: c_ulonglong_p) -> None:
 		"""Converts a byte position to UTC FILETIME."""
 		if not self._waveFormat:
 			raise ReturnHRESULT(AUDERR_NEEDWAVEFORMAT, None)
-		filetime_ticks = int((self._startTime.timestamp() + 11644473600) * 10_000_000)
+		UNIX_TIME_CONV = 1_1644_473_600
+		filetime_ticks = int((self._startTime.timestamp() + UNIX_TIME_CONV) * 10_000_000)
 		filetime_ticks += (pqWord[0] - self._startBytes) * 10_000_000 // self._waveFormat.nAvgBytesPerSec
 		return FILETIME(filetime_ticks & 0xFFFFFFFF, filetime_ticks >> 32)
 
@@ -386,7 +390,7 @@ def IAudioDest_DataSet(self, pBuffer: c_void_p, dwSize: int) -> None:
 			self._writtenBytes += dwSize
 			self._audioCond.notify()
 
-	def IAudioDest_BookMark(self, dwMarkID: int) -> None:
+	def IAudioDest_BookMark(self, dwMarkID: BookmarkT) -> None:
 		"""Attaches a bookmark to the most recent data in the audio-destination object's internal buffer.
 		When the bookmark is reached, `IAudioDestNotifySink::BookMark` is called.
 		When Flush is called, untriggered bookmarks should also be triggered."""
@@ -425,20 +429,20 @@ def _audioThreadFunc(self):
 				if self._audioStopped:
 					return
 				item = self._audioQueue.popleft()
-			if isinstance(item, bytes):  # audio
+			if isinstance(item, AudioT):
 				self._player.feed(item, len(item), lambda item=item: self._onChunkFinished(item))
-			elif isinstance(item, int):  # bookmark
+			elif isinstance(item, BookmarkT): 
 				if self._playedBytes == self._writtenBytes:
 					self._onBookmark(item)  # trigger immediately
 				else:
 					self._player.feed(None, 0, lambda item=item: self._onBookmark(item))
 
-	def _onChunkFinished(self, chunk: bytes):
+	def _onChunkFinished(self, chunk: AudioT):
 		self._playedBytes += len(chunk)
 		if self._notifySink:
 			self._notifySink.FreeSpace(self._getFreeSpace(), 0)
 
-	def _onBookmark(self, dwMarkID: int):
+	def _onBookmark(self, dwMarkID: BookmarkT):
 		if self._notifySink:
 			self._notifySink.BookMark(dwMarkID, 0)
 
@@ -542,7 +546,7 @@ def __init__(self):
 		self._sinkPtr = self._sink.QueryInterface(ITTSNotifySinkW)
 		self._bufSink = SynthDriverBufSink(weakref.ref(self))
 		self._bufSinkPtr = self._bufSink.QueryInterface(ITTSBufNotifySink)
-		self._ttsAudio: Optional[SynthDriverAudio] = None
+		self._ttsAudio: SynthDriverAudio | None = None
 		# HACK: Some buggy engines call Release() too many times on our buf sink.
 		# Therefore, don't let the buf sink be deleted before we release it ourselves.
 		self._bufSink._allowDelete = False

From 2d0e7d3b44b25cf08973d81f191ca3d1b587848c Mon Sep 17 00:00:00 2001
From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com>
Date: Tue, 25 Feb 2025 10:20:01 +0800
Subject: [PATCH 13/21] Move TypeAlias definitions

---
 source/synthDrivers/sapi4.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/source/synthDrivers/sapi4.py b/source/synthDrivers/sapi4.py
index e7915db7f0..ef9ba268b6 100755
--- a/source/synthDrivers/sapi4.py
+++ b/source/synthDrivers/sapi4.py
@@ -24,7 +24,7 @@
 	windll,
 )
 from ctypes.wintypes import BOOL, DWORD, FILETIME, WORD
-from typing import TYPE_CHECKING, Optional
+from typing import TYPE_CHECKING, Optional, TypeAlias
 from autoSettingsUtils.driverSetting import BooleanDriverSetting
 import gui.contextHelp
 import gui.message
@@ -121,6 +121,8 @@ def IUnknown_Release(self, this, *args, **kwargs):
 	c_ulonglong_p = _Pointer[c_ulonglong]
 else:
 	c_ulonglong_p = POINTER(c_ulonglong)
+AudioT: TypeAlias = bytes
+BookmarkT: TypeAlias = int
 
 
 class SynthDriverAudio(COMObject):
@@ -159,9 +161,6 @@ def __init__(self):
 		self._playedBytes = 0
 		self._startTime = datetime.now()
 		self._startBytes = 0
-AudioT: TypeAlias = bytes
-BookmarkT: TypeAlias = int
-
 		self._audioQueue: deque[AudioT | BookmarkT] = deque()
 		self._audioCond = threading.Condition()
 		self._audioStopped = False
@@ -431,7 +430,7 @@ def _audioThreadFunc(self):
 				item = self._audioQueue.popleft()
 			if isinstance(item, AudioT):
 				self._player.feed(item, len(item), lambda item=item: self._onChunkFinished(item))
-			elif isinstance(item, BookmarkT): 
+			elif isinstance(item, BookmarkT):
 				if self._playedBytes == self._writtenBytes:
 					self._onBookmark(item)  # trigger immediately
 				else:

From 79016ecc0691f6140d736faa919a12f802d6cea7 Mon Sep 17 00:00:00 2001
From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com>
Date: Tue, 25 Feb 2025 10:25:25 +0800
Subject: [PATCH 14/21] Check if `_ttsAudio` is null before termination

---
 source/synthDrivers/sapi4.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/source/synthDrivers/sapi4.py b/source/synthDrivers/sapi4.py
index ce7442b39b..64ccbdb79e 100755
--- a/source/synthDrivers/sapi4.py
+++ b/source/synthDrivers/sapi4.py
@@ -561,7 +561,8 @@ def __init__(self):
 
 	def terminate(self):
 		self._bufSink._allowDelete = True
-		self._ttsAudio.terminate()
+		if self._ttsAudio:
+			self._ttsAudio.terminate()
 		self._ttsCentral = None
 		self._ttsAttrs = None
 

From 9603d31cf7109cb9c1df2c217f61db66920f412e Mon Sep 17 00:00:00 2001
From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com>
Date: Tue, 25 Feb 2025 11:18:26 +0800
Subject: [PATCH 15/21] Make AUDERR codes an enum

---
 source/synthDrivers/_sapi4.py | 45 +++++++++++++++++++----------------
 source/synthDrivers/sapi4.py  | 33 +++++++++++--------------
 2 files changed, 39 insertions(+), 39 deletions(-)

diff --git a/source/synthDrivers/_sapi4.py b/source/synthDrivers/_sapi4.py
index f73d565760..a03de1be40 100755
--- a/source/synthDrivers/_sapi4.py
+++ b/source/synthDrivers/_sapi4.py
@@ -18,6 +18,7 @@
 	Structure,
 )
 from ctypes.wintypes import BOOL, BYTE, DWORD, FILETIME, LPCWSTR, WORD
+from enum import IntEnum
 from comtypes import GUID, IUnknown, STDMETHOD, COMMETHOD
 
 import winKernel
@@ -42,26 +43,30 @@
 TTSFEATURE_PITCH = 8
 TTSFEATURE_FIXEDAUDIO = 1024
 
-# Audio related error codes
-AUDERR_BADDEVICEID = -2147220735
-AUDERR_NEEDWAVEFORMAT = -2147220734
-AUDERR_NOTSUPPORTED = -2147467263  # E_NOTIMPL
-AUDERR_NOTENOUGHDATA = -2147220991
-AUDERR_NOTPLAYING = -2147220730
-AUDERR_WAVEFORMATNOTSUPPORTED = -2147220990
-AUDERR_WAVEDEVICEBUSY = -2147220989
-AUDERR_WAVEDEVNOTSUPPORTED = -2147220718
-AUDERR_NOTRECORDING = -2147220717
-AUDERR_INVALIDFLAG = -2147220988
-AUDERR_NODRIVER = -2147220713
-AUDERR_HANDLEBUSY = -2147220712
-AUDERR_INVALIDNOTIFYSINK = -2147220711
-AUDERR_WAVENOTENABLED = -2147220710
-AUDERR_ALREADYCLAIMED = -2147220707
-AUDERR_NOTCLAIMED = -2147220706
-AUDERR_STILLPLAYING = -2147220705
-AUDERR_ALREADYSTARTED = -2147220704
-AUDERR_SYNCNOTALLOWED = -2147220703
+
+class AudioError(IntEnum):
+	"""SAPI4 audio related error codes."""
+
+	BAD_DEVICE_ID = -2147220735
+	NEED_WAVE_FORMAT = -2147220734
+	NOT_SUPPORTED = -2147467263  # E_NOTIMPL
+	NOT_ENOUGH_DATA = -2147220991
+	NOT_PLAYING = -2147220730
+	WAVE_FORMAT_NOT_SUPPORTED = -2147220990
+	WAVE_DEVICE_BUSY = -2147220989
+	WAVE_DEV_NOT_SUPPORTED = -2147220718
+	NOT_RECORDING = -2147220717
+	INVALID_FLAG = -2147220988
+	NO_DRIVER = -2147220713
+	HANDLE_BUSY = -2147220712
+	INVALID_NOTIFY_SINK = -2147220711
+	WAVE_NOT_ENABLED = -2147220710
+	ALREADY_CLAIMED = -2147220707
+	NOT_CLAIMED = -2147220706
+	STILL_PLAYING = -2147220705
+	ALREADY_STARTED = -2147220704
+	SYNC_NOT_ALLOWED = -2147220703
+
 
 LANGID = WORD
 QWORD = c_ulonglong
diff --git a/source/synthDrivers/sapi4.py b/source/synthDrivers/sapi4.py
index 64ccbdb79e..385d96a334 100755
--- a/source/synthDrivers/sapi4.py
+++ b/source/synthDrivers/sapi4.py
@@ -35,12 +35,7 @@
 import warnings
 from utils.security import isRunningOnSecureDesktop
 from ._sapi4 import (
-	AUDERR_ALREADYCLAIMED,
-	AUDERR_ALREADYSTARTED,
-	AUDERR_INVALIDNOTIFYSINK,
-	AUDERR_NEEDWAVEFORMAT,
-	AUDERR_NOTCLAIMED,
-	AUDERR_WAVEFORMATNOTSUPPORTED,
+	AudioError,
 	SDATA,
 	CLSID_TTSEnumerator,
 	IAudio,
@@ -236,7 +231,7 @@ def IAudio_PassNotify(self, pNotifyInterface: c_void_p, IIDNotifyInterface: GUID
 		Allows specifying NULL for no sink."""
 		if IIDNotifyInterface != IAudioDestNotifySink._iid_:
 			log.debugWarning("Only IAudioDestNotifySink is allowed")
-			raise ReturnHRESULT(AUDERR_INVALIDNOTIFYSINK, None)
+			raise ReturnHRESULT(AudioError.INVALID_NOTIFY_SINK, None)
 		if self._notifySink:
 			self._notifySink = None
 		if pNotifyInterface:
@@ -255,7 +250,7 @@ def IAudio_Claim(self) -> None:
 		If Claim is called before unclaiming completes, unclaiming is canceled,
 		and neither AudioStop nor AudioStart is notified."""
 		if not self._waveFormat:
-			raise ReturnHRESULT(AUDERR_NEEDWAVEFORMAT, None)
+			raise ReturnHRESULT(AudioError.NEED_WAVE_FORMAT, None)
 		with self._audioCond:
 			if self._deviceUnClaiming:
 				# Unclaiming is cancelled, but nothing else is touched.
@@ -263,7 +258,7 @@ def IAudio_Claim(self) -> None:
 				self._deviceUnClaimingBytePos = None
 				return
 		if self._deviceClaimed:
-			raise ReturnHRESULT(AUDERR_ALREADYCLAIMED, None)
+			raise ReturnHRESULT(AudioError.ALREADY_CLAIMED, None)
 		self._maybeInitPlayer()
 		self._deviceClaimed = True
 		if self._notifySink:
@@ -275,7 +270,7 @@ def IAudio_UnClaim(self) -> None:
 		If there is audio in the buffer, it should still be played till the end.
 		`IAudioDestNotifySink::AudioStop()` will be called after the audio completely stops."""
 		if not self._deviceClaimed:
-			raise ReturnHRESULT(AUDERR_NOTCLAIMED, None)
+			raise ReturnHRESULT(AudioError.NOT_CLAIMED, None)
 		if self._deviceStarted:
 			# When playing, wait for the playback to finish.
 			with self._audioCond:
@@ -294,9 +289,9 @@ def IAudio_UnClaim(self) -> None:
 	def IAudio_Start(self) -> None:
 		"""Starts (or resumes) playing the audio in the buffer."""
 		if self._deviceStarted:
-			raise ReturnHRESULT(AUDERR_ALREADYSTARTED, None)
+			raise ReturnHRESULT(AudioError.ALREADY_STARTED, None)
 		if not self._deviceClaimed:
-			raise ReturnHRESULT(AUDERR_NOTCLAIMED, None)
+			raise ReturnHRESULT(AudioError.NOT_CLAIMED, None)
 		self._startTime = datetime.now()
 		self._startBytes = self._playedBytes
 		try:
@@ -330,7 +325,7 @@ def IAudio_TotalGet(self) -> int:
 	def IAudio_ToFileTime(self, pqWord: c_ulonglong_p) -> None:
 		"""Converts a byte position to UTC FILETIME."""
 		if not self._waveFormat:
-			raise ReturnHRESULT(AUDERR_NEEDWAVEFORMAT, None)
+			raise ReturnHRESULT(AudioError.NEED_WAVE_FORMAT, None)
 		UNIX_TIME_CONV = 1_1644_473_600
 		filetime_ticks = int((self._startTime.timestamp() + UNIX_TIME_CONV) * 10_000_000)
 		filetime_ticks += (pqWord[0] - self._startBytes) * 10_000_000 // self._waveFormat.nAvgBytesPerSec
@@ -341,7 +336,7 @@ def IAudio_WaveFormatGet(self) -> SDATA:
 		:returns: A pointer to the WAVEFORMATEX structure.
 			Should be freed by the caller using CoTaskMemFree."""
 		if not self._waveFormat:
-			raise ReturnHRESULT(AUDERR_NEEDWAVEFORMAT, None)
+			raise ReturnHRESULT(AudioError.NEED_WAVE_FORMAT, None)
 		size = sizeof(nvwave.WAVEFORMATEX)
 		ptr = windll.ole32.CoTaskMemAlloc(size)
 		if not ptr:
@@ -358,16 +353,16 @@ def IAudio_WaveFormatSet(self, dWFEX: SDATA) -> None:
 		pWfx = cast(dWFEX.pData, POINTER(nvwave.WAVEFORMATEX))
 		if pWfx[0].wFormatTag != nvwave.WAVE_FORMAT_PCM:
 			log.debugWarning("Wave format not supported. Only integer PCM formats are supported.")
-			raise ReturnHRESULT(AUDERR_WAVEFORMATNOTSUPPORTED, None)
+			raise ReturnHRESULT(AudioError.WAVE_FORMAT_NOT_SUPPORTED, None)
 		if self._deviceStarted or self._audioQueue:
 			log.debugWarning("Cannot change wave format during playback.")
-			raise ReturnHRESULT(AUDERR_WAVEFORMATNOTSUPPORTED, None)
+			raise ReturnHRESULT(AudioError.WAVE_FORMAT_NOT_SUPPORTED, None)
 		self._waveFormat = nvwave.WAVEFORMATEX()
 		memmove(addressof(self._waveFormat), pWfx, size)
 
 	def _getFreeSpace(self) -> int:
 		if not self._waveFormat:
-			raise ReturnHRESULT(AUDERR_NEEDWAVEFORMAT, None)
+			raise ReturnHRESULT(AudioError.NEED_WAVE_FORMAT, None)
 		return self._waveFormat.nAvgBytesPerSec // 5  # always 200ms
 
 	def IAudioDest_FreeSpace(self) -> tuple[DWORD, BOOL]:
@@ -380,10 +375,10 @@ def IAudioDest_FreeSpace(self) -> tuple[DWORD, BOOL]:
 	def IAudioDest_DataSet(self, pBuffer: c_void_p, dwSize: int) -> None:
 		"""Writes audio data to the end of the object's internal buffer.
 		This should not block.
-		When data cannot fit in the buffer, this should return AUDERR_NOTENOUGHDATA immediately."""
+		When data cannot fit in the buffer, this should return AudioError.NOT_ENOUGH_DATA immediately."""
 		if not self._deviceClaimed or self._deviceUnClaiming:
 			log.debugWarning("Audio data written when device is not claimed")
-			raise ReturnHRESULT(AUDERR_NOTCLAIMED, None)
+			raise ReturnHRESULT(AudioError.NOT_CLAIMED, None)
 		with self._audioCond:
 			self._audioQueue.append(string_at(pBuffer, dwSize))
 			self._writtenBytes += dwSize

From 8694ae4d96e16d15d52ef299274784d8a6e0cbcb Mon Sep 17 00:00:00 2001
From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com>
Date: Tue, 25 Feb 2025 11:35:30 +0800
Subject: [PATCH 16/21] Add comment and type hint

---
 source/synthDrivers/sapi4.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/source/synthDrivers/sapi4.py b/source/synthDrivers/sapi4.py
index 385d96a334..465ca421ea 100755
--- a/source/synthDrivers/sapi4.py
+++ b/source/synthDrivers/sapi4.py
@@ -114,8 +114,10 @@ def IUnknown_Release(self, this, *args, **kwargs):
 	from ctypes import _Pointer
 
 	c_ulonglong_p = _Pointer[c_ulonglong]
+	LP_IAudioDestNotifySink = _Pointer[IAudioDestNotifySink]
 else:
 	c_ulonglong_p = POINTER(c_ulonglong)
+	LP_IAudioDestNotifySink = POINTER(IAudioDestNotifySink)
 AudioT: TypeAlias = bytes
 BookmarkT: TypeAlias = int
 
@@ -145,7 +147,7 @@ class SynthDriverAudio(COMObject):
 	_com_interfaces_ = [IAudio, IAudioDest]
 
 	def __init__(self):
-		self._notifySink = None
+		self._notifySink: LP_IAudioDestNotifySink = None
 		self._deviceClaimed = False
 		self._deviceStarted = False
 		self._deviceUnClaiming = False
@@ -161,7 +163,7 @@ def __init__(self):
 		self._audioStopped = False
 		self._audioThread = threading.Thread(target=self._audioThreadFunc)
 		self._audioThread.start()
-		self._level = 0xFFFFFFFF
+		self._level = 0xFFFFFFFF  # defaults to maximum value (0xFFFF) for both channels (low and high word)
 
 	def terminate(self):
 		with self._audioCond:
@@ -235,7 +237,7 @@ def IAudio_PassNotify(self, pNotifyInterface: c_void_p, IIDNotifyInterface: GUID
 		if self._notifySink:
 			self._notifySink = None
 		if pNotifyInterface:
-			self._notifySink = cast(pNotifyInterface, POINTER(IAudioDestNotifySink))
+			self._notifySink = cast(pNotifyInterface, LP_IAudioDestNotifySink)
 
 	def IAudio_PosnGet(self) -> int:
 		"""Returns the byte position currently being played,

From e99c940d749d8fa96ea4ea33018809463b41d434 Mon Sep 17 00:00:00 2001
From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com>
Date: Tue, 25 Feb 2025 11:36:53 +0800
Subject: [PATCH 17/21] Release notifySink when terminated

---
 source/synthDrivers/sapi4.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/source/synthDrivers/sapi4.py b/source/synthDrivers/sapi4.py
index 465ca421ea..bb9e5e9171 100755
--- a/source/synthDrivers/sapi4.py
+++ b/source/synthDrivers/sapi4.py
@@ -171,6 +171,7 @@ def terminate(self):
 			self._audioCond.notify()
 		if self._audioThread is not threading.current_thread():
 			self._audioThread.join()
+		self._notifySink = None
 
 	def __del__(self):
 		self.terminate()

From 968261ce39882ee3d84a56973c0cf0b5d1f090e7 Mon Sep 17 00:00:00 2001
From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com>
Date: Tue, 25 Feb 2025 11:47:03 +0800
Subject: [PATCH 18/21] Remove SAPI 4 deprecation messages

---
 source/synthDrivers/sapi4.py | 55 ++----------------------------------
 user_docs/en/changes.md      |  4 ---
 user_docs/en/userGuide.md    |  1 -
 3 files changed, 2 insertions(+), 58 deletions(-)

diff --git a/source/synthDrivers/sapi4.py b/source/synthDrivers/sapi4.py
index bb9e5e9171..6ecba04a76 100755
--- a/source/synthDrivers/sapi4.py
+++ b/source/synthDrivers/sapi4.py
@@ -25,15 +25,9 @@
 )
 from ctypes.wintypes import BOOL, DWORD, FILETIME, WORD
 from typing import TYPE_CHECKING, Optional, TypeAlias
-from autoSettingsUtils.driverSetting import BooleanDriverSetting
-import gui.contextHelp
-import gui.message
 import nvwave
-import queueHandler
-from synthDriverHandler import SynthDriver, VoiceInfo, synthIndexReached, synthDoneSpeaking, synthChanged
+from synthDriverHandler import SynthDriver, VoiceInfo, synthIndexReached, synthDoneSpeaking
 from logHandler import log
-import warnings
-from utils.security import isRunningOnSecureDesktop
 from ._sapi4 import (
 	AudioError,
 	SDATA,
@@ -76,9 +70,6 @@
 from speech.types import SpeechSequence
 
 
-warnings.warn("synthDrivers.sapi4 is deprecated, pending removal in NVDA 2026.1.", DeprecationWarning)
-
-
 class SynthDriverBufSink(COMObject):
 	_com_interfaces_ = [ITTSBufNotifySink]
 
@@ -498,10 +489,7 @@ def ITTSNotifySinkW_AudioStop(self, this, qTimeStamp: int):
 class SynthDriver(SynthDriver):
 	name = "sapi4"
 	description = "Microsoft Speech API version 4"
-	supportedSettings = [
-		SynthDriver.VoiceSetting(),
-		BooleanDriverSetting("_hasWarningBeenShown", ""),
-	]
+	supportedSettings = [SynthDriver.VoiceSetting()]
 	supportedCommands = {
 		IndexCommand,
 		CharacterModeCommand,
@@ -834,42 +822,3 @@ def _set_volume(self, val: int):
 		# using the low word for the left channel and the high word for the right channel.
 		val |= val << 16
 		self._ttsAttrs.VolumeSet(val)
-
-
-def _sapi4DeprecationWarning(synth: SynthDriver, audioOutputDevice: str, isFallback: bool):
-	"""A synthChanged event handler to alert the user about the deprecation of SAPI4."""
-
-	def setShown(payload: gui.message.Payload):
-		synth._hasWarningBeenShown = True
-		synth.saveSettings()
-
-	def impl():
-		gui.message.MessageDialog(
-			parent=None,
-			message=_(
-				# Translators: Message warning users that SAPI4 is deprecated.
-				"Microsoft Speech API version 4 is obsolete. "
-				"Using this speech synthesizer may pose a security risk. "
-				"This synthesizer driver will be removed in NVDA 2026.1. "
-				"You are strongly encouraged to choose a more modern speech synthesizer. "
-				"Consult the Supported Speech Synthesizers section in the User Guide for suggestions. ",
-			),
-			# Translators: Title of a message dialog.
-			title=_("Warning"),
-			buttons=None,
-		).addOkButton(
-			callback=setShown,
-		).addHelpButton(
-			# Translators: A button in a dialog.
-			label=_("Open user guide"),
-			callback=lambda payload: gui.contextHelp.showHelp("SupportedSpeechSynths"),
-		).Show()
-
-	if (not isFallback) and (synth.name == "sapi4") and (not getattr(synth, "_hasWarningBeenShown", False)):
-		# We need to queue the dialog to appear, as wx may not have been initialised the first time this is called.
-		queueHandler.queueFunction(queueHandler.eventQueue, impl)
-
-
-if not isRunningOnSecureDesktop():
-	# Don't warn users about SAPI4 deprecation when running on a secure desktop.
-	synthChanged.register(_sapi4DeprecationWarning)
diff --git a/user_docs/en/changes.md b/user_docs/en/changes.md
index 0c57cbfd0f..5c0d0e36b1 100644
--- a/user_docs/en/changes.md
+++ b/user_docs/en/changes.md
@@ -4,9 +4,6 @@
 
 ### Important notes
 
-* The support for Microsoft Speech API version 4 synthesizers is planned for removal in NVDA 2026.1.
-Any remaining users of SAPI4 speech synthesizers are encouraged to choose a more modern speech synthesizer. (#17599)
-
 ### New Features
 
 * Add-on Store:
@@ -246,7 +243,6 @@ Use `gui.message.MessageDialog` instead. (#17582)
   * `NoConsoleOptionParser`, `stringToBool`, `stringToLang` in `__main__`; use the same symbols in `argsParsing` instead.
   * `__main__.parser`; use `argsParsing.getParser()` instead.
 * `bdDetect.DeviceType` is deprecated in favour of `bdDetect.ProtocolType` and `bdDetect.CommunicationType` to take into account the fact that both HID and Serial communication can take place over USB and Bluetooth. (#17537 , @LeonarddeR)
-* SAPI4, `synthDrivers.sapi4`, is deprecated and planned for removal in 2026.1. (#17599)
 
 ## 2024.4.2
 
diff --git a/user_docs/en/userGuide.md b/user_docs/en/userGuide.md
index f0e68a5ba4..2f85388134 100644
--- a/user_docs/en/userGuide.md
+++ b/user_docs/en/userGuide.md
@@ -3942,7 +3942,6 @@ There are also many variants which can be chosen to alter the sound of the voice
 SAPI 4 is an older Microsoft standard for software speech synthesizers.
 NVDA still supports this for users who already have SAPI 4 synthesizers installed.
 However, Microsoft no longer support this and needed components are no longer available from Microsoft.
-Support for SAPI4 will be removed in NVDA 2026.1.
 
 When using this synthesizer with NVDA, the available voices (accessed from the [Speech category](#SpeechSettings) of the [NVDA Settings](#NVDASettings) dialog or by the [Synth Settings Ring](#SynthSettingsRing)) will contain all the voices from all the installed SAPI 4 engines found on your system.
 

From 648b391eb0a8111d0c76894112037f5ac5421659 Mon Sep 17 00:00:00 2001
From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com>
Date: Tue, 25 Feb 2025 12:13:42 +0800
Subject: [PATCH 19/21] Add SAPI 4 links in projectDocs

---
 projectDocs/design/synthesizers.md | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/projectDocs/design/synthesizers.md b/projectDocs/design/synthesizers.md
index 5032f10446..8acbec6d31 100644
--- a/projectDocs/design/synthesizers.md
+++ b/projectDocs/design/synthesizers.md
@@ -5,10 +5,11 @@
 SAPI 4 synthesizers are not included with NVDA, and the runtimes are no longer included with Windows.
 Despite this, SAPI 4 support is still required, as many users prefer older synthesizers which rely on the SAPI 4 API.
 
-To test SAPI 4, you must install the SAPI 4 runtimes from Microsoft, as well as a synthesizer.
+To test SAPI 4, you must install the SAPI 4 runtimes (or the SDK containing the runtimes) from Microsoft, as well as a synthesizer.
 Microsoft no longer hosts downloads for these, but archives and mirrors exist.
 
-1. Download and install the SAPI 4 runtimes from [this Microsoft archive](http://web.archive.org/web/20150910165037/http://activex.microsoft.com/activex/controls/sapi/spchapi.exe).
-1. Download and install a SAPI 4 synthesizer from [this Microsoft archive](http://web.archive.org/web/20150910005021if_/http://activex.microsoft.com/activex/controls/agent2/tv_enua.exe)
+First, you can download and install either only the SAPI 4 runtimes from [this Microsoft archive](http://web.archive.org/web/20150910165037/http://activex.microsoft.com/activex/controls/sapi/spchapi.exe), or the SAPI 4 SDK from [this Microsoft archive](https://web.archive.org/web/20030203082745/http://download.microsoft.com/download/speechSDK/Install/4.0a/WIN98/EN-US/SAPI4SDK.exe), which contains the SAPI 4 runtimes, offline documentation, and sample source code & test applications. If you only need the offline documentation, you can download the documentation for SAPI 4 COM interfaces from [this Microsoft archive](https://web.archive.org/web/19990418101425/http://www.microsoft.com/iit/onlineDocs/speechsdk4-com.chm).
+
+After installing the runtimes, download and install a SAPI 4 synthesizer from [this Microsoft archive](http://web.archive.org/web/20150910005021if_/http://activex.microsoft.com/activex/controls/agent2/tv_enua.exe).
 
 After this, you should be able to select SAPI 4 as a NVDA synthesizer.

From 0ac0402f4b6b26c0f1c14dca8a9303d2fb6f3320 Mon Sep 17 00:00:00 2001
From: Sean Budd <seanbudd123@gmail.com>
Date: Tue, 25 Feb 2025 15:27:35 +1100
Subject: [PATCH 20/21] Apply suggestions from code review

---
 projectDocs/design/synthesizers.md | 3 ++-
 source/synthDrivers/sapi4.py       | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/projectDocs/design/synthesizers.md b/projectDocs/design/synthesizers.md
index 8acbec6d31..eef162f4cd 100644
--- a/projectDocs/design/synthesizers.md
+++ b/projectDocs/design/synthesizers.md
@@ -8,7 +8,8 @@ Despite this, SAPI 4 support is still required, as many users prefer older synth
 To test SAPI 4, you must install the SAPI 4 runtimes (or the SDK containing the runtimes) from Microsoft, as well as a synthesizer.
 Microsoft no longer hosts downloads for these, but archives and mirrors exist.
 
-First, you can download and install either only the SAPI 4 runtimes from [this Microsoft archive](http://web.archive.org/web/20150910165037/http://activex.microsoft.com/activex/controls/sapi/spchapi.exe), or the SAPI 4 SDK from [this Microsoft archive](https://web.archive.org/web/20030203082745/http://download.microsoft.com/download/speechSDK/Install/4.0a/WIN98/EN-US/SAPI4SDK.exe), which contains the SAPI 4 runtimes, offline documentation, and sample source code & test applications. If you only need the offline documentation, you can download the documentation for SAPI 4 COM interfaces from [this Microsoft archive](https://web.archive.org/web/19990418101425/http://www.microsoft.com/iit/onlineDocs/speechsdk4-com.chm).
+First, download and install either the SAPI 4 runtimes alone from [this Microsoft archive](http://web.archive.org/web/20150910165037/http://activex.microsoft.com/activex/controls/sapi/spchapi.exe), or the SAPI 4 SDK from [this Microsoft archive](https://web.archive.org/web/20030203082745/http://download.microsoft.com/download/speechSDK/Install/4.0a/WIN98/EN-US/SAPI4SDK.exe), which contains the SAPI 4 runtimes, offline documentation, sample source code, and test applications.
+If you only need the offline documentation, you can download the documentation for SAPI 4 COM interfaces from [this Microsoft archive](https://web.archive.org/web/19990418101425/http://www.microsoft.com/iit/onlineDocs/speechsdk4-com.chm).
 
 After installing the runtimes, download and install a SAPI 4 synthesizer from [this Microsoft archive](http://web.archive.org/web/20150910005021if_/http://activex.microsoft.com/activex/controls/agent2/tv_enua.exe).
 
diff --git a/source/synthDrivers/sapi4.py b/source/synthDrivers/sapi4.py
index 6ecba04a76..db77c1c3d3 100755
--- a/source/synthDrivers/sapi4.py
+++ b/source/synthDrivers/sapi4.py
@@ -109,6 +109,7 @@ def IUnknown_Release(self, this, *args, **kwargs):
 else:
 	c_ulonglong_p = POINTER(c_ulonglong)
 	LP_IAudioDestNotifySink = POINTER(IAudioDestNotifySink)
+
 AudioT: TypeAlias = bytes
 BookmarkT: TypeAlias = int
 
@@ -138,7 +139,7 @@ class SynthDriverAudio(COMObject):
 	_com_interfaces_ = [IAudio, IAudioDest]
 
 	def __init__(self):
-		self._notifySink: LP_IAudioDestNotifySink = None
+		self._notifySink: LP_IAudioDestNotifySink | None = None
 		self._deviceClaimed = False
 		self._deviceStarted = False
 		self._deviceUnClaiming = False

From 041bf7f66e876656e62f0b2d64e0980c1113e062 Mon Sep 17 00:00:00 2001
From: Sean Budd <seanbudd123@gmail.com>
Date: Tue, 25 Feb 2025 15:30:55 +1100
Subject: [PATCH 21/21] Update source/synthDrivers/sapi4.py

---
 source/synthDrivers/sapi4.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/source/synthDrivers/sapi4.py b/source/synthDrivers/sapi4.py
index db77c1c3d3..82c5e48ba9 100755
--- a/source/synthDrivers/sapi4.py
+++ b/source/synthDrivers/sapi4.py
@@ -2,7 +2,6 @@
 # Copyright (C) 2006-2025 NV Access Limited, Leonard de Ruijter
 # This file is covered by the GNU General Public License.
 # See the file COPYING for more details.
-# This module is deprecated, pending removal in NVDA 2026.1.
 
 from datetime import datetime
 import locale