diff --git a/speech.py b/speech.py index e412e8b..32ad49c 100755 --- a/speech.py +++ b/speech.py @@ -18,6 +18,9 @@ from pydantic import BaseModel import uvicorn +# Seconds of silence inserted between consecutive text chunks; read from the SILENCE_LENGTH environment variable, defaulting to 0.2 when unset +SILENCE_LENGTH = float(os.getenv("SILENCE_LENGTH", "0.2")) + @contextlib.asynccontextmanager async def lifespan(app): yield @@ -349,10 +352,15 @@ def generator(): logger.debug(f"{voice} wav samples: {audio_path}") try: - for text in all_text: + for i, text in enumerate(all_text): for chunk in xtts.tts(text=text, language=language, audio_path=audio_path, **hf_generate_kwargs): exception_check(ex_q) in_q.put(chunk) + + # Queue SILENCE_LENGTH seconds of zero bytes (divided by speed) after every chunk except the last + if i < len(all_text) - 1: + silence_chunk = bytes(max(0, int(24000 * SILENCE_LENGTH / speed)) * 4) # truncate to whole 4-byte units first so the byte count stays a multiple of 4 (presumably 24 kHz, 4 bytes per sample - confirm against the xtts.tts chunk format) + in_q.put(silence_chunk) except BrokenPipeError as e: # client disconnect lands here logger.info("Client disconnected - 'Broken pipe'")