
Commit be759f3

Author: matatonic
Commit message: 0.15.0
Parent: c957ad8

7 files changed: +24, -34 lines

Dockerfile (+3, -1)

@@ -1,7 +1,9 @@
 FROM python:3.11-slim
 
+RUN --mount=type=cache,target=/root/.cache/pip pip install -U pip
+
 ARG TARGETPLATFORM
-RUN apt-get update && apt-get install --no-install-recommends -y curl ffmpeg
+RUN apt-get update && apt-get install --no-install-recommends -y curl ffmpeg libaio-dev
 RUN if [ "$TARGETPLATFORM" != "linux/amd64" ]; then apt-get install --no-install-recommends -y build-essential ; fi
 RUN if [ "$TARGETPLATFORM" != "linux/amd64" ]; then curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y ; fi
 ENV PATH="/root/.cargo/bin:${PATH}"

Dockerfile.min (+1, -1)

@@ -12,7 +12,7 @@ RUN mkdir -p voices config
 
 COPY requirements*.txt /app/
 RUN --mount=type=cache,target=/root/.cache/pip pip install -r requirements-min.txt
-COPY speech.py openedai.py say.py *.sh *.default.yaml README.md LICENSE /app/
+COPY *.py *.sh *.default.yaml README.md LICENSE /app/
 
 ENV TTS_HOME=voices
 ENV HF_HOME=voices

README.md (+6, -1)

@@ -29,6 +29,11 @@ If you find a better voice match for `tts-1` or `tts-1-hd`, please let me know s
 
 ## Recent Changes
 
+Version 0.15.0, 2024-06-26
+
+* Switch to [coqui-tts](https://github.com/idiap/coqui-ai-TTS) (updated fork), updated simpler dependencies, torch 2.3, etc.
+* Resolve cuda threading issues
+
 Version 0.14.1, 2024-06-26
 
 * Make deepspeed possible (`--use-deepspeed`), but not enabled in pre-built docker images (too large). Requires the cuda-toolkit installed, see the Dockerfile comment for details
@@ -127,7 +132,7 @@ source .venv/bin/activate
 # Install the Python requirements
 # - use requirements-rocm.txt for AMD GPU (ROCm support)
 # - use requirements-min.txt for piper only (CPU only)
-pip install -r requirements.txt
+pip install -U -r requirements.txt
 # run the server
 bash startup.sh
 ```
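The changelog above swaps the pinned `TTS==0.22.0` dependency for the `coqui-tts` fork. Below is a minimal sketch of exercising that package after `pip install coqui-tts`, assuming the fork keeps the upstream `TTS` import namespace and the standard XTTS v2 model id; neither is shown in this commit, and the reference wav path is hypothetical.

```
# Sketch only, not part of this repository. Assumes the coqui-tts fork keeps
# the upstream `TTS` package name and the standard XTTS v2 model id.
from TTS.api import TTS

tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")  # downloads weights on first use
tts.tts_to_file(
    text="Hello from the coqui-tts fork.",
    speaker_wav="voices/example.wav",  # hypothetical reference clip for voice cloning
    language="en",
    file_path="out.wav",
)
```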

requirements-min.txt (+1, -2)

@@ -1,6 +1,5 @@
-pyyaml
 fastapi
 uvicorn
 loguru
 numpy<2
-piper-tts==1.2.0
+piper-tts

requirements-rocm.txt (+5, -14)

@@ -1,17 +1,8 @@
 fastapi
 uvicorn
 loguru
-# piper-tts
-piper-tts==1.2.0
-# xtts
-TTS==0.22.0
-# https://github.com/huggingface/transformers/issues/31040
-transformers<4.41.0
-deepspeed<0.14.0
-# XXX, 3.8+ has some issue for now
-spacy==3.7.4
-
-# torch==2.2.2 Fixes: https://github.com/matatonic/openedai-speech/issues/9
-# Re: https://github.com/pytorch/pytorch/issues/121834
-torch==2.2.2; --index-url https://download.pytorch.org/whl/rocm5.7; sys_platform == "linux"
-torchaudio==2.2.2; --index-url https://download.pytorch.org/whl/rocm5.7; sys_platform == "linux"
+piper-tts
+coqui-tts
+deepspeed
+torch; --index-url https://download.pytorch.org/whl/rocm5.7; sys_platform == "linux"
+torchaudio; --index-url https://download.pytorch.org/whl/rocm5.7; sys_platform == "linux"

requirements.txt (+6, -14)

@@ -1,22 +1,14 @@
 fastapi
 uvicorn
 loguru
-# piper-tts
-piper-tts==1.2.0
-# xtts
-TTS==0.22.0
-# https://github.com/huggingface/transformers/issues/31040
-transformers<4.41.0
-deepspeed<0.14.0
-# XXX, 3.8+ has some issue for now
-spacy==3.7.4
+piper-tts
+coqui-tts[languages]
+deepspeed
 
-# torch==2.2.2 Fixes: https://github.com/matatonic/openedai-speech/issues/9
-# Re: https://github.com/pytorch/pytorch/issues/121834
-torch==2.2.2; sys_platform != "darwin"
+torch; sys_platform != "darwin"
 torchaudio; sys_platform != "darwin"
 # for MPS accelerated torch on Mac - doesn't work yet, incomplete support in torch and torchaudio
-torch==2.2.2; --index-url https://download.pytorch.org/whl/cpu; sys_platform == "darwin"
-torchaudio==2.2.2; --index-url https://download.pytorch.org/whl/cpu; sys_platform == "darwin"
+torch; --index-url https://download.pytorch.org/whl/cpu; sys_platform == "darwin"
+torchaudio; --index-url https://download.pytorch.org/whl/cpu; sys_platform == "darwin"
 
 # ROCM (Linux only) - use requirements.amd.txt

speech.py (+2, -1)

@@ -92,7 +92,8 @@ def tts(self, text, language, speaker_wav, **hf_generate_kwargs):
         self.not_idle()
         try:
             with torch.no_grad():
-                gpt_cond_latent, speaker_embedding = self.xtts.get_conditioning_latents(audio_path=[speaker_wav]) # XXX TODO: allow multiple wav
+                with self.lock: # this doesn't seem threadsafe, but it's quick enough
+                    gpt_cond_latent, speaker_embedding = self.xtts.get_conditioning_latents(audio_path=[speaker_wav]) # XXX TODO: allow multiple wav
 
                 for wav in self.xtts.inference_stream(text, language, gpt_cond_latent, speaker_embedding, **hf_generate_kwargs):
                     yield wav.cpu().numpy().tobytes() # assumes wav data is f32le
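The `with self.lock:` added above serializes `get_conditioning_latents()` across requests; the inline comment notes the call does not appear to be threadsafe. Below is a minimal sketch of that pattern, assuming `self.lock` is a `threading.Lock` created in the class constructor (the constructor is not part of this diff, and the class and method names are illustrative).

```
import threading

class ConditioningLatents:
    """Illustrative stand-in for the xtts wrapper; only the locking pattern mirrors the diff."""

    def __init__(self):
        self.lock = threading.Lock()  # assumed to be created like this in the real __init__

    def get(self, speaker_wav):
        # Serialize the non-threadsafe call so overlapping requests don't interleave;
        # the lock is held only for this quick step, not for the streaming inference.
        with self.lock:
            return self._compute(speaker_wav)

    def _compute(self, speaker_wav):
        # Placeholder for self.xtts.get_conditioning_latents(audio_path=[speaker_wav]).
        return (None, None)
```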
